//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
#include <optional>
#include <tuple>

#define DEBUG_TYPE "gi-combiner"
using namespace llvm;
using namespace MIPatternMatch;

// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
                       cl::desc("Force all indexed operations to be "
                                "legal for the GlobalISel combiner"));
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                               MachineIRBuilder &B, bool IsPreLegalize,
                               GISelKnownBits *KB, MachineDominatorTree *MDT,
                               const LegalizerInfo *LI)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
      MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
      RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
      TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
}

const TargetLowering &CombinerHelper::getTargetLowering() const {
  return *Builder.getMF().getSubtarget().getTargetLowering();
}
/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return I;
}
/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(V);
  auto Ctlz = MIB.buildCTLZ(Ty, V);
  auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
  return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
}
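
// Worked example (illustrative only; register names below are hypothetical):
// for a 32-bit value V = 8, ctlz(8) = 28 and (32 - 1) - 28 = 3 = log2(8).
// The emitted MIR is roughly:
//   %ctlz:_(s32) = G_CTLZ %v(s32)
//   %c31:_(s32) = G_CONSTANT i32 31
//   %log2:_(s32) = G_SUB %c31, %ctlz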
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return ByteWidth - I - 1;
}
/// Given a map from byte offsets in memory to indices in a load/store,
/// determine if that map corresponds to a little or big endian byte pattern.
///
/// \param MemOffset2Idx maps memory offsets to address offsets.
/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
///
/// \returns true if the map corresponds to a big endian byte pattern, false if
/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
///
/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
/// are as follows:
///
/// AddrOffset   Little endian    Big endian
/// 0            0                3
/// 1            1                2
/// 2            2                1
/// 3            3                0
static std::optional<bool>
isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
            int64_t LowestIdx) {
  // Need at least two byte positions to decide on endianness.
  unsigned Width = MemOffset2Idx.size();
  if (Width < 2)
    return std::nullopt;
  bool BigEndian = true, LittleEndian = true;
  for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
    auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
    if (MemOffsetAndIdx == MemOffset2Idx.end())
      return std::nullopt;
    const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
    assert(Idx >= 0 && "Expected non-negative byte offset?");
    LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
    BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
    if (!BigEndian && !LittleEndian)
      return std::nullopt;
  }

  assert((BigEndian != LittleEndian) &&
         "Pattern cannot be both big and little endian!");
  return BigEndian;
}
bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
  assert(LI && "Must have LegalizerInfo to query isLegal!");
  return LI->getAction(Query).Action == LegalizeActions::Legal;
}

bool CombinerHelper::isLegalOrBeforeLegalizer(
    const LegalityQuery &Query) const {
  return isPreLegalize() || isLegal(Query);
}
bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
  if (!Ty.isVector())
    return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
  // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
  if (isPreLegalize())
    return true;
  LLT EltTy = Ty.getElementType();
  return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
         isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
}
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                                    Register ToReg) const {
  Observer.changingAllUsesOfReg(MRI, FromReg);

  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(ToReg, FromReg);

  Observer.finishedChangingAllUsesOfReg();
}
void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
                                      MachineOperand &FromRegOp,
                                      Register ToReg) const {
  assert(FromRegOp.getParent() && "Expected an operand in an MI");
  Observer.changingInstr(*FromRegOp.getParent());

  FromRegOp.setReg(ToReg);

  Observer.changedInstr(*FromRegOp.getParent());
}

void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
                                       unsigned ToOpcode) const {
  Observer.changingInstr(FromMI);

  FromMI.setDesc(Builder.getTII().get(ToOpcode));

  Observer.changedInstr(FromMI);
}

const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
  return RBI->getRegBank(Reg, MRI, *TRI);
}
void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
  if (RegBank)
    MRI.setRegBank(Reg, *RegBank);
}
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return canReplaceReg(DstReg, SrcReg, MRI);
}

void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, SrcReg);
}
bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
  bool IsUndef = false;
  SmallVector<Register, 4> Ops;
  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
    applyCombineConcatVectors(MI, IsUndef, Ops);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
  IsUndef = true;
  MachineInstr *Undef = nullptr;

  // Walk over all the operands of concat vectors and check if they are
  // build_vector themselves or undef.
  // Then collect their operands in Ops.
  for (const MachineOperand &MO : MI.uses()) {
    Register Reg = MO.getReg();
    MachineInstr *Def = MRI.getVRegDef(Reg);
    assert(Def && "Operand not defined");
    switch (Def->getOpcode()) {
    case TargetOpcode::G_BUILD_VECTOR:
      IsUndef = false;
      // Remember the operands of the build_vector to fold
      // them into the yet-to-build flattened concat vectors.
      for (const MachineOperand &BuildVecMO : Def->uses())
        Ops.push_back(BuildVecMO.getReg());
      break;
    case TargetOpcode::G_IMPLICIT_DEF: {
      LLT OpType = MRI.getType(Reg);
      // Keep one undef value for all the undef operands.
      if (!Undef) {
        Builder.setInsertPt(*MI.getParent(), MI);
        Undef = Builder.buildUndef(OpType.getScalarType());
      }
      assert(MRI.getType(Undef->getOperand(0).getReg()) ==
                 OpType.getScalarType() &&
             "All undefs should have the same type");
      // Break the undef vector in as many scalar elements as needed
      // for the flattening.
      for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
           EltIdx != EltEnd; ++EltIdx)
        Ops.push_back(Undef->getOperand(0).getReg());
      break;
    }
    default:
      return false;
    }
  }
  return true;
}
void CombinerHelper::applyCombineConcatVectors(
    MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
  // Generate the flattened build_vector.
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef. Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up. For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // dedicated instruction.
  if (IsUndef)
    Builder.buildUndef(NewDstReg);
  else
    Builder.buildBuildVector(NewDstReg, Ops);
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}
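
// Illustrative before/after for this combine (hypothetical MIR; register
// names are made up for the example):
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32)
//   %b:_(<2 x s32>) = G_IMPLICIT_DEF
//   %v:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
// becomes:
//   %u:_(s32) = G_IMPLICIT_DEF
//   %v:_(<4 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32), %u(s32), %u(s32)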
bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Invalid instruction kind");
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(Src1);
  // As bizarre as it may look, shuffle vector can actually produce
  // scalar! This is because at the IR level a <1 x ty> shuffle
  // vector is perfectly valid.
  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;

  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector with a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  //       extract_vector_elt and so on. It is less clear that would
  //       be better though, so don't bother for now.
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the size between the source and destination don't match
  //       we could still emit an extract vector element in that case.
  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
    return false;

  // Check that the shuffle mask can be broken evenly between the
  // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;

  // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
  unsigned NumConcat = DstNumElts / SrcNumElts;
  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;
    // Ensure the indices in each SrcType sized piece are sequential and that
    // the same source is used for the whole piece.
    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
        (ConcatSrcs[i / SrcNumElts] >= 0 &&
         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
      return false;
    // Remember which source this index came from.
    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
  }

  // The shuffle is concatenating multiple vectors together.
  // Collect the different operands for that.
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (auto Src : ConcatSrcs) {
    if (Src < 0) {
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}
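
// For example (hypothetical MIR): with two <2 x s32> sources, the mask
// (0, 1, 2, 3) is a pure concatenation, so
//   %v:_(<4 x s32>) = G_SHUFFLE_VECTOR %a(<2 x s32>), %b(<2 x s32>), shufflemask(0, 1, 2, 3)
// can be rewritten by the apply step below as roughly
//   %v:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)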
void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                               const ArrayRef<Register> Ops) {
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  if (Ops.size() == 1)
    Builder.buildCopy(NewDstReg, Ops[0]);
  else
    Builder.buildMergeLikeInstr(NewDstReg, Ops);

  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}
bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Invalid instruction kind");

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  return Mask.size() == 1;
}
void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);

  int I = MI.getOperand(3).getShuffleMask()[0];
  Register Src1 = MI.getOperand(1).getReg();
  LLT Src1Ty = MRI.getType(Src1);
  int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
  Register SrcReg;
  if (I >= Src1NumElts) {
    SrcReg = MI.getOperand(2).getReg();
    I -= Src1NumElts;
  } else
    SrcReg = Src1;

  if (I < 0)
    Builder.buildUndef(DstReg);
  else if (!MRI.getType(SrcReg).isVector())
    Builder.buildCopy(DstReg, SrcReg);
  else
    Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);

  MI.eraseFromParent();
}
namespace {

/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate describes the attributes of the candidate under
/// consideration.
PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
                                  PreferredTuple &CurrentUse,
                                  const LLT TyForCandidate,
                                  unsigned OpcodeForCandidate,
                                  MachineInstr *MIForCandidate) {
  if (!CurrentUse.Ty.isValid()) {
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
        CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }

  // We permit the extend to hoist through basic blocks but this is only
  // sensible if the target has extending loads. If you end up lowering back
  // into a load and extend during the legalizer then the end result is
  // hoisting the extend up to the load.

  // Prefer defined extensions to undefined extensions as these are more
  // likely to reduce the number of instructions.
  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
           OpcodeForCandidate != TargetOpcode::G_ANYEXT)
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};

  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive. Don't do this if the load is already a zero-extend load
  // though, otherwise we'll rewrite a zero-extend load into a sign-extend
  // load.
  if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }

  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have a reduced number of larger registers than smaller
  // registers and this choice potentially increases the live-range for the
  // larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}
/// Find a suitable place to insert some instructions and insert them. This
/// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHI's is to duplicate the
/// instructions for each predecessor. However, while that's ok for G_TRUNC
/// on most targets since it generally requires no code, other targets/cases may
/// want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();

  MachineBasicBlock *InsertBB = UseMI.getParent();

  // If the use is a PHI then we want the predecessor block instead.
  if (UseMI.isPHI()) {
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }

  // If the block is the same block as the def then we want to insert just after
  // the def instead of at the start of the block.
  if (InsertBB == DefMI.getParent()) {
    MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt), UseMO);
    return;
  }

  // Otherwise we want the start of the BB.
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  PreferredTuple Preferred;
  if (matchCombineExtendingLoads(MI, Preferred)) {
    applyCombineExtendingLoads(MI, Preferred);
    return true;
  }
  return false;
}
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
  unsigned CandidateLoadOpc;
  switch (ExtOpc) {
  case TargetOpcode::G_ANYEXT:
    CandidateLoadOpc = TargetOpcode::G_LOAD;
    break;
  case TargetOpcode::G_SEXT:
    CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXT:
    CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
    break;
  default:
    llvm_unreachable("Unexpected extend opc");
  }
  return CandidateLoadOpc;
}
bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load for the volatile case or just
  // for performance.
  GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
  if (!LoadMI)
    return false;

  Register LoadReg = LoadMI->getDstReg();

  LLT LoadValueTy = MRI.getType(LoadReg);
  if (!LoadValueTy.isScalar())
    return false;

  // Most architectures are going to legalize <s8 loads into at least a 1 byte
  // load, and the MMOs can only describe memory accesses in multiples of bytes.
  // If we try to perform extload combining on those, we can end up with
  // %a(s8) = extload %ptr (load 1 byte from %ptr)
  // ... which is an illegal extload instruction.
  if (LoadValueTy.getSizeInBits() < 8)
    return false;

  // For non power-of-2 types, they will very likely be legalized into multiple
  // loads. Don't bother trying to match them into extending loads.
  if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
    return false;

  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode =
      isa<GLoad>(&MI)
          ? TargetOpcode::G_ANYEXT
          : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Preferred = {LLT(), PreferredOpcode, nullptr};
  for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
      const auto &MMO = LoadMI->getMMO();
      // For atomics, only form anyextending loads.
      if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
        continue;
      // Check for legality.
      if (!isPreLegalize()) {
        LegalityQuery::MemDesc MMDesc(MMO);
        unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
        LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
        LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
        if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
                .Action != LegalizeActions::Legal)
          continue;
      }
      Preferred = ChoosePreferredUse(MI, Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends.
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");

  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}
void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // Rewrite the load to the chosen extending load.
  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();

  // Inserter to insert a truncate back to the original type at a given point
  // with some basic CSE to limit truncate duplication to one per BB.
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
                           MachineBasicBlock::iterator InsertBefore,
                           MachineOperand &UseMO) {
    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
    if (PreviouslyEmitted) {
      Observer.changingInstr(*UseMO.getParent());
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }

    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
    EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };

  Observer.changingInstr(MI);
  unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
  MI.setDesc(Builder.getTII().get(LoadOpc));

  // Rewrite all the uses to fix up the types.
  auto &LoadValue = MI.getOperand(0);
  SmallVector<MachineOperand *, 4> Uses;
  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
    Uses.push_back(&UseMO);

  for (auto *UseMO : Uses) {
    MachineInstr *UseMI = UseMO->getParent();

    // If the extend is compatible with the preferred extend then we should fix
    // up the type and extend so that it uses the preferred use.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      Register UseDstReg = UseMI->getOperand(0).getReg();
      MachineOperand &UseSrcMO = UseMI->getOperand(1);
      const LLT UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ANYEXT %1(s8)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
          Observer.erasingInstr(*UseMO->getParent());
          UseMO->getParent()->eraseFromParent();
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s64) = G_ANYEXT %1(s8)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    %3:_(s64) = G_ANYEXT %2:_(s32)
          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the old extend.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }

    // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }

  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}
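
// End-to-end illustration of the extending-load combine (hypothetical MIR):
//   %v:_(s16) = G_LOAD %ptr(p0) :: (load (s16))
//   %e:_(s32) = G_SEXT %v(s16)
// becomes
//   %e:_(s32) = G_SEXTLOAD %ptr(p0) :: (load (s16))
// and any remaining non-extend uses of the old %v are fed by a G_TRUNC of %e.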
bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
                                                 BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  // If we have the following code:
  //  %mask = G_CONSTANT 255
  //  %ld   = G_LOAD %ptr, (load s16)
  //  %and  = G_AND %ld, %mask
  //
  // Try to fold it into
  //   %ld = G_ZEXTLOAD %ptr, (load s8)

  Register Dst = MI.getOperand(0).getReg();
  if (MRI.getType(Dst).isVector())
    return false;

  auto MaybeMask =
      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeMask)
    return false;

  APInt MaskVal = MaybeMask->Value;

  if (!MaskVal.isMask())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  // Don't use getOpcodeDef() here since intermediate instructions may have
  // multiple users.
  GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
    return false;

  Register LoadReg = LoadMI->getDstReg();
  LLT RegTy = MRI.getType(LoadReg);
  Register PtrReg = LoadMI->getPointerReg();
  unsigned RegSize = RegTy.getSizeInBits();
  uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
  unsigned MaskSizeBits = MaskVal.countr_one();

  // The mask may not be larger than the in-memory type, as it might cover sign
  // extended bits.
  if (MaskSizeBits > LoadSizeBits)
    return false;

  // If the mask covers the whole destination register, there's nothing to
  // extend.
  if (MaskSizeBits >= RegSize)
    return false;

  // Most targets cannot deal with loads of size < 8 and need to re-legalize to
  // at least byte loads. Avoid creating such loads here.
  if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
    return false;

  const MachineMemOperand &MMO = LoadMI->getMMO();
  LegalityQuery::MemDesc MemDesc(MMO);

  // Don't modify the memory access size if this is atomic/volatile, but we can
  // still adjust the opcode to indicate the high bit behavior.
  if (LoadMI->isSimple())
    MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
  else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
    return false;

  // TODO: Could check if it's legal with the reduced or original memory size.
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    B.setInstrAndDebugLoc(*LoadMI);
    auto &MF = B.getMF();
    auto PtrInfo = MMO.getPointerInfo();
    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
    B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
    LoadMI->eraseFromParent();
  };
  return true;
}
bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
                                   const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return true;
  const MachineBasicBlock &MBB = *DefMI.getParent();
  auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
    return &MI == &DefMI || &MI == &UseMI;
  });
  if (DefOrUse == MBB.end())
    llvm_unreachable("Block must contain both DefMI and UseMI!");
  return &*DefOrUse == &DefMI;
}
bool CombinerHelper::dominates(const MachineInstr &DefMI,
                               const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    return false;

  return isPredecessor(DefMI, UseMI);
}
bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register SrcReg = MI.getOperand(1).getReg();
  Register LoadUser = SrcReg;

  if (MRI.getType(SrcReg).isVector())
    return false;

  Register TruncSrc;
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
    LoadUser = TruncSrc;

  uint64_t SizeInBits = MI.getOperand(2).getImm();
  // If the source is a G_SEXTLOAD from the same bit width, then we don't
  // need any extend at all, just a truncate.
  if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
    // If truncating more than the original extended value, abort.
    auto LoadSizeBits = LoadMI->getMemSizeInBits();
    if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
      return false;
    if (LoadSizeBits == SizeInBits)
      return true;
  }
  return false;
}
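
// For example (hypothetical MIR):
//   %x:_(s32) = G_SEXTLOAD %ptr(p0) :: (load (s8))
//   %y:_(s32) = G_SEXT_INREG %x, 8
// The load already sign-extends from 8 bits, so the apply step below can
// replace %y with a plain copy of %x.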
void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Builder.setInstrAndDebugLoc(MI);
  Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
}
bool CombinerHelper::matchSextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);

  Register DstReg = MI.getOperand(0).getReg();
  LLT RegTy = MRI.getType(DstReg);

  // Only supports scalars for now.
  if (RegTy.isVector())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
  if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
    return false;

  uint64_t MemBits = LoadDef->getMemSizeInBits();

  // If the sign extend extends from a narrower width than the load's width,
  // then we can narrow the load width when we combine to a G_SEXTLOAD.
  // Avoid widening the load at all.
  unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);

  // Don't generate G_SEXTLOADs with a < 1 byte width.
  if (NewSizeBits < 8)
    return false;
  // Don't bother creating a non-power-2 sextload, it will likely be broken up
  // anyway for most targets.
  if (!isPowerOf2_32(NewSizeBits))
    return false;

  const MachineMemOperand &MMO = LoadDef->getMMO();
  LegalityQuery::MemDesc MMDesc(MMO);

  // Don't modify the memory access size if this is atomic/volatile, but we can
  // still adjust the opcode to indicate the high bit behavior.
  if (LoadDef->isSimple())
    MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
  else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
    return false;

  // TODO: Could check if it's legal with the reduced or original memory size.
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
                                 {MRI.getType(LoadDef->getDstReg()),
                                  MRI.getType(LoadDef->getPointerReg())},
                                 {MMDesc}}))
    return false;

  MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
  return true;
}
void CombinerHelper::applySextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register LoadReg;
  unsigned ScalarSizeBits;
  std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
  GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));

  // If we have the following:
  // %ld = G_LOAD %ptr, (load 2)
  // %ext = G_SEXT_INREG %ld, 8
  //    ==>
  // %ld = G_SEXTLOAD %ptr (load 1)

  auto &MMO = LoadDef->getMMO();
  Builder.setInstrAndDebugLoc(*LoadDef);
  auto &MF = Builder.getMF();
  auto PtrInfo = MMO.getPointerInfo();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
  Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
                         LoadDef->getPointerReg(), *NewMMO);
  MI.eraseFromParent();
}
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}
/// Return true if 'MI' is a load or a store whose address operand can be
/// folded into the load / store addressing mode.
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
                                    MachineRegisterInfo &MRI) {
  TargetLowering::AddrMode AM;
  auto *MF = MI->getMF();
  auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
  if (!Addr)
    return false;

  AM.HasBaseReg = true;
  if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
    AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
  else
    AM.Scale = 1; // [reg +/- reg]

  return TLI.isLegalAddressingMode(
      MF->getDataLayout(), AM,
      getTypeForLLT(MI->getMMO().getMemoryType(),
                    MF->getFunction().getContext()),
      MI->getMMO().getAddrSpace());
}
static unsigned getIndexedOpc(unsigned LdStOpc) {
  switch (LdStOpc) {
  case TargetOpcode::G_LOAD:
    return TargetOpcode::G_INDEXED_LOAD;
  case TargetOpcode::G_STORE:
    return TargetOpcode::G_INDEXED_STORE;
  case TargetOpcode::G_ZEXTLOAD:
    return TargetOpcode::G_INDEXED_ZEXTLOAD;
  case TargetOpcode::G_SEXTLOAD:
    return TargetOpcode::G_INDEXED_SEXTLOAD;
  default:
    llvm_unreachable("Unexpected opcode");
  }
}
bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
  // Check for legality.
  LLT PtrTy = MRI.getType(LdSt.getPointerReg());
  LLT Ty = MRI.getType(LdSt.getReg(0));
  LLT MemTy = LdSt.getMMO().getMemoryType();
  SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
      {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
  unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
  SmallVector<LLT> OpTys;
  if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
    OpTys = {PtrTy, Ty, Ty};
  else
    OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD

  LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
  return isLegal(Q);
}
static cl::opt<unsigned> PostIndexUseThreshold(
    "post-index-use-threshold", cl::Hidden, cl::init(32),
    cl::desc("Number of uses of a base pointer to check before it is no longer "
             "considered for post-indexing."));
bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
                                            Register &Base, Register &Offset,
                                            bool &RematOffset) {
  // We're looking for the following pattern, for either load or store:
  // %baseptr:_(p0) = ...
  // G_STORE %val(s64), %baseptr(p0)
  // %offset:_(s64) = G_CONSTANT i64 -256
  // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
  const auto &TLI = getTargetLowering();

  Register Ptr = LdSt.getPointerReg();
  // If the store is the only use, don't bother.
  if (MRI.hasOneNonDBGUse(Ptr))
    return false;

  if (!isIndexedLoadStoreLegal(LdSt))
    return false;

  if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
    return false;

  MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
  auto *PtrDef = MRI.getVRegDef(Ptr);

  unsigned NumUsesChecked = 0;
  for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
    if (++NumUsesChecked > PostIndexUseThreshold)
      return false; // Try to avoid exploding compile time.

    auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
    // The use itself might be dead. This can happen during combines if DCE
    // hasn't had a chance to run yet. Don't allow it to form an indexed op.
    if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
      continue;

    // Check the user of this isn't the store, otherwise we'd generate an
    // indexed store defining its own use.
    if (StoredValDef == &Use)
      continue;

    Offset = PtrAdd->getOffsetReg();
    if (!ForceLegalIndexing &&
        !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
                             /*IsPre*/ false, MRI))
      continue;

    // Make sure the offset calculation is before the potentially indexed op.
    MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
    RematOffset = false;
    if (!dominates(*OffsetDef, LdSt)) {
      // If the offset however is just a G_CONSTANT, we can always just
      // rematerialize it where we need it.
      if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
        continue;
      RematOffset = true;
    }

    for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
      if (&BasePtrUse == PtrDef)
        continue;

      // If the user is a later load/store that can be post-indexed, then don't
      // combine this one.
      auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
      if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
          dominates(LdSt, *BasePtrLdSt) &&
          isIndexedLoadStoreLegal(*BasePtrLdSt))
        return false;

      // Now we're looking for the key G_PTR_ADD instruction, which contains
      // the offset add that we want to fold.
      if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
        Register PtrAddDefReg = BasePtrUseDef->getReg(0);
        for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
          // If the use is in a different block, then we may produce worse code
          // due to the extra register pressure.
          if (BaseUseUse.getParent() != LdSt.getParent())
            return false;

          if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
            if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
              return false;
        }

        if (!dominates(LdSt, BasePtrUse))
          return false; // All uses must be dominated by the load/store.
      }
    }

    Addr = PtrAdd->getReg(0);
    Base = PtrAdd->getBaseReg();
    return true;
  }

  return false;
}
bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
                                           Register &Base, Register &Offset) {
  auto &MF = *LdSt.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

  Addr = LdSt.getPointerReg();
  if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
      MRI.hasOneNonDBGUse(Addr))
    return false;

  if (!ForceLegalIndexing &&
      !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
    return false;

  if (!isIndexedLoadStoreLegal(LdSt))
    return false;

  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return false;

  if (auto *St = dyn_cast<GStore>(&LdSt)) {
    // Would require a copy.
    if (Base == St->getValueReg())
      return false;

    // We're expecting one use of Addr in MI, but it could also be the
    // value stored, which isn't actually dominated by the instruction.
    if (St->getValueReg() == Addr)
      return false;
  }

  // Avoid increasing cross-block register pressure.
  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
    if (AddrUse.getParent() != LdSt.getParent())
      return false;

  // FIXME: check whether all uses of the base pointer are constant PtrAdds.
  // That might allow us to end base's liveness here by adjusting the constant.
  bool RealUse = false;
  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
    if (!dominates(LdSt, AddrUse))
      return false; // All uses must be dominated by the load/store.

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
      if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
        RealUse = true;
    } else {
      RealUse = true;
    }
  }
  return RealUse;
}
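
// Illustrative pre-index candidate (hypothetical MIR):
//   %addr:_(p0) = G_PTR_ADD %base, %off(s64)
//   G_STORE %val(s32), %addr(p0)
// When this returns true, applyCombineIndexedLoadStore rewrites the pair as
//   %addr:_(p0) = G_INDEXED_STORE %val(s32), %base(p0), %off(s64), 1
// where the trailing 1 is the IsPre immediate.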
bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
                                                     BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);

  // Check if there is a load that defines the vector being extracted from.
  auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
  if (!LoadMI)
    return false;

  Register Vector = MI.getOperand(1).getReg();
  LLT VecEltTy = MRI.getType(Vector).getElementType();

  assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);

  // Checking whether we should reduce the load width.
  if (!MRI.hasOneNonDBGUse(Vector))
    return false;

  // Check if the defining load is simple.
  if (!LoadMI->isSimple())
    return false;

  // If the vector element type is not a multiple of a byte then we are unable
  // to correctly compute an address to load only the extracted element as a
  // scalar.
  if (!VecEltTy.isByteSized())
    return false;

  // Check if the new load that we are going to create is legal
  // if we are in the post-legalization phase.
  MachineMemOperand MMO = LoadMI->getMMO();
  Align Alignment = MMO.getAlign();
  MachinePointerInfo PtrInfo;
  uint64_t Offset;

  // Finding the appropriate PtrInfo if offset is a known constant.
  // This is required to create the memory operand for the narrowed load.
  // This machine memory operand object helps us infer about legality
  // before we proceed to combine the instruction.
  if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
    int Elt = CVal->getZExtValue();
    // FIXME: should be (ABI size)*Elt.
    Offset = VecEltTy.getSizeInBits() * Elt / 8;
    PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
  } else {
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    Offset = VecEltTy.getSizeInBits() / 8;
    PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
  }

  Alignment = commonAlignment(Alignment, Offset);

  Register VecPtr = LoadMI->getPointerReg();
  LLT PtrTy = MRI.getType(VecPtr);

  MachineFunction &MF = *MI.getMF();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);

  LegalityQuery::MemDesc MMDesc(*NewMMO);

  LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};

  if (!isLegalOrBeforeLegalizer(Q))
    return false;

  // Load must be allowed and fast on the target.
  LLVMContext &C = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  unsigned Fast = 0;
  if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
                                              &Fast) ||
      !Fast)
    return false;

  Register Result = MI.getOperand(0).getReg();
  Register Index = MI.getOperand(2).getReg();

  MatchInfo = [=](MachineIRBuilder &B) {
    GISelObserverWrapper DummyObserver;
    LegalizerHelper Helper(B.getMF(), DummyObserver, B);
    //// Get pointer to the vector element.
    Register finalPtr = Helper.getVectorElementPointer(
        LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
        Index);
    // New G_LOAD instruction.
    B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
    // Remove original GLOAD instruction.
    LoadMI->eraseFromParent();
  };

  return true;
}
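
// Illustrative result when this combine applies (hypothetical MIR):
//   %v:_(<4 x s32>) = G_LOAD %p(p0) :: (load (<4 x s32>))
//   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v(<4 x s32>), %idx(s64)
// is narrowed to a scalar load of just the selected element:
//   %eltp:_(p0) = <pointer to element %idx of %p>
//   %e:_(s32) = G_LOAD %eltp(p0) :: (load (s32))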
bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  auto &LdSt = cast<GLoadStore>(MI);

  if (LdSt.isAtomic())
    return false;

  MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
                                          MatchInfo.Offset);
  if (!MatchInfo.IsPre &&
      !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
                              MatchInfo.Offset, MatchInfo.RematOffset))
    return false;

  return true;
}
void CombinerHelper::applyCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
  Builder.setInstrAndDebugLoc(MI);
  unsigned Opcode = MI.getOpcode();
  bool IsStore = Opcode == TargetOpcode::G_STORE;
  unsigned NewOpcode = getIndexedOpc(Opcode);

  // If the offset constant didn't happen to dominate the load/store, we can
  // just clone it as needed.
  if (MatchInfo.RematOffset) {
    auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
    auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
                                        *OldCst->getOperand(1).getCImm());
    MatchInfo.Offset = NewCst.getReg(0);
  }

  auto MIB = Builder.buildInstr(NewOpcode);
  if (IsStore) {
    MIB.addDef(MatchInfo.Addr);
    MIB.addUse(MI.getOperand(0).getReg());
  } else {
    MIB.addDef(MI.getOperand(0).getReg());
    MIB.addDef(MatchInfo.Addr);
  }

  MIB.addUse(MatchInfo.Base);
  MIB.addUse(MatchInfo.Offset);
  MIB.addImm(MatchInfo.IsPre);
  MIB->cloneMemRefs(*MI.getMF(), MI);
  MI.eraseFromParent();
  AddrDef.eraseFromParent();

  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
}
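
// Illustrative post-indexed form (hypothetical MIR), matching the operand
// order built above for a store:
//   G_STORE %val(s64), %baseptr(p0)
//   %off:_(s64) = G_CONSTANT i64 -256
//   %new:_(p0) = G_PTR_ADD %baseptr, %off(s64)
// becomes a single writeback store:
//   %new:_(p0) = G_INDEXED_STORE %val(s64), %baseptr(p0), %off(s64), 0
// with the trailing 0 meaning post-increment (IsPre == false).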
bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) {
  unsigned Opcode = MI.getOpcode();
  bool IsDiv, IsSigned;

  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV: {
    IsDiv = true;
    IsSigned = Opcode == TargetOpcode::G_SDIV;
    break;
  }
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    IsDiv = false;
    IsSigned = Opcode == TargetOpcode::G_SREM;
    break;
  }
  }

  Register Src1 = MI.getOperand(1).getReg();
  unsigned DivOpcode, RemOpcode, DivremOpcode;
  if (IsSigned) {
    DivOpcode = TargetOpcode::G_SDIV;
    RemOpcode = TargetOpcode::G_SREM;
    DivremOpcode = TargetOpcode::G_SDIVREM;
  } else {
    DivOpcode = TargetOpcode::G_UDIV;
    RemOpcode = TargetOpcode::G_UREM;
    DivremOpcode = TargetOpcode::G_UDIVREM;
  }

  if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
    return false;

  // Combine:
  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
  // into:
  //   %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_

  // Combine:
  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
  // into:
  //   %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_

  for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
    if (MI.getParent() == UseMI.getParent() &&
        ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
         (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
        matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
        matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
      OtherMI = &UseMI;
      return true;
    }
  }

  return false;
}
void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) {
  unsigned Opcode = MI.getOpcode();
  assert(OtherMI && "OtherMI shouldn't be empty.");

  Register DestDivReg, DestRemReg;
  if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
    DestDivReg = MI.getOperand(0).getReg();
    DestRemReg = OtherMI->getOperand(0).getReg();
  } else {
    DestDivReg = OtherMI->getOperand(0).getReg();
    DestRemReg = MI.getOperand(0).getReg();
  }

  bool IsSigned =
      Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;

  // Check which instruction is first in the block so we don't break def-use
  // deps by "moving" the instruction incorrectly. Also keep track of which
  // instruction is first so we pick its operands, avoiding use-before-def
  // bugs.
  MachineInstr *FirstInst;
  if (dominates(MI, *OtherMI)) {
    Builder.setInstrAndDebugLoc(MI);
    FirstInst = &MI;
  } else {
    Builder.setInstrAndDebugLoc(*OtherMI);
    FirstInst = OtherMI;
  }

  Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
                              : TargetOpcode::G_UDIVREM,
                     {DestDivReg, DestRemReg},
                     {FirstInst->getOperand(1), FirstInst->getOperand(2)});
  MI.eraseFromParent();
  OtherMI->eraseFromParent();
}
bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
                                                   MachineInstr *&BrCond) {
  assert(MI.getOpcode() == TargetOpcode::G_BR);

  // Try to match the following:
  // bb1:
  //   G_BRCOND %c1, %bb2
  //   G_BR %bb3
  // bb2:
  // ...
  // bb3:
  //
  // The above pattern does not have a fall through to the successor bb2, always
  // resulting in a branch no matter which path is taken. Here we try to find
  // and replace that pattern with conditional branch to bb3 and otherwise
  // fallthrough to bb2. This is generally better for branch predictors.

  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator BrIt(MI);
  if (BrIt == MBB->begin())
    return false;
  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");

  BrCond = &*std::prev(BrIt);
  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Check that the next block is the conditional branch target. Also make sure
  // that it isn't the same as the G_BR's target (otherwise, this will loop.)
  MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
  return BrCondTarget != MI.getOperand(0).getMBB() &&
         MBB->isLayoutSuccessor(BrCondTarget);
}
void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
                                                   MachineInstr *&BrCond) {
  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  Builder.setInstrAndDebugLoc(*BrCond);
  LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
  // FIXME: Does int/fp matter for this? If so, we might need to restrict
  // this to i1 only since we might not know for sure what kind of
  // compare generated the condition value.
  auto True = Builder.buildConstant(
      Ty, getICmpTrueVal(getTargetLowering(), false, false));
  auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);

  auto *FallthroughBB = BrCond->getOperand(1).getMBB();
  Observer.changingInstr(MI);
  MI.getOperand(0).setMBB(FallthroughBB);
  Observer.changedInstr(MI);

  // Change the conditional branch to use the inverted condition and
  // new target block.
  Observer.changingInstr(*BrCond);
  BrCond->getOperand(0).setReg(Xor.getReg(0));
  BrCond->getOperand(1).setMBB(BrTarget);
  Observer.changedInstr(*BrCond);
}
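
// Illustrative before/after (hypothetical MIR):
//   bb1:
//     G_BRCOND %c(s1), %bb2
//     G_BR %bb3
// becomes
//   bb1:
//     %true:_(s1) = G_CONSTANT i1 true
//     %inv:_(s1) = G_XOR %c(s1), %true
//     G_BRCOND %inv(s1), %bb3
//     G_BR %bb2   ; now targets the layout successor, i.e. a fallthrough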
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  return Helper.lowerMemcpyInline(MI) ==
         LegalizerHelper::LegalizeResult::Legalized;
}

bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  return Helper.lowerMemCpyFamily(MI, MaxLen) ==
         LegalizerHelper::LegalizeResult::Legalized;
}
static APFloat constantFoldFpUnary(const MachineInstr &MI,
                                   const MachineRegisterInfo &MRI,
                                   const APFloat &Val) {
  APFloat Result(Val);
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_FNEG: {
    Result.changeSign();
    return Result;
  }
  case TargetOpcode::G_FABS: {
    Result.clearSign();
    return Result;
  }
  case TargetOpcode::G_FPTRUNC: {
    bool Unused;
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
                   &Unused);
    return Result;
  }
  case TargetOpcode::G_FSQRT: {
    bool Unused;
    Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                   &Unused);
    Result = APFloat(sqrt(Result.convertToDouble()));
    break;
  }
  case TargetOpcode::G_FLOG2: {
    bool Unused;
    Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                   &Unused);
    Result = APFloat(log2(Result.convertToDouble()));
    break;
  }
  }
  // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
  // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
  // `G_FLOG2` reach here.
  bool Unused;
  Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
  return Result;
}
void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
                                                     const ConstantFP *Cst) {
  Builder.setInstrAndDebugLoc(MI);
  APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
  const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
  Builder.buildFConstant(MI.getOperand(0), *NewCst);
  MI.eraseFromParent();
}
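
// For example (hypothetical MIR):
//   %c:_(s32) = G_FCONSTANT float 4.000000e+00
//   %s:_(s32) = G_FSQRT %c
// folds to
//   %s:_(s32) = G_FCONSTANT float 2.000000e+00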
bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  // We're trying to match the following pattern:
  //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
  //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
  // -->
  //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)

  if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Add2 = MI.getOperand(1).getReg();
  Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  MachineInstr *Add2Def = MRI.getVRegDef(Add2);
  if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Base = Add2Def->getOperand(1).getReg();
  Register Imm2 = Add2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Check if the new combined immediate forms an illegal addressing mode.
  // Do not combine if it was legal before but would get illegal.
  // To do so, we need to find a load/store user of the pointer to get
  // the access type.
  Type *AccessTy = nullptr;
  auto &MF = *MI.getMF();
  for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
    if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
      AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
                               MF.getFunction().getContext());
      break;
    }
  }
  TargetLoweringBase::AddrMode AMNew;
  APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
  AMNew.BaseOffs = CombinedImm.getSExtValue();
  if (AccessTy) {
    AMNew.HasBaseReg = true;
    TargetLoweringBase::AddrMode AMOld;
    AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
    AMOld.HasBaseReg = true;
    unsigned AS = MRI.getType(Add2).getAddressSpace();
    const auto &TLI = *MF.getSubtarget().getTargetLowering();
    if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
        !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
      return false;
  }

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm = AMNew.BaseOffs;
  MatchInfo.Base = Base;
  MatchInfo.Bank = getRegBank(Imm2);
  return true;
}
void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  MachineIRBuilder MIB(MI);
  LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
  auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
  setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Base);
  MI.getOperand(2).setReg(NewOffset.getReg(0));
  Observer.changedInstr(MI);
}
bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
                                          RegisterImmPair &MatchInfo) {
  // We're trying to match the following pattern with any of
  // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
  //   %t1 = SHIFT %base, G_CONSTANT imm1
  //   %root = SHIFT %t1, G_CONSTANT imm2
  // -->
  //   %root = SHIFT %base, G_CONSTANT (imm1 + imm2)

  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
          Opcode == TargetOpcode::G_USHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");

  Register Shl2 = MI.getOperand(1).getReg();
  Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
  if (Shl2Def->getOpcode() != Opcode)
    return false;

  Register Base = Shl2Def->getOperand(1).getReg();
  Register Imm2 = Shl2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm =
      (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
  MatchInfo.Reg = Base;

  // There is no simple replacement for a saturating unsigned left shift that
  // exceeds the scalar size.
  if (Opcode == TargetOpcode::G_USHLSAT &&
      MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
    return false;

  return true;
}

void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
                                          RegisterImmPair &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
          Opcode == TargetOpcode::G_USHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");

  Builder.setInstrAndDebugLoc(MI);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
  auto Imm = MatchInfo.Imm;

  if (Imm >= ScalarSizeInBits) {
    // Any logical shift that exceeds scalar size will produce zero.
    if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
      Builder.buildConstant(MI.getOperand(0), 0);
      MI.eraseFromParent();
      return;
    }
    // Arithmetic shift and saturating signed left shift have no effect beyond
    // the scalar size.
    Imm = ScalarSizeInBits - 1;
  }

  LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
  Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Reg);
  MI.getOperand(2).setReg(NewImm);
  Observer.changedInstr(MI);
}
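
// Illustrative sketch (made-up registers and widths): for s32 values,
//   %t:_(s32)    = G_SHL %base, %c20
//   %root:_(s32) = G_SHL %t, %c20
// matches with a combined immediate of 40; since 40 >= 32, the apply step
// above folds the whole chain to a zero constant, while for G_ASHR or
// G_SSHLSAT the combined amount would instead be clamped to 31.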

bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
                                              ShiftOfShiftedLogic &MatchInfo) {
  // We're trying to match the following pattern with any of
  // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
  // with any of G_AND/G_OR/G_XOR logic instructions.
  //   %t1 = SHIFT %X, G_CONSTANT C0
  //   %t2 = LOGIC %t1, %Y
  //   %root = SHIFT %t2, G_CONSTANT C1
  // -->
  //   %t3 = SHIFT %X, G_CONSTANT (C0+C1)
  //   %t4 = SHIFT %Y, G_CONSTANT C1
  //   %root = LOGIC %t3, %t4
  unsigned ShiftOpcode = MI.getOpcode();
  assert((ShiftOpcode == TargetOpcode::G_SHL ||
          ShiftOpcode == TargetOpcode::G_ASHR ||
          ShiftOpcode == TargetOpcode::G_LSHR ||
          ShiftOpcode == TargetOpcode::G_USHLSAT ||
          ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");

  // Match a one-use bitwise logic op.
  Register LogicDest = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LogicDest))
    return false;

  MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
  unsigned LogicOpcode = LogicMI->getOpcode();
  if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
      LogicOpcode != TargetOpcode::G_XOR)
    return false;

  // Find a matching one-use shift by constant.
  const Register C1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
  if (!MaybeImmVal || MaybeImmVal->Value == 0)
    return false;

  const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();

  auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
    // Shift should match previous one and should be a one-use.
    if (MI->getOpcode() != ShiftOpcode ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      return false;

    // Must be a constant.
    auto MaybeImmVal =
        getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
    if (!MaybeImmVal)
      return false;

    ShiftVal = MaybeImmVal->Value.getSExtValue();
    return true;
  };

  // Logic ops are commutative, so check each operand for a match.
  Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
  MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
  Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
  MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
  uint64_t C0Val;

  if (matchFirstShift(LogicMIOp1, C0Val)) {
    MatchInfo.LogicNonShiftReg = LogicMIReg2;
    MatchInfo.Shift2 = LogicMIOp1;
  } else if (matchFirstShift(LogicMIOp2, C0Val)) {
    MatchInfo.LogicNonShiftReg = LogicMIReg1;
    MatchInfo.Shift2 = LogicMIOp2;
  } else
    return false;

  MatchInfo.ValSum = C0Val + C1Val;

  // The fold is not valid if the sum of the shift values exceeds bitwidth.
  if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
    return false;

  MatchInfo.Logic = LogicMI;
  return true;
}

void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
                                              ShiftOfShiftedLogic &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
          Opcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");

  LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
  LLT DestType = MRI.getType(MI.getOperand(0).getReg());
  Builder.setInstrAndDebugLoc(MI);

  Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);

  Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
  Register Shift1 =
      Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);

  // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
  // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
  // old shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at
  // the end would actually remove the old shift1 and crash later, so erase it
  // earlier to avoid the crash.
  MatchInfo.Shift2->eraseFromParent();

  Register Shift2Const = MI.getOperand(2).getReg();
  Register Shift2 = Builder
                        .buildInstr(Opcode, {DestType},
                                    {MatchInfo.LogicNonShiftReg, Shift2Const})
                        .getReg(0);

  Register Dest = MI.getOperand(0).getReg();
  Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});

  // This was one use so it's safe to remove it.
  MatchInfo.Logic->eraseFromParent();

  MI.eraseFromParent();
}

bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
  // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  auto &Shl = cast<GenericMachineInstr>(MI);
  Register DstReg = Shl.getReg(0);
  Register SrcReg = Shl.getReg(1);
  Register ShiftReg = Shl.getReg(2);
  Register X, C1;

  if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
    return false;

  if (!mi_match(SrcReg, MRI,
                m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
                                        m_GOr(m_Reg(X), m_Reg(C1))))))
    return false;

  APInt C1Val, C2Val;
  if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
      !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
    return false;

  auto *SrcDef = MRI.getVRegDef(SrcReg);
  assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
          SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
  LLT SrcTy = MRI.getType(SrcReg);
  MatchInfo = [=](MachineIRBuilder &B) {
    auto S1 = B.buildShl(SrcTy, X, ShiftReg);
    auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
    B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
  };
  return true;
}

bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  auto MaybeImmVal =
      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value.exactLogBase2();
  return (static_cast<int32_t>(ShiftVal) != -1);
}
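
// Illustrative sketch (made-up registers): a power-of-two multiply such as
//   %dst:_(s32) = G_MUL %x, %c8        ; %c8 = G_CONSTANT i32 8
// matches with ShiftVal = 3 and is rewritten by the apply step below into
//   %dst:_(s32) = G_SHL %x, %c3        ; %c3 = G_CONSTANT i32 3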

void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  MachineIRBuilder MIB(MI);
  LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
  auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
  Observer.changingInstr(MI);
  MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
  MI.getOperand(2).setReg(ShiftCst.getReg(0));
  Observer.changedInstr(MI);
}

// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
                                             RegisterImmPair &MatchData) {
  assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
  if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
    return false;

  Register LHS = MI.getOperand(1).getReg();

  Register ExtSrc;
  if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
      !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
      !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
    return false;

  Register RHS = MI.getOperand(2).getReg();
  MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
  auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
  if (!MaybeShiftAmtVal)
    return false;

  if (LI) {
    LLT SrcTy = MRI.getType(ExtSrc);

    // We only really care about the legality with the shifted value. We can
    // pick any type for the constant shift amount, so ask the target what to
    // use. Otherwise we would have to guess and hope it is reported as legal.
    LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
      return false;
  }

  int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
  MatchData.Reg = ExtSrc;
  MatchData.Imm = ShiftAmt;

  unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
  unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
  return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
}

void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
                                             const RegisterImmPair &MatchData) {
  Register ExtSrcReg = MatchData.Reg;
  int64_t ShiftAmtVal = MatchData.Imm;

  LLT ExtSrcTy = MRI.getType(ExtSrcReg);
  Builder.setInstrAndDebugLoc(MI);
  auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
  auto NarrowShift =
      Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
  Builder.buildZExt(MI.getOperand(0), NarrowShift);
  MI.eraseFromParent();
}
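
// Illustrative sketch (made-up registers and types): if at least two of the
// high bits of the s16 source are known to be zero,
//   %ext:_(s32) = G_ZEXT %x:_(s16)
//   %dst:_(s32) = G_SHL %ext, %c2
// is rewritten into a narrow shift followed by a zero-extension:
//   %shl:_(s16) = G_SHL %x, %c2
//   %dst:_(s32) = G_ZEXT %shl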

bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
                                              Register &MatchInfo) {
  GMerge &Merge = cast<GMerge>(MI);
  SmallVector<Register, 16> MergedValues;
  for (unsigned I = 0; I < Merge.getNumSources(); ++I)
    MergedValues.emplace_back(Merge.getSourceReg(I));

  auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
  if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
    return false;

  for (unsigned I = 0; I < MergedValues.size(); ++I)
    if (MergedValues[I] != Unmerge->getReg(I))
      return false;

  MatchInfo = Unmerge->getSourceReg();
  return true;
}
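
// Illustrative sketch (made-up registers): when every source of the merge is
// the corresponding def of a single unmerge,
//   %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %x:_(s64)
//   %y:_(s64) = G_MERGE_VALUES %a, %b
// the match records %x so that %y can simply be replaced by %x.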

static Register peekThroughBitcast(Register Reg,
                                   const MachineRegisterInfo &MRI) {
  while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
    ;

  return Reg;
}

bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  auto &Unmerge = cast<GUnmerge>(MI);
  Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);

  auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
  if (!SrcInstr)
    return false;

  // Check the source type of the merge.
  LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
  LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
  bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
  if (SrcMergeTy != Dst0Ty && !SameSize)
    return false;
  // They are the same now (modulo a bitcast).
  // We can collect all the src registers.
  for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
    Operands.push_back(SrcInstr->getSourceReg(Idx));
  return true;
}

void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  assert((MI.getNumOperands() - 1 == Operands.size()) &&
         "Not enough operands to replace all defs");
  unsigned NumElems = MI.getNumOperands() - 1;

  LLT SrcTy = MRI.getType(Operands[0]);
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  bool CanReuseInputDirectly = DstTy == SrcTy;
  Builder.setInstrAndDebugLoc(MI);
  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
    Register DstReg = MI.getOperand(Idx).getReg();
    Register SrcReg = Operands[Idx];

    // This combine may run after RegBankSelect, so we need to be aware of
    // register banks.
    const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
    if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
      SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
      MRI.setRegClassOrRegBank(SrcReg, DstCB);
    }

    if (CanReuseInputDirectly)
      replaceRegWith(MRI, DstReg, SrcReg);
    else
      Builder.buildCast(DstReg, SrcReg);
  }
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
                                                 SmallVectorImpl<APInt> &Csts) {
  unsigned SrcIdx = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(SrcIdx).getReg();
  MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
  if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
      SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
    return false;
  // Break down the big constant into smaller ones.
  const MachineOperand &CstVal = SrcInstr->getOperand(1);
  APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
                  ? CstVal.getCImm()->getValue()
                  : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();

  LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned ShiftAmt = Dst0Ty.getSizeInBits();
  // Unmerge a constant.
  for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
    Csts.emplace_back(Val.trunc(ShiftAmt));
    Val = Val.lshr(ShiftAmt);
  }

  return true;
}
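
// Illustrative sketch (made-up values): unmerging a 64-bit constant into two
// 32-bit pieces,
//   %c:_(s64) = G_CONSTANT i64 0x00000001deadbeef
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %c
// yields Csts = { 0xdeadbeef, 0x00000001 }, and the apply step below emits one
// G_CONSTANT per destination.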

void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
                                                 SmallVectorImpl<APInt> &Csts) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  assert((MI.getNumOperands() - 1 == Csts.size()) &&
         "Not enough operands to replace all defs");
  unsigned NumElems = MI.getNumOperands() - 1;
  Builder.setInstrAndDebugLoc(MI);
  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
    Register DstReg = MI.getOperand(Idx).getReg();
    Builder.buildConstant(DstReg, Csts[Idx]);
  }

  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineUnmergeUndef(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  unsigned SrcIdx = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(SrcIdx).getReg();
  MatchInfo = [&MI](MachineIRBuilder &B) {
    unsigned NumElems = MI.getNumOperands() - 1;
    for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
      Register DstReg = MI.getOperand(Idx).getReg();
      B.buildUndef(DstReg);
    }
  };
  return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
}

bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  // Check that all the lanes are dead except the first one.
  for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
    if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
      return false;
  }
  return true;
}

void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
  Builder.setInstrAndDebugLoc(MI);
  Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  // Truncating a vector is going to truncate every single lane,
  // whereas we want the full lowbits.
  // Do the operation on a scalar instead.
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    SrcReg =
        Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  if (Dst0Ty.isVector()) {
    auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
    Builder.buildCast(Dst0Reg, MIB);
  } else
    Builder.buildTrunc(Dst0Reg, SrcReg);
  MI.eraseFromParent();
}
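
// Illustrative sketch (made-up registers): if only the first result is used,
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x:_(s64)
// becomes
//   %lo:_(s32) = G_TRUNC %x:_(s64)
// with the dead %hi definition dropped; vector sources and destinations take
// the scalar-cast path handled above.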

bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  // G_ZEXT on vector applies to each lane, so it will
  // affect all destinations. Therefore we won't be able
  // to simplify the unmerge to just the first definition.
  if (Dst0Ty.isVector())
    return false;
  Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return false;

  Register ZExtSrcReg;
  if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
    return false;

  // Finally we can replace the first definition with
  // a zext of the source if the definition is big enough to hold
  // all of ZExtSrc bits.
  LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
  return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
}

void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");

  Register Dst0Reg = MI.getOperand(0).getReg();

  MachineInstr *ZExtInstr =
      MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
  assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
         "Expecting a G_ZEXT");

  Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);

  Builder.setInstrAndDebugLoc(MI);

  if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
    Builder.buildZExt(Dst0Reg, ZExtSrcReg);
  } else {
    assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
           "ZExt src doesn't fit in destination");
    replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
  }

  Register ZeroReg;
  for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
    if (!ZeroReg)
      ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
    replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
  }
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
                                                unsigned TargetShiftSize,
                                                unsigned &ShiftVal) {
  assert((MI.getOpcode() == TargetOpcode::G_SHL ||
          MI.getOpcode() == TargetOpcode::G_LSHR ||
          MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO:
    return false;

  // Don't narrow further than the requested size.
  unsigned Size = Ty.getSizeInBits();
  if (Size <= TargetShiftSize)
    return false;

  auto MaybeImmVal =
      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value.getSExtValue();
  return ShiftVal >= Size / 2 && ShiftVal < Size;
}

void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
                                                const unsigned &ShiftVal) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  unsigned Size = Ty.getSizeInBits();
  unsigned HalfSize = Size / 2;
  assert(ShiftVal >= HalfSize);

  LLT HalfTy = LLT::scalar(HalfSize);

  Builder.setInstr(MI);
  auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
  unsigned NarrowShiftAmt = ShiftVal - HalfSize;

  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    Register Narrowed = Unmerge.getReg(1);

    //  dst = G_LSHR s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0

    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildLShr(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
  } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
    Register Narrowed = Unmerge.getReg(0);
    //  dst = G_SHL s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildShl(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
  } else {
    assert(MI.getOpcode() == TargetOpcode::G_ASHR);
    auto Hi = Builder.buildAShr(
      HalfTy, Unmerge.getReg(1),
      Builder.buildConstant(HalfTy, HalfSize - 1));

    if (ShiftVal == HalfSize) {
      // (G_ASHR i64:x, 32) ->
      //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
      Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
    } else if (ShiftVal == Size - 1) {
      // Don't need a second shift.
      // (G_ASHR i64:x, 63) ->
      //   %narrowed = (G_ASHR hi_32(x), 31)
      //   G_MERGE_VALUES %narrowed, %narrowed
      Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
    } else {
      auto Lo = Builder.buildAShr(
        HalfTy, Unmerge.getReg(1),
        Builder.buildConstant(HalfTy, ShiftVal - HalfSize));

      // (G_ASHR i64:x, C) ->, for C >= 32
      //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
      Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
    }
  }

  MI.eraseFromParent();
}

bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
                                              unsigned TargetShiftAmount) {
  unsigned ShiftAmt;
  if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
    applyCombineShiftToUnmerge(MI, ShiftAmt);
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = MI.getOperand(1).getReg();
  return mi_match(SrcReg, MRI,
                  m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
}

void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInstr(MI);
  Builder.buildCopy(DstReg, Reg);
  MI.eraseFromParent();
}

void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInstr(MI);
  Builder.buildZExtOrTrunc(DstReg, Reg);
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineAddP2IToPtrAdd(
    MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
  assert(MI.getOpcode() == TargetOpcode::G_ADD);
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT IntTy = MRI.getType(LHS);

  // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
  // instruction.
  PtrReg.second = false;
  for (Register SrcReg : {LHS, RHS}) {
    if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
      // Don't handle cases where the integer is implicitly converted to the
      // pointer width.
      LLT PtrTy = MRI.getType(PtrReg.first);
      if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
        return true;
    }

    PtrReg.second = true;
  }

  return false;
}

void CombinerHelper::applyCombineAddP2IToPtrAdd(
    MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
  Register Dst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();

  const bool DoCommute = PtrReg.second;
  if (DoCommute)
    std::swap(LHS, RHS);
  LHS = PtrReg.first;

  LLT PtrTy = MRI.getType(LHS);

  Builder.setInstrAndDebugLoc(MI);
  auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
  Builder.buildPtrToInt(Dst, PtrAdd);
  MI.eraseFromParent();
}
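
// Illustrative sketch (made-up registers): an integer add of a converted
// pointer such as
//   %int:_(s64) = G_PTRTOINT %ptr:_(p0)
//   %dst:_(s64) = G_ADD %int, %off
// is rewritten into pointer arithmetic followed by a single conversion:
//   %sum:_(p0)  = G_PTR_ADD %ptr, %off
//   %dst:_(s64) = G_PTRTOINT %sum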

bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
                                                  APInt &NewCst) {
  auto &PtrAdd = cast<GPtrAdd>(MI);
  Register LHS = PtrAdd.getBaseReg();
  Register RHS = PtrAdd.getOffsetReg();
  MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();

  if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
    APInt Cst;
    if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
      auto DstTy = MRI.getType(PtrAdd.getReg(0));
      // G_INTTOPTR uses zero-extension
      NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
      NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
      return true;
    }
  }

  return false;
}

void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
                                                  APInt &NewCst) {
  auto &PtrAdd = cast<GPtrAdd>(MI);
  Register Dst = PtrAdd.getReg(0);

  Builder.setInstrAndDebugLoc(MI);
  Builder.buildConstant(Dst, NewCst);
  PtrAdd.eraseFromParent();
}

bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
  if (OriginalSrcReg.isValid())
    SrcReg = OriginalSrcReg;
  LLT DstTy = MRI.getType(DstReg);
  return mi_match(SrcReg, MRI,
                  m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
}

bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (mi_match(SrcReg, MRI,
               m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
    unsigned DstSize = DstTy.getScalarSizeInBits();
    unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
    return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
  }
  return false;
}

bool CombinerHelper::matchCombineExtOfExt(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
          MI.getOpcode() == TargetOpcode::G_SEXT ||
          MI.getOpcode() == TargetOpcode::G_ZEXT) &&
         "Expected a G_[ASZ]EXT");
  Register SrcReg = MI.getOperand(1).getReg();
  Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
  if (OriginalSrcReg.isValid())
    SrcReg = OriginalSrcReg;
  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
  // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
  unsigned Opc = MI.getOpcode();
  unsigned SrcOpc = SrcMI->getOpcode();
  if (Opc == SrcOpc ||
      (Opc == TargetOpcode::G_ANYEXT &&
       (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
      (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
    MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
    return true;
  }
  return false;
}

void CombinerHelper::applyCombineExtOfExt(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
          MI.getOpcode() == TargetOpcode::G_SEXT ||
          MI.getOpcode() == TargetOpcode::G_ZEXT) &&
         "Expected a G_[ASZ]EXT");

  Register Reg = std::get<0>(MatchInfo);
  unsigned SrcExtOp = std::get<1>(MatchInfo);

  // Combine exts with the same opcode.
  if (MI.getOpcode() == SrcExtOp) {
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(Reg);
    Observer.changedInstr(MI);
    return;
  }

  // Combine:
  // - anyext([sz]ext x) to [sz]ext x
  // - sext(zext x) to zext x
  if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
      (MI.getOpcode() == TargetOpcode::G_SEXT &&
       SrcExtOp == TargetOpcode::G_ZEXT)) {
    Register DstReg = MI.getOperand(0).getReg();
    Builder.setInstrAndDebugLoc(MI);
    Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
    MI.eraseFromParent();
  }
}
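
// Illustrative sketch (made-up registers): a sign-extension of a
// zero-extension,
//   %t:_(s32)   = G_ZEXT %x:_(s8)
//   %dst:_(s64) = G_SEXT %t
// collapses to a single zero-extension of the inner source:
//   %dst:_(s64) = G_ZEXT %x:_(s8)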

bool CombinerHelper::matchCombineTruncOfExt(
    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register SrcReg = MI.getOperand(1).getReg();
  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
  unsigned SrcOpc = SrcMI->getOpcode();
  if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
      SrcOpc == TargetOpcode::G_ZEXT) {
    MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
    return true;
  }
  return false;
}

void CombinerHelper::applyCombineTruncOfExt(
    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register SrcReg = MatchInfo.first;
  unsigned SrcExtOp = MatchInfo.second;
  Register DstReg = MI.getOperand(0).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  LLT DstTy = MRI.getType(DstReg);
  if (SrcTy == DstTy) {
    MI.eraseFromParent();
    replaceRegWith(MRI, DstReg, SrcReg);
    return;
  }
  Builder.setInstrAndDebugLoc(MI);
  if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
    Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
  else
    Builder.buildTrunc(DstReg, SrcReg);
  MI.eraseFromParent();
}
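
// Illustrative sketch (made-up registers and widths): truncating an extension
//   %t:_(s64)   = G_SEXT %x:_(s16)
//   %dst:_(s32) = G_TRUNC %t
// keeps only the inner value; since s16 is narrower than s32, the apply step
// above re-emits the extension at the narrower width:
//   %dst:_(s32) = G_SEXT %x:_(s16)
// and if the types had matched, %dst would simply be replaced by %x.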

static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
  const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
  const unsigned TruncSize = TruncTy.getScalarSizeInBits();

  // ShiftTy > 32 > TruncTy -> 32
  if (ShiftSize > 32 && TruncSize < 32)
    return ShiftTy.changeElementSize(32);

  // TODO: We could also reduce to 16 bits, but that's more target-dependent.
  // Some targets like it, some don't, some only like it under certain
  // conditions/processor versions, etc.
  // A TL hook might be needed for this.

  return ShiftTy;
}

bool CombinerHelper::matchCombineTruncOfShift(
    MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();

  if (!MRI.hasOneNonDBGUse(SrcReg))
    return false;

  LLT SrcTy = MRI.getType(SrcReg);
  LLT DstTy = MRI.getType(DstReg);

  MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
  const auto &TL = getTargetLowering();

  LLT NewShiftTy;
  switch (SrcMI->getOpcode()) {
  default:
    return false;
  case TargetOpcode::G_SHL: {
    NewShiftTy = DstTy;

    // Make sure new shift amount is legal.
    KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
    if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
      return false;
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // For right shifts, we conservatively do not do the transform if the TRUNC
    // has any STORE users. The reason is that if we change the type of the
    // shift, we may break the truncstore combine.
    //
    // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
    for (auto &User : MRI.use_instructions(DstReg))
      if (User.getOpcode() == TargetOpcode::G_STORE)
        return false;

    NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
    if (NewShiftTy == SrcTy)
      return false;

    // Make sure we won't lose information by truncating the high bits.
    KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
    if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
                                DstTy.getScalarSizeInBits()))
      return false;
    break;
  }
  }

  if (!isLegalOrBeforeLegalizer(
          {SrcMI->getOpcode(),
           {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
    return false;

  MatchInfo = std::make_pair(SrcMI, NewShiftTy);
  return true;
}

void CombinerHelper::applyCombineTruncOfShift(
    MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);

  MachineInstr *ShiftMI = MatchInfo.first;
  LLT NewShiftTy = MatchInfo.second;

  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);

  Register ShiftAmt = ShiftMI->getOperand(2).getReg();
  Register ShiftSrc = ShiftMI->getOperand(1).getReg();
  ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);

  Register NewShift =
      Builder
          .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
          .getReg(0);

  if (NewShiftTy == DstTy)
    replaceRegWith(MRI, Dst, NewShift);
  else
    Builder.buildTrunc(Dst, NewShift);

  eraseInst(MI);
}

bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
  return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return MO.isReg() &&
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
  return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return !MO.isReg() ||
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  return all_of(Mask, [](int Elt) { return Elt < 0; });
}

bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
                      MRI);
}

bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
                      MRI);
}

bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
          MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
         "Expected an insert/extract element op");
  LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
  unsigned IdxIdx =
      MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
  auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
  if (!Idx)
    return false;
  return Idx->getZExtValue() >= VecTy.getNumElements();
}

bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
  GSelect &SelMI = cast<GSelect>(MI);
  auto Cst =
      isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
  if (!Cst)
    return false;
  OpIdx = Cst->isZero() ? 3 : 2;
  return true;
}

void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }

bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
                                    const MachineOperand &MOP2) {
  if (!MOP1.isReg() || !MOP2.isReg())
    return false;
  auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
  if (!InstAndDef1)
    return false;
  auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
  if (!InstAndDef2)
    return false;
  MachineInstr *I1 = InstAndDef1->MI;
  MachineInstr *I2 = InstAndDef2->MI;

  // Handle a case like this:
  //
  // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
  //
  // Even though %0 and %1 are produced by the same instruction they are not
  // the same values.
  if (I1 == I2)
    return MOP1.getReg() == MOP2.getReg();

  // If we have an instruction which loads or stores, we can't guarantee that
  // it is identical.
  //
  // For example, we may have
  //
  // %x1 = G_LOAD %addr (load N from @somewhere)
  // ...
  // call @foo
  // ...
  // %x2 = G_LOAD %addr (load N from @somewhere)
  // ...
  // %or = G_OR %x1, %x2
  //
  // It's possible that @foo will modify whatever lives at the address we're
  // loading from. To be safe, let's just assume that all loads and stores
  // are different (unless we have something which is guaranteed to not
  // change.)
  if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
    return false;

  // If both instructions are loads or stores, they are equal only if both
  // are dereferenceable invariant loads with the same number of bits.
  if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
    GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
    GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
    if (!LS1 || !LS2)
      return false;

    if (!I2->isDereferenceableInvariantLoad() ||
        (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
      return false;
  }

  // Check for physical registers on the instructions first to avoid cases
  // like this:
  //
  // %a = COPY $physreg
  // ...
  // SOMETHING implicit-def $physreg
  // ...
  // %b = COPY $physreg
  //
  // These copies are not equivalent.
  if (any_of(I1->uses(), [](const MachineOperand &MO) {
        return MO.isReg() && MO.getReg().isPhysical();
      })) {
    // Check if we have a case like this:
    //
    // %a = COPY $physreg
    // %b = COPY %a
    //
    // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
    // From that, we know that they must have the same value, since they must
    // have come from the same COPY.
    return I1->isIdenticalTo(*I2);
  }

  // We don't have any physical registers, so we don't necessarily need the
  // same vreg defs.
  //
  // On the off-chance that there's some target instruction feeding into the
  // instruction, let's use produceSameValue instead of isIdenticalTo.
  if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
    // Handle instructions with multiple defs that produce same values. Values
    // are same for operands with same index.
    // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
    // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
    // I1 and I2 are different instructions but produce same values,
    // %1 and %6 are same, %1 and %7 are not the same value.
    return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
           I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
  }

  return false;
}

bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
  if (!MOP.isReg())
    return false;
  auto *MI = MRI.getVRegDef(MOP.getReg());
  auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
  return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
         MaybeCst->getSExtValue() == C;
}

bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) {
  if (!MOP.isReg())
    return false;
  std::optional<FPValueAndVReg> MaybeCst;
  if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
    return false;

  return MaybeCst->Value.isExactlyValue(C);
}

void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                     unsigned OpIdx) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  Register Replacement = MI.getOperand(OpIdx).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
}

void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
                                                 Register Replacement) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
}

bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
                                                 unsigned ConstIdx) {
  Register ConstReg = MI.getOperand(ConstIdx).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // Get the shift amount.
  auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
  if (!VRegAndVal)
    return false;

  // Return true if the shift amount is >= the bitwidth.
  return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
}

void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
          MI.getOpcode() == TargetOpcode::G_FSHR) &&
         "This is not a funnel shift operation");

  Register ConstReg = MI.getOperand(3).getReg();
  LLT ConstTy = MRI.getType(ConstReg);
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
  assert((VRegAndVal) && "Value is not a constant");

  // Calculate the new Shift Amount = Old Shift Amount % BitWidth
  APInt NewConst = VRegAndVal->Value.urem(
      APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));

  Builder.setInstrAndDebugLoc(MI);
  auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
  Builder.buildInstr(
      MI.getOpcode(), {MI.getOperand(0)},
      {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});

  MI.eraseFromParent();
}
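
// Illustrative sketch (made-up values): for a 32-bit funnel shift with an
// oversized constant amount,
//   %dst:_(s32) = G_FSHL %a, %b, %c37    ; %c37 = G_CONSTANT i32 37
// the shift amount is reduced modulo the bitwidth (37 % 32 == 5) and the
// instruction is rebuilt as
//   %dst:_(s32) = G_FSHL %a, %b, %c5     ; %c5 = G_CONSTANT i32 5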

bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  // Match (cond ? x : x)
  return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
                       MRI);
}

bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
  return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                       MRI);
}

bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
  return matchConstantOp(MI.getOperand(OpIdx), 0) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
                       MRI);
}

bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  return MO.isReg() &&
         getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
}

bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
                                                        unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
}

void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildFConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
}

void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
}

void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
}

void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
  MI.eraseFromParent();
}

void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildUndef(MI.getOperand(0));
  MI.eraseFromParent();
}

bool CombinerHelper::matchSimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  Register &NewLHS = std::get<0>(MatchInfo);
  Register &NewRHS = std::get<1>(MatchInfo);

  // Helper lambda to check for opportunities for
  // ((0-A) + B) -> B - A
  // (A + (0-B)) -> A - B
  auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
    if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
      return false;
    NewLHS = MaybeNewLHS;
    return true;
  };

  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}

bool CombinerHelper::matchCombineInsertVecElts(
    MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
         "Invalid opcode");
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
  unsigned NumElts = DstTy.getNumElements();
  // If this MI is part of a sequence of insert_vec_elts, then
  // don't do the combine in the middle of the sequence.
  if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
                                   TargetOpcode::G_INSERT_VECTOR_ELT)
    return false;
  MachineInstr *CurrInst = &MI;
  MachineInstr *TmpInst;
  int64_t IntImm;
  Register TmpReg;
  MatchInfo.resize(NumElts);
  while (mi_match(
      CurrInst->getOperand(0).getReg(), MRI,
      m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
    if (IntImm >= NumElts || IntImm < 0)
      return false;
    if (!MatchInfo[IntImm])
      MatchInfo[IntImm] = TmpReg;
    CurrInst = TmpInst;
  }
  // Variable index.
  if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    return false;
  if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
    for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
      if (!MatchInfo[I - 1].isValid())
        MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
    }
    return true;
  }
  // If we didn't end in a G_IMPLICIT_DEF, bail out.
  return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
}

void CombinerHelper::applyCombineInsertVecElts(
    MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
  Builder.setInstr(MI);
  Register UndefReg;
  auto GetUndef = [&]() {
    if (UndefReg)
      return UndefReg;
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
    return UndefReg;
  };
  for (unsigned I = 0; I < MatchInfo.size(); ++I) {
    if (!MatchInfo[I])
      MatchInfo[I] = GetUndef();
  }
  Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
  MI.eraseFromParent();
}

void CombinerHelper::applySimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Builder.setInstr(MI);
  Register SubLHS, SubRHS;
  std::tie(SubLHS, SubRHS) = MatchInfo;
  Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
  MI.eraseFromParent();
}

bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
    MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
  // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
  //
  // Creates the new hand + logic instruction (but does not insert them.)
  //
  // On success, MatchInfo is populated with the new instructions. These are
  // inserted in applyHoistLogicOpWithSameOpcodeHands.
  unsigned LogicOpcode = MI.getOpcode();
  assert(LogicOpcode == TargetOpcode::G_AND ||
         LogicOpcode == TargetOpcode::G_OR ||
         LogicOpcode == TargetOpcode::G_XOR);
  MachineIRBuilder MIB(MI);
  Register Dst = MI.getOperand(0).getReg();
  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();

  // Don't recompute anything.
  if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
    return false;

  // Make sure we have (hand x, ...), (hand y, ...)
  MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
  MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
  if (!LeftHandInst || !RightHandInst)
    return false;
  unsigned HandOpcode = LeftHandInst->getOpcode();
  if (HandOpcode != RightHandInst->getOpcode())
    return false;
  if (!LeftHandInst->getOperand(1).isReg() ||
      !RightHandInst->getOperand(1).isReg())
    return false;

  // Make sure the types match up, and if we're doing this post-legalization,
  // we end up with legal types.
  Register X = LeftHandInst->getOperand(1).getReg();
  Register Y = RightHandInst->getOperand(1).getReg();
  LLT XTy = MRI.getType(X);
  LLT YTy = MRI.getType(Y);
  if (!XTy.isValid() || XTy != YTy)
    return false;

  // Optional extra source register.
  Register ExtraHandOpSrcReg;
  switch (HandOpcode) {
  default:
    return false;
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT: {
    // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
    break;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_SHL: {
    // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
    MachineOperand &ZOp = LeftHandInst->getOperand(2);
    if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
      return false;
    ExtraHandOpSrcReg = ZOp.getReg();
    break;
  }
  }

  if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
    return false;

  // Record the steps to build the new instructions.
  //
  // Steps to build (logic x, y)
  auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
  OperandBuildSteps LogicBuildSteps = {
      [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
  InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);

  // Steps to build hand (logic x, y), ...z
  OperandBuildSteps HandBuildSteps = {
      [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
  if (ExtraHandOpSrcReg.isValid())
    HandBuildSteps.push_back(
        [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
  InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);

  MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
  return true;
}

void CombinerHelper::applyBuildInstructionSteps(
    MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
  assert(MatchInfo.InstrsToBuild.size() &&
         "Expected at least one instr to build?");
  Builder.setInstr(MI);
  for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
    assert(InstrToBuild.Opcode && "Expected a valid opcode?");
    assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
    MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
    for (auto &OperandFn : InstrToBuild.OperandFns)
      OperandFn(Instr);
  }
  MI.eraseFromParent();
}

bool CombinerHelper::matchAshrShlToSextInreg(
    MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR);
  int64_t ShlCst, AshrCst;
  Register Src;
  if (!mi_match(MI.getOperand(0).getReg(), MRI,
                m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
                        m_ICstOrSplat(AshrCst))))
    return false;
  if (ShlCst != AshrCst)
    return false;
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
    return false;
  MatchInfo = std::make_tuple(Src, ShlCst);
  return true;
}

void CombinerHelper::applyAshShlToSextInreg(
    MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR);
  Register Src;
  int64_t ShiftAmt;
  std::tie(Src, ShiftAmt) = MatchInfo;
  unsigned Size = MRI.getType(Src).getScalarSizeInBits();
  Builder.setInstrAndDebugLoc(MI);
  Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
  MI.eraseFromParent();
}
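
// Illustrative sketch (made-up registers): the classic sign-extension idiom
//   %t:_(s32)   = G_SHL %x, %c24
//   %dst:_(s32) = G_ASHR %t, %c24
// matches with ShlCst == AshrCst == 24 and is rewritten into
//   %dst:_(s32) = G_SEXT_INREG %x, 8
// i.e. a sign extension from the low Size - ShiftAmt = 8 bits.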

/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
bool CombinerHelper::matchOverlappingAnd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);

  Register R;
  int64_t C1;
  int64_t C2;
  if (!mi_match(
          Dst, MRI,
          m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    if (C1 & C2) {
      B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
      return;
    }
    auto Zero = B.buildConstant(Ty, 0);
    replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
  };
  return true;
}
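
// Illustrative sketch (made-up values): nested masks such as
//   %t:_(s32)   = G_AND %x, %c0xff00
//   %dst:_(s32) = G_AND %t, %c0x0ff0
// fold to a single mask with the intersection of the constants,
//   %dst:_(s32) = G_AND %x, %c0x0f00
// while disjoint masks (C1 & C2 == 0) fold the result to zero.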

bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
                                       Register &Replacement) {
  // Given
  //
  // %y:_(sN) = G_SOMETHING
  // %x:_(sN) = G_SOMETHING
  // %res:_(sN) = G_AND %x, %y
  //
  // Eliminate the G_AND when it is known that x & y == x or x & y == y.
  //
  // Patterns like this can appear as a result of legalization. E.g.
  //
  // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
  // %one:_(s32) = G_CONSTANT i32 1
  // %and:_(s32) = G_AND %cmp, %one
  //
  // In this case, G_ICMP only produces a single bit, so x & 1 == x.
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  if (!KB)
    return false;

  Register AndDst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  KnownBits LHSBits = KB->getKnownBits(LHS);
  KnownBits RHSBits = KB->getKnownBits(RHS);

  // Check that x & Mask == x.
  // x & 1 == x, always
  // x & 0 == x, only if x is also 0
  // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
  //
  // Check if we can replace AndDst with the LHS of the G_AND
  if (canReplaceReg(AndDst, LHS, MRI) &&
      (LHSBits.Zero | RHSBits.One).isAllOnes()) {
    Replacement = LHS;
    return true;
  }

  // Check if we can replace AndDst with the RHS of the G_AND
  if (canReplaceReg(AndDst, RHS, MRI) &&
      (LHSBits.One | RHSBits.Zero).isAllOnes()) {
    Replacement = RHS;
    return true;
  }

  return false;
}

bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
  // Given
  //
  // %y:_(sN) = G_SOMETHING
  // %x:_(sN) = G_SOMETHING
  // %res:_(sN) = G_OR %x, %y
  //
  // Eliminate the G_OR when it is known that x | y == x or x | y == y.
  assert(MI.getOpcode() == TargetOpcode::G_OR);
  if (!KB)
    return false;

  Register OrDst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  KnownBits LHSBits = KB->getKnownBits(LHS);
  KnownBits RHSBits = KB->getKnownBits(RHS);

  // Check that x | Mask == x.
  // x | 0 == x, always
  // x | 1 == x, only if x is also 1
  // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
  //
  // Check if we can replace OrDst with the LHS of the G_OR
  if (canReplaceReg(OrDst, LHS, MRI) &&
      (LHSBits.One | RHSBits.Zero).isAllOnes()) {
    Replacement = LHS;
    return true;
  }

  // Check if we can replace OrDst with the RHS of the G_OR
  if (canReplaceReg(OrDst, RHS, MRI) &&
      (LHSBits.Zero | RHSBits.One).isAllOnes()) {
    Replacement = RHS;
    return true;
  }

  return false;
}

bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
  // If the input is already sign extended, just drop the extension.
  Register Src = MI.getOperand(1).getReg();
  unsigned ExtBits = MI.getOperand(2).getImm();
  unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
  return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
}

static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
                             int64_t Cst, bool IsVector, bool IsFP) {
  // For i1, Cst will always be -1 regardless of boolean contents.
  return (ScalarSizeBits == 1 && Cst == -1) ||
         isConstTrueVal(TLI, Cst, IsVector, IsFP);
}

bool CombinerHelper::matchNotCmp(MachineInstr &MI,
                                 SmallVectorImpl<Register> &RegsToNegate) {
  assert(MI.getOpcode() == TargetOpcode::G_XOR);
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
  Register XorSrc;
  Register CstReg;
  // We match xor(src, true) here.
  if (!mi_match(MI.getOperand(0).getReg(), MRI,
                m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
    return false;

  if (!MRI.hasOneNonDBGUse(XorSrc))
    return false;

  // Check that XorSrc is the root of a tree of comparisons combined with ANDs
  // and ORs. The suffix of RegsToNegate starting from index I is used as a
  // work list of tree nodes to visit.
  RegsToNegate.push_back(XorSrc);
  // Remember whether the comparisons are all integer or all floating point.
  bool IsInt = false;
  bool IsFP = false;
  for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
    Register Reg = RegsToNegate[I];
    if (!MRI.hasOneNonDBGUse(Reg))
      return false;
    MachineInstr *Def = MRI.getVRegDef(Reg);
    switch (Def->getOpcode()) {
    default:
      // Don't match if the tree contains anything other than ANDs, ORs and
      // comparisons.
      return false;
    case TargetOpcode::G_ICMP:
      if (IsFP)
        return false;
      IsInt = true;
      // When we apply the combine we will invert the predicate.
      break;
    case TargetOpcode::G_FCMP:
      if (IsInt)
        return false;
      IsFP = true;
      // When we apply the combine we will invert the predicate.
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_OR:
      // Implement De Morgan's laws:
      // ~(x & y) -> ~x | ~y
      // ~(x | y) -> ~x & ~y
      // When we apply the combine we will change the opcode and recursively
      // negate the operands.
      RegsToNegate.push_back(Def->getOperand(1).getReg());
      RegsToNegate.push_back(Def->getOperand(2).getReg());
      break;
    }
  }

  // Now we know whether the comparisons are integer or floating point, check
  // the constant in the xor.
  int64_t Cst;
  if (Ty.isVector()) {
    MachineInstr *CstDef = MRI.getVRegDef(CstReg);
    auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
    if (!MaybeCst)
      return false;
    if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
      return false;
  } else {
    if (!mi_match(CstReg, MRI, m_ICst(Cst)))
      return false;
    if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
      return false;
  }

  return true;
}

void CombinerHelper::applyNotCmp(MachineInstr &MI,
                                 SmallVectorImpl<Register> &RegsToNegate) {
  for (Register Reg : RegsToNegate) {
    MachineInstr *Def = MRI.getVRegDef(Reg);
    Observer.changingInstr(*Def);
    // For each comparison, invert the opcode. For each AND and OR, change the
    // opcode.
    switch (Def->getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    case TargetOpcode::G_ICMP:
    case TargetOpcode::G_FCMP: {
      MachineOperand &PredOp = Def->getOperand(1);
      CmpInst::Predicate NewP = CmpInst::getInversePredicate(
          (CmpInst::Predicate)PredOp.getPredicate());
      PredOp.setPredicate(NewP);
      break;
    }
    case TargetOpcode::G_AND:
      Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
      break;
    case TargetOpcode::G_OR:
      Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
      break;
    }
    Observer.changedInstr(*Def);
  }

  replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
}
bool CombinerHelper::matchXorOfAndWithSameReg(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  // Match (xor (and x, y), y) (or any of its commuted cases)
  assert(MI.getOpcode() == TargetOpcode::G_XOR);
  Register &X = MatchInfo.first;
  Register &Y = MatchInfo.second;
  Register AndReg = MI.getOperand(1).getReg();
  Register SharedReg = MI.getOperand(2).getReg();

  // Find a G_AND on either side of the G_XOR. We are looking for one of:
  //
  // (xor (and x, y), SharedReg)
  // (xor SharedReg, (and x, y))
  if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
    std::swap(AndReg, SharedReg);
    if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
      return false;
  }

  // Only do this if we'll eliminate the G_AND.
  if (!MRI.hasOneNonDBGUse(AndReg))
    return false;

  // We can combine if SharedReg is the same as either the LHS or RHS of the
  // G_AND.
  if (Y != SharedReg)
    std::swap(X, Y);
  return Y == SharedReg;
}

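// Why the fold below is sound (a short sketch, not from the original source):
// for each bit, (x & y) ^ y flips exactly the y-bits that are not also set in
// x, i.e. it equals y & ~x. Hence (xor (and x, y), y) == (and (not x), y).
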
void CombinerHelper::applyXorOfAndWithSameReg(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  // Fold (xor (and x, y), y) -> (and (not x), y)
  Builder.setInstrAndDebugLoc(MI);
  Register X, Y;
  std::tie(X, Y) = MatchInfo;
  auto Not = Builder.buildNot(MRI.getType(X), X);
  Observer.changingInstr(MI);
  MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
  MI.getOperand(1).setReg(Not->getOperand(0).getReg());
  MI.getOperand(2).setReg(Y);
  Observer.changedInstr(MI);
}

bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
  auto &PtrAdd = cast<GPtrAdd>(MI);
  Register DstReg = PtrAdd.getReg(0);
  LLT Ty = MRI.getType(DstReg);
  const DataLayout &DL = Builder.getMF().getDataLayout();

  if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
    return false;

  if (Ty.isPointer()) {
    auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
    return ConstVal && *ConstVal == 0;
  }

  assert(Ty.isVector() && "Expecting a vector type");
  const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
  return isBuildVectorAllZeros(*VecMI, MRI);
}

void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
  auto &PtrAdd = cast<GPtrAdd>(MI);
  Builder.setInstrAndDebugLoc(PtrAdd);
  Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
  PtrAdd.eraseFromParent();
}

/// The second source operand is known to be a power of 2.
void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Pow2Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(DstReg);
  Builder.setInstrAndDebugLoc(MI);

  // Fold (urem x, pow2) -> (and x, pow2-1)
  auto NegOne = Builder.buildConstant(Ty, -1);
  auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
  Builder.buildAnd(DstReg, Src0, Add);
  MI.eraseFromParent();
}

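// Minimal before/after sketch of the power-of-2 G_UREM fold above (virtual
// register names are illustrative only):
//
//   %r:_(s32) = G_UREM %x:_(s32), %pow2:_(s32)
// becomes
//   %neg1:_(s32) = G_CONSTANT i32 -1
//   %mask:_(s32) = G_ADD %pow2:_(s32), %neg1:_(s32)
//   %r:_(s32)    = G_AND %x:_(s32), %mask:_(s32)
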
bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
                                              unsigned &SelectOpNo) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();

  Register OtherOperandReg = RHS;
  SelectOpNo = 1;
  MachineInstr *Select = MRI.getVRegDef(LHS);

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  if (Select->getOpcode() != TargetOpcode::G_SELECT ||
      !MRI.hasOneNonDBGUse(LHS)) {
    OtherOperandReg = LHS;
    SelectOpNo = 2;
    Select = MRI.getVRegDef(RHS);
    if (Select->getOpcode() != TargetOpcode::G_SELECT ||
        !MRI.hasOneNonDBGUse(RHS))
      return false;
  }

  MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
  MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());

  if (!isConstantOrConstantVector(*SelectLHS, MRI,
                                  /*AllowFP*/ true,
                                  /*AllowOpaqueConstants*/ false))
    return false;
  if (!isConstantOrConstantVector(*SelectRHS, MRI,
                                  /*AllowFP*/ true,
                                  /*AllowOpaqueConstants*/ false))
    return false;

  unsigned BinOpcode = MI.getOpcode();

  // We know that one of the operands is a select of constants. Now verify that
  // the other binary operator operand is either a constant, or we can handle a
  // variable.
  bool CanFoldNonConst =
      (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
      (isNullOrNullSplat(*SelectLHS, MRI) ||
       isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
      (isNullOrNullSplat(*SelectRHS, MRI) ||
       isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
  if (CanFoldNonConst)
    return true;

  return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
                                    /*AllowFP*/ true,
                                    /*AllowOpaqueConstants*/ false);
}

/// \p SelectOperand is the operand in binary operator \p MI that is the select
/// to fold.
void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
                                              const unsigned &SelectOperand) {
  Builder.setInstrAndDebugLoc(MI);

  Register Dst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());

  Register SelectCond = Select->getOperand(1).getReg();
  Register SelectTrue = Select->getOperand(2).getReg();
  Register SelectFalse = Select->getOperand(3).getReg();

  LLT Ty = MRI.getType(Dst);
  unsigned BinOpcode = MI.getOpcode();

  Register FoldTrue, FoldFalse;

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  if (SelectOperand == 1) {
    // TODO: SelectionDAG verifies this actually constant folds before
    // committing to the combine.
    FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
    FoldFalse =
        Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
  } else {
    FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
    FoldFalse =
        Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
  }

  Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
  MI.eraseFromParent();
}

std::optional<SmallVector<Register, 8>>
CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
  assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
  // We want to detect if Root is part of a tree which represents a bunch
  // of loads being merged into a larger load. We'll try to recognize patterns
  // like, for example, a tree of G_ORs whose leaves are each produced by a
  // load plus some arithmetic.
  //
  // Each "Reg" may have been produced by a load + some arithmetic. This
  // function will save each of them.
  SmallVector<Register, 8> RegsToVisit;
  SmallVector<const MachineInstr *, 7> Ors = {Root};

  // In the "worst" case, we're dealing with a load for each byte. So, there
  // are at most #bytes - 1 ORs.
  const unsigned MaxIter =
      MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
  for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
    if (Ors.empty())
      break;
    const MachineInstr *Curr = Ors.pop_back_val();
    Register OrLHS = Curr->getOperand(1).getReg();
    Register OrRHS = Curr->getOperand(2).getReg();

    // In the combine, we want to eliminate the entire tree.
    if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
      return std::nullopt;

    // If it's a G_OR, save it and continue to walk. If it's not, then it's
    // something that may be a load + arithmetic.
    if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
      Ors.push_back(Or);
    else
      RegsToVisit.push_back(OrLHS);
    if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
      Ors.push_back(Or);
    else
      RegsToVisit.push_back(OrRHS);
  }

  // We're going to try and merge each register into a wider power-of-2 type,
  // so we ought to have an even number of registers.
  if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
    return std::nullopt;
  return RegsToVisit;
}

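// A sketch (shape only, illustrative) of the kind of OR tree walked above:
//
//   Reg  Reg    Reg  Reg
//    \   /       \   /
//     OR          OR
//       \        /
//        \      /
//        Root (G_OR)
//
// Each leaf "Reg" is expected to be a load plus some shifting arithmetic; the
// leaves are collected into RegsToVisit for the caller to verify.
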
/// Helper function for findLoadOffsetsForLoadOrCombine.
///
/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
/// and then moving that value into a specific byte offset.
///
/// \returns The load instruction and the byte offset it is moved into.
static std::optional<std::pair<GZExtLoad *, int64_t>>
matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
                         const MachineRegisterInfo &MRI) {
  assert(MRI.hasOneNonDBGUse(Reg) &&
         "Expected Reg to only have one non-debug use?");
  Register MaybeLoad;
  int64_t Shift;
  if (!mi_match(Reg, MRI,
                m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
    Shift = 0;
    MaybeLoad = Reg;
  }

  if (Shift % MemSizeInBits != 0)
    return std::nullopt;

  // TODO: Handle other types of loads.
  auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
  if (!Load)
    return std::nullopt;

  if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
    return std::nullopt;

  return std::make_pair(Load, Shift / MemSizeInBits);
}

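// Example of what matchLoadAndBytePosition returns (values illustrative): for
// 8-bit narrow loads, a register defined as
//   %v:_(s32) = G_SHL %zextload, 24
// yields the pair (load, 24 / 8) == (load, 3), i.e. the loaded byte lands in
// byte position 3 of the wide value.
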
std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
CombinerHelper::findLoadOffsetsForLoadOrCombine(
    SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
    const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {

  // Each load found for the pattern. There should be one for each RegsToVisit.
  SmallSetVector<const MachineInstr *, 8> Loads;

  // The lowest index used in any load. (The lowest "i" for each x[i].)
  int64_t LowestIdx = INT64_MAX;

  // The load which uses the lowest index.
  GZExtLoad *LowestIdxLoad = nullptr;

  // Keeps track of the load indices we see. We shouldn't see any indices twice.
  SmallSet<int64_t, 8> SeenIdx;

  // Ensure each load is in the same MBB.
  // TODO: Support multiple MachineBasicBlocks.
  MachineBasicBlock *MBB = nullptr;
  const MachineMemOperand *MMO = nullptr;

  // Earliest instruction-order load in the pattern.
  GZExtLoad *EarliestLoad = nullptr;

  // Latest instruction-order load in the pattern.
  GZExtLoad *LatestLoad = nullptr;

  // Base pointer which every load should share.
  Register BasePtr;

  // We want to find a load for each register. Each load should have some
  // appropriate bit twiddling arithmetic. During this loop, we will also keep
  // track of the load which uses the lowest index. Later, we will check if we
  // can use its pointer in the final, combined load.
  for (auto Reg : RegsToVisit) {
    // Find the load, and find the position that it will end up in (e.g. a
    // shifted) value.
    auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
    if (!LoadAndPos)
      return std::nullopt;
    GZExtLoad *Load;
    int64_t DstPos;
    std::tie(Load, DstPos) = *LoadAndPos;

    // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
    // it is difficult to check for stores/calls/etc between loads.
    MachineBasicBlock *LoadMBB = Load->getParent();
    if (!MBB)
      MBB = LoadMBB;
    if (LoadMBB != MBB)
      return std::nullopt;

    // Make sure that the MachineMemOperands of every seen load are compatible.
    auto &LoadMMO = Load->getMMO();
    if (!MMO)
      MMO = &LoadMMO;
    if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
      return std::nullopt;

    // Find out what the base pointer and index for the load is.
    Register LoadPtr;
    int64_t Idx;
    if (!mi_match(Load->getOperand(1).getReg(), MRI,
                  m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
      LoadPtr = Load->getOperand(1).getReg();
      Idx = 0;
    }

    // Don't combine things like a[i], a[i] -> a bigger load.
    if (!SeenIdx.insert(Idx).second)
      return std::nullopt;

    // Every load must share the same base pointer; don't combine things like:
    //
    // a[i], b[i + 1] -> a bigger load.
    if (!BasePtr.isValid())
      BasePtr = LoadPtr;
    if (BasePtr != LoadPtr)
      return std::nullopt;

    if (Idx < LowestIdx) {
      LowestIdx = Idx;
      LowestIdxLoad = Load;
    }

    // Keep track of the byte offset that this load ends up at. If we have seen
    // the byte offset, then stop here. We do not want to combine:
    //
    // a[i] << 16, a[i + k] << 16 -> a bigger load.
    if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
      return std::nullopt;
    Loads.insert(Load);

    // Keep track of the position of the earliest/latest loads in the pattern.
    // We will check that there are no load fold barriers between them later
    // on.
    //
    // FIXME: Is there a better way to check for load fold barriers?
    if (!EarliestLoad || dominates(*Load, *EarliestLoad))
      EarliestLoad = Load;
    if (!LatestLoad || dominates(*LatestLoad, *Load))
      LatestLoad = Load;
  }

  // We found a load for each register. Let's check if each load satisfies the
  // pattern.
  assert(Loads.size() == RegsToVisit.size() &&
         "Expected to find a load for each register?");
  assert(EarliestLoad != LatestLoad && EarliestLoad && LatestLoad &&
         "Expected at least two loads?");

  // Check if there are any stores, calls, etc. between any of the loads. If
  // there are, then we can't safely perform the combine.
  //
  // MaxIter is chosen based off the (worst case) number of iterations it
  // typically takes to succeed in the LLVM test suite plus some padding.
  //
  // FIXME: Is there a better way to check for load fold barriers?
  const unsigned MaxIter = 20;
  unsigned Iter = 0;
  for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
                                                 LatestLoad->getIterator())) {
    if (Loads.count(&MI))
      continue;
    if (MI.isLoadFoldBarrier())
      return std::nullopt;
    if (Iter++ == MaxIter)
      return std::nullopt;
  }

  return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
}

bool CombinerHelper::matchLoadOrCombine(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_OR);
  MachineFunction &MF = *MI.getMF();
  // Assuming a little-endian target, transform:
  //  s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
  // =>
  //  s32 val = *((i32)a)
  //
  //  s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
  // =>
  //  s32 val = BSWAP(*((s32)a))
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  if (Ty.isVector())
    return false;

  // We need to combine at least two loads into this type. Since the smallest
  // possible load is into a byte, we need at least a 16-bit wide type.
  const unsigned WideMemSizeInBits = Ty.getSizeInBits();
  if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
    return false;

  // Match a collection of non-OR instructions in the pattern.
  auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
  if (!RegsToVisit)
    return false;

  // We have a collection of non-OR instructions. Figure out how wide each of
  // the small loads should be based off of the number of potential loads we
  // found.
  const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
  if (NarrowMemSizeInBits % 8 != 0)
    return false;

  // Check if each register feeding into each OR is a load from the same
  // base pointer + some arithmetic.
  //
  // e.g. a[0], a[1] << 8, a[2] << 16, etc.
  //
  // Also verify that each of these ends up putting a[i] into the same memory
  // offset as a load into a wide type would.
  SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
  GZExtLoad *LowestIdxLoad, *LatestLoad;
  int64_t LowestIdx;
  auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
      MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
  if (!MaybeLoadInfo)
    return false;
  std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;

  // We have a bunch of loads being OR'd together. Using the addresses + offsets
  // we found before, check if this corresponds to a big or little endian byte
  // pattern. If it does, then we can represent it using a load + possibly a
  // BSWAP.
  bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
  std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
  if (!IsBigEndian)
    return false;
  bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
  if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
    return false;

  // Make sure that the load from the lowest index produces offset 0 in the
  // final value.
  //
  // This ensures that we won't combine something like this:
  //
  // load x[i] -> byte 2
  // load x[i+1] -> byte 0 ---> wide_load x[i]
  // load x[i+2] -> byte 1
  const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
  const unsigned ZeroByteOffset =
      *IsBigEndian ? bigEndianByteAt(NumLoadsInTy, 0)
                   : littleEndianByteAt(NumLoadsInTy, 0);
  auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
  if (ZeroOffsetIdx == MemOffset2Idx.end() ||
      ZeroOffsetIdx->second != LowestIdx)
    return false;

  // We will reuse the pointer from the load which ends up at byte offset 0. It
  // may not use index 0.
  Register Ptr = LowestIdxLoad->getPointerReg();
  const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
  LegalityQuery::MemDesc MMDesc(MMO);
  MMDesc.MemoryTy = Ty;
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
    return false;
  auto PtrInfo = MMO.getPointerInfo();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);

  // Load must be allowed and fast on the target.
  LLVMContext &C = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  unsigned Fast = 0;
  if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
      !Fast)
    return false;

  MatchInfo = [=](MachineIRBuilder &MIB) {
    MIB.setInstrAndDebugLoc(*LatestLoad);
    Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
    MIB.buildLoad(LoadDst, Ptr, *NewMMO);
    if (NeedsBSwap)
      MIB.buildBSwap(Dst, LoadDst);
  };
  return true;
}

bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
                                            MachineInstr *&ExtMI) {
  auto &PHI = cast<GPhi>(MI);
  Register DstReg = PHI.getReg(0);

  // TODO: Extending a vector may be expensive, don't do this until heuristics
  // are better.
  if (MRI.getType(DstReg).isVector())
    return false;

  // Try to match a phi, whose only use is an extend.
  if (!MRI.hasOneNonDBGUse(DstReg))
    return false;
  ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
  switch (ExtMI->getOpcode()) {
  case TargetOpcode::G_ANYEXT:
    return true; // G_ANYEXT is usually free.
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT:
    break;
  default:
    return false;
  }

  // If the target is likely to fold this extend away, don't propagate.
  if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
    return false;

  // We don't want to propagate the extends unless there's a good chance that
  // they'll be optimized in some way.
  // Collect the unique incoming values.
  SmallPtrSet<MachineInstr *, 4> InSrcs;
  for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
    auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
    switch (DefMI->getOpcode()) {
    case TargetOpcode::G_LOAD:
    case TargetOpcode::G_TRUNC:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_ZEXT:
    case TargetOpcode::G_ANYEXT:
    case TargetOpcode::G_CONSTANT:
      InSrcs.insert(DefMI);
      // Don't try to propagate if there are too many places to create new
      // extends, chances are it'll increase code size.
      if (InSrcs.size() > 2)
        return false;
      break;
    default:
      return false;
    }
  }
  return true;
}

void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
                                            MachineInstr *&ExtMI) {
  auto &PHI = cast<GPhi>(MI);
  Register DstReg = ExtMI->getOperand(0).getReg();
  LLT ExtTy = MRI.getType(DstReg);

  // Propagate the extension into each incoming reg's block.
  // Use a SetVector here because PHIs can have duplicate edges, and we want
  // deterministic iteration order.
  SmallSetVector<MachineInstr *, 8> SrcMIs;
  SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
  for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
    auto SrcReg = PHI.getIncomingValue(I);
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (!SrcMIs.insert(SrcMI))
      continue;

    // Build an extend after each src inst.
    auto *MBB = SrcMI->getParent();
    MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
    if (InsertPt != MBB->end() && InsertPt->isPHI())
      InsertPt = MBB->getFirstNonPHI();

    Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
    Builder.setDebugLoc(MI.getDebugLoc());
    auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
    OldToNewSrcMap[SrcMI] = NewExt;
  }

  // Create a new phi with the extended inputs.
  Builder.setInstrAndDebugLoc(MI);
  auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
  NewPhi.addDef(DstReg);
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    if (!MO.isReg()) {
      NewPhi.addMBB(MO.getMBB());
      continue;
    }
    auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
    NewPhi.addUse(NewSrc->getOperand(0).getReg());
  }
  Builder.insertInstr(NewPhi);
  ExtMI->eraseFromParent();
}

bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
                                                Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
  // If we have a constant index, look for a G_BUILD_VECTOR source
  // and find the source register that the index maps to.
  Register SrcVec = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcVec);

  auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
    return false;

  unsigned VecIdx = Cst->Value.getZExtValue();

  // Check if we have a build_vector or build_vector_trunc with an optional
  // trunc in the middle.
  MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
  if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC)
    SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());

  if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
      SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
    return false;

  EVT Ty(getMVTForLLT(SrcTy));
  if (!MRI.hasOneNonDBGUse(SrcVec) &&
      !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
    return false;

  Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
  return true;
}

void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
                                                Register &Reg) {
  // Check the type of the register, since it may have come from a
  // G_BUILD_VECTOR_TRUNC.
  LLT ScalarTy = MRI.getType(Reg);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  Builder.setInstrAndDebugLoc(MI);
  if (ScalarTy != DstTy) {
    assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
    Builder.buildTrunc(DstReg, Reg);
    MI.eraseFromParent();
    return;
  }
  replaceSingleDefInstWithReg(MI, Reg);
}

bool CombinerHelper::matchExtractAllEltsFromBuildVector(
    MachineInstr &MI,
    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // This combine tries to find build_vector's which have every source element
  // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
  // masked load scalarization are run late in the pipeline. There's already
  // a combine for a similar pattern starting from the extract, but that
  // doesn't attempt to do it if there are multiple uses of the build_vector,
  // which in this case is true. Starting the combine from the build_vector
  // feels more natural than trying to find sibling nodes of extracts.
  //
  // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
  // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
  // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
  // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
  // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
  // ==>
  // replace ext{1,2,3,4} with %s{1,2,3,4}

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  unsigned NumElts = DstTy.getNumElements();

  SmallBitVector ExtractedElts(NumElts);
  for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
    if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
      return false;
    auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
    if (!Cst)
      return false;
    unsigned Idx = Cst->getZExtValue();
    if (Idx >= NumElts)
      return false; // Out of range.
    ExtractedElts.set(Idx);
    SrcDstPairs.emplace_back(
        std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
  }
  // Match if every element was extracted.
  return ExtractedElts.all();
}

void CombinerHelper::applyExtractAllEltsFromBuildVector(
    MachineInstr &MI,
    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  for (auto &Pair : SrcDstPairs) {
    auto *ExtMI = Pair.second;
    replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
    ExtMI->eraseFromParent();
  }
  MI.eraseFromParent();
}

void CombinerHelper::applyBuildFn(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  MatchInfo(Builder);
  MI.eraseFromParent();
}

void CombinerHelper::applyBuildFnNoErase(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  MatchInfo(Builder);
}

bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
                                               BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_OR);

  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  unsigned BitWidth = Ty.getScalarSizeInBits();

  Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
  unsigned FshOpc = 0;

  // Match (or (shl ...), (lshr ...)).
  if (!mi_match(Dst, MRI,
                // m_GOr() handles the commuted version as well.
                m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
                      m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
    return false;

  // Given constants C0 and C1 such that C0 + C1 is bit-width:
  // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
  int64_t CstShlAmt, CstLShrAmt;
  if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
      mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
      CstShlAmt + CstLShrAmt == BitWidth) {
    FshOpc = TargetOpcode::G_FSHR;
    Amt = LShrAmt;
  } else if (mi_match(LShrAmt, MRI,
                      m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
             ShlAmt == Amt) {
    // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
    FshOpc = TargetOpcode::G_FSHL;
  } else if (mi_match(ShlAmt, MRI,
                      m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
             LShrAmt == Amt) {
    // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
    FshOpc = TargetOpcode::G_FSHR;
  } else {
    return false;
  }

  LLT AmtTy = MRI.getType(Amt);
  if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
  };
  return true;
}

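// Worked example for the constant case above, on s32 (values illustrative):
// with C0 = 8 and C1 = 24, C0 + C1 == 32, so
//   %r = G_OR (G_SHL %x, 8), (G_LSHR %y, 24)
// can be emitted as
//   %r = G_FSHR %x, %y, 24
// i.e. the low 32 bits of the 64-bit concatenation x:y shifted right by 24.
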
/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  if (X != Y)
    return false;
  unsigned RotateOpc =
      Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
  return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
}

void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
  bool IsFSHL = Opc == TargetOpcode::G_FSHL;
  Observer.changingInstr(MI);
  MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
                                         : TargetOpcode::G_ROTR));
  MI.removeOperand(2);
  Observer.changedInstr(MI);
}

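// The rewrite above relies on the identity (sketch, s32 shown):
//   G_FSHL %x, %x, %amt  ==  G_ROTL %x, %amt
//   G_FSHR %x, %x, %amt  ==  G_ROTR %x, %amt
// since funnel-shifting a value with itself wraps the shifted-out bits around.
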
// Fold (rot x, c) -> (rot x, c % BitSize)
bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
         MI.getOpcode() == TargetOpcode::G_ROTR);
  unsigned Bitsize =
      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
  Register AmtReg = MI.getOperand(2).getReg();
  bool OutOfRange = false;
  auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
    if (auto *CI = dyn_cast<ConstantInt>(C))
      OutOfRange |= CI->getValue().uge(Bitsize);
    return true;
  };
  return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
}

void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
         MI.getOpcode() == TargetOpcode::G_ROTR);
  unsigned Bitsize =
      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
  Builder.setInstrAndDebugLoc(MI);
  Register Amt = MI.getOperand(2).getReg();
  LLT AmtTy = MRI.getType(Amt);
  auto Bits = Builder.buildConstant(AmtTy, Bitsize);
  Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(Amt);
  Observer.changedInstr(MI);
}

bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
                                                   int64_t &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
  auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
  std::optional<bool> KnownVal;
  switch (Pred) {
  default:
    llvm_unreachable("Unexpected G_ICMP predicate?");
  case CmpInst::ICMP_EQ:
    KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_NE:
    KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_SGE:
    KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_SGT:
    KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_SLE:
    KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_SLT:
    KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_UGE:
    KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_UGT:
    KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_ULE:
    KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
    break;
  case CmpInst::ICMP_ULT:
    KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
    break;
  }
  if (!KnownVal)
    return false;
  MatchInfo =
      *KnownVal
          ? getICmpTrueVal(getTargetLowering(),
                           /*IsVector = */
                           MRI.getType(MI.getOperand(0).getReg()).isVector(),
                           /* IsFP = */ false)
          : 0;
  return true;
}

bool CombinerHelper::matchICmpToLHSKnownBits(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  // Given:
  //
  // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
  // %cmp = G_ICMP ne %x, 0
  //
  // Or:
  //
  // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
  // %cmp = G_ICMP eq %x, 1
  //
  // We can replace %cmp with %x assuming true is 1 on the target.
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  if (!CmpInst::isEquality(Pred))
    return false;
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
                     /* IsFP = */ false) != 1)
    return false;
  int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
    return false;
  Register LHS = MI.getOperand(2).getReg();
  auto KnownLHS = KB->getKnownBits(LHS);
  if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
    return false;

  // Make sure replacing Dst with the LHS is a legal operation.
  LLT LHSTy = MRI.getType(LHS);
  unsigned LHSSize = LHSTy.getSizeInBits();
  unsigned DstSize = DstTy.getSizeInBits();
  unsigned Op = TargetOpcode::COPY;
  if (DstSize != LHSSize)
    Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
  if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
    return false;
  MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
  return true;
}

// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
bool CombinerHelper::matchAndOrDisjointMask(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  // Ignore vector types to simplify matching the two constants.
  // TODO: do this for vectors and scalars via a demanded bits analysis.
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector())
    return false;

  Register Src;
  Register AndMaskReg;
  int64_t AndMaskBits;
  int64_t OrMaskBits;
  if (!mi_match(MI, MRI,
                m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
                       m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
    return false;

  // Check if OrMask could turn on any bits in Src.
  if (AndMaskBits & OrMaskBits)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    Observer.changingInstr(MI);
    // Canonicalize the result to have the constant on the RHS.
    if (MI.getOperand(1).getReg() == AndMaskReg)
      MI.getOperand(2).setReg(AndMaskReg);
    MI.getOperand(1).setReg(Src);
    Observer.changedInstr(MI);
  };
  return true;
}

/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Src);
  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
    return false;
  int64_t Width = MI.getOperand(2).getImm();
  Register ShiftSrc;
  int64_t ShiftImm;
  if (!mi_match(
          Src, MRI,
          m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
                                  m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
    return false;
  if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
    auto Cst2 = B.buildConstant(ExtractTy, Width);
    B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
  };
  return true;
}

/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
bool CombinerHelper::matchBitfieldExtractFromAnd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
    return false;

  int64_t AndImm, LSBImm;
  Register ShiftSrc;
  const unsigned Size = Ty.getScalarSizeInBits();
  if (!mi_match(MI.getOperand(0).getReg(), MRI,
                m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
                       m_ICst(AndImm))))
    return false;

  // The mask is a mask of the low bits iff imm & (imm+1) == 0.
  auto MaybeMask = static_cast<uint64_t>(AndImm);
  if (MaybeMask & (MaybeMask + 1))
    return false;

  // LSB must fit within the register.
  if (static_cast<uint64_t>(LSBImm) >= Size)
    return false;

  uint64_t Width = APInt(Size, AndImm).countr_one();
  MatchInfo = [=](MachineIRBuilder &B) {
    auto WidthCst = B.buildConstant(ExtractTy, Width);
    auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
    B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
  };
  return true;
}

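// Worked example for the fold above (constants illustrative):
//   %r = G_AND (G_LSHR %x, 3), 0x1f
// has a low-bit mask of width countr_one(0x1f) == 5, so it becomes
//   %r = G_UBFX %x, 3, 5
// i.e. extract 5 bits starting at bit 3, zero-extended.
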
bool CombinerHelper::matchBitfieldExtractFromShr(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  const unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);

  const Register Dst = MI.getOperand(0).getReg();

  const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
                                  ? TargetOpcode::G_SBFX
                                  : TargetOpcode::G_UBFX;

  // Check if the type we would use for the extract is legal
  LLT Ty = MRI.getType(Dst);
  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
    return false;

  Register ShlSrc;
  int64_t ShrAmt;
  int64_t ShlAmt;
  const unsigned Size = Ty.getScalarSizeInBits();

  // Try to match shr (shl x, c1), c2
  if (!mi_match(Dst, MRI,
                m_BinOp(Opcode,
                        m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
                        m_ICst(ShrAmt))))
    return false;

  // Make sure that the shift sizes can fit a bitfield extract
  if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
    return false;

  // Skip this combine if the G_SEXT_INREG combine could handle it
  if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
    return false;

  // Calculate start position and width of the extract
  const int64_t Pos = ShrAmt - ShlAmt;
  const int64_t Width = Size - ShrAmt;

  MatchInfo = [=](MachineIRBuilder &B) {
    auto WidthCst = B.buildConstant(ExtractTy, Width);
    auto PosCst = B.buildConstant(ExtractTy, Pos);
    B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
  };
  return true;
}

bool CombinerHelper::matchBitfieldExtractFromShrAnd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  const unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);

  const Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
    return false;

  // Try to match shr (and x, c1), c2
  Register AndSrc;
  int64_t ShrAmt;
  int64_t SMask;
  if (!mi_match(Dst, MRI,
                m_BinOp(Opcode,
                        m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
                        m_ICst(ShrAmt))))
    return false;

  const unsigned Size = Ty.getScalarSizeInBits();
  if (ShrAmt < 0 || ShrAmt >= Size)
    return false;

  // If the shift subsumes the mask, emit the 0 directly.
  if (0 == (SMask >> ShrAmt)) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.buildConstant(Dst, 0);
    };
    return true;
  }

  // Check that ubfx can do the extraction, with no holes in the mask.
  uint64_t UMask = SMask;
  UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
  UMask &= maskTrailingOnes<uint64_t>(Size);
  if (!isMask_64(UMask))
    return false;

  // Calculate start position and width of the extract.
  const int64_t Pos = ShrAmt;
  const int64_t Width = llvm::countr_one(UMask) - ShrAmt;

  // It's preferable to keep the shift, rather than form G_SBFX.
  // TODO: remove the G_AND via demanded bits analysis.
  if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    auto WidthCst = B.buildConstant(ExtractTy, Width);
    auto PosCst = B.buildConstant(ExtractTy, Pos);
    B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
  };
  return true;
}

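// Worked example for the shr-of-and fold above (constants illustrative):
//   %r = G_LSHR (G_AND %x, 0x0ff0), 4
// UMask becomes 0x0fff (the mask plus trailing ones below the shift), which is
// a valid low-bit mask, so Pos = 4 and Width = countr_one(0x0fff) - 4 = 8:
//   %r = G_UBFX %x, 4, 8
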
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
    MachineInstr &MI) {
  auto &PtrAdd = cast<GPtrAdd>(MI);

  Register Src1Reg = PtrAdd.getBaseReg();
  auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
  if (!Src1Def)
    return false;

  Register Src2Reg = PtrAdd.getOffsetReg();

  if (MRI.hasOneNonDBGUse(Src1Reg))
    return false;

  auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
  if (!C1)
    return false;
  auto C2 = getIConstantVRegVal(Src2Reg, MRI);
  if (!C2)
    return false;

  const APInt &C1APIntVal = *C1;
  const APInt &C2APIntVal = *C2;
  const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();

  for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
    // This combine may end up running before ptrtoint/inttoptr combines
    // manage to eliminate redundant conversions, so try to look through them.
    MachineInstr *ConvUseMI = &UseMI;
    unsigned ConvUseOpc = ConvUseMI->getOpcode();
    while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
           ConvUseOpc == TargetOpcode::G_PTRTOINT) {
      Register DefReg = ConvUseMI->getOperand(0).getReg();
      if (!MRI.hasOneNonDBGUse(DefReg))
        break;
      ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
      ConvUseOpc = ConvUseMI->getOpcode();
    }
    auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
    if (!LdStMI)
      continue;
    // Is x[offset2] already not a legal addressing mode? If so then
    // reassociating the constants breaks nothing (we test offset2 because
    // that's the one we hope to fold into the load or store).
    TargetLoweringBase::AddrMode AM;
    AM.HasBaseReg = true;
    AM.BaseOffs = C2APIntVal.getSExtValue();
    unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
    Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
                                   PtrAdd.getMF()->getFunction().getContext());
    const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
                                   AccessTy, AS))
      continue;

    // Would x[offset1+offset2] still be a legal addressing mode?
    AM.BaseOffs = CombinedValue;
    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
                                   AccessTy, AS))
      return true;
  }

  return false;
}

bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
                                                  MachineInstr *RHS,
                                                  BuildFnTy &MatchInfo) {
  // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
  Register Src1Reg = MI.getOperand(1).getReg();
  if (RHS->getOpcode() != TargetOpcode::G_ADD)
    return false;
  auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
  if (!C2)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());

    auto NewBase =
        Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(NewBase.getReg(0));
    MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
    Observer.changedInstr(MI);
  };
  return !reassociationCanBreakAddressingModePattern(MI);
}

bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
                                                  MachineInstr *LHS,
                                                  MachineInstr *RHS,
                                                  BuildFnTy &MatchInfo) {
  // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
  // if and only if (G_PTR_ADD X, C) has one use.
  Register LHSBase;
  std::optional<ValueAndVReg> LHSCstOff;
  if (!mi_match(MI.getBaseReg(), MRI,
                m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
    return false;

  auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    // When we change LHSPtrAdd's offset register we might cause it to use a
    // reg before its def. Sink the instruction below the outer PTR_ADD to
    // ensure this doesn't happen.
    LHSPtrAdd->moveBefore(&MI);
    Register RHSReg = MI.getOffsetReg();
    // set VReg will cause type mismatch if it comes from extend/trunc
    auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
    Observer.changingInstr(MI);
    MI.getOperand(2).setReg(NewCst.getReg(0));
    Observer.changedInstr(MI);
    Observer.changingInstr(*LHSPtrAdd);
    LHSPtrAdd->getOperand(2).setReg(RHSReg);
    Observer.changedInstr(*LHSPtrAdd);
  };
  return !reassociationCanBreakAddressingModePattern(MI);
}

bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
                                                        MachineInstr *LHS,
                                                        MachineInstr *RHS,
                                                        BuildFnTy &MatchInfo) {
  // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
  auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
  if (!LHSPtrAdd)
    return false;

  Register Src2Reg = MI.getOperand(2).getReg();
  Register LHSSrc1 = LHSPtrAdd->getBaseReg();
  Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
  auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
  if (!C1)
    return false;
  auto C2 = getIConstantVRegVal(Src2Reg, MRI);
  if (!C2)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(LHSSrc1);
    MI.getOperand(2).setReg(NewCst.getReg(0));
    Observer.changedInstr(MI);
  };
  return !reassociationCanBreakAddressingModePattern(MI);
}

bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
                                        BuildFnTy &MatchInfo) {
  auto &PtrAdd = cast<GPtrAdd>(MI);
  // We're trying to match a few pointer computation patterns here for
  // re-association opportunities.
  // 1) Isolating a constant operand to be on the RHS, e.g.:
  //    G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
  //
  // 2) Folding two constants in each sub-tree as long as such folding
  //    doesn't break a legal addressing mode.
  //    G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
  //
  // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
  //    G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
  //    iff (G_PTR_ADD X, C) has one use.
  MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
  MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());

  // Try to match example 2.
  if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
    return true;

  // Try to match example 3.
  if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
    return true;

  // Try to match example 1.
  if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
    return true;

  return false;
}

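// Concrete instance of pattern 2 above (offsets shown inline for brevity;
// in real MIR they live in G_CONSTANT registers):
//   %p1:_(p0) = G_PTR_ADD %base, 16
//   %p2:_(p0) = G_PTR_ADD %p1, 4
// reassociates to
//   %p2:_(p0) = G_PTR_ADD %base, 20
// as long as reassociationCanBreakAddressingModePattern() does not flag the
// combined offset as breaking a user's legal addressing mode.
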
bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
                                     Register OpLHS, Register OpRHS,
                                     BuildFnTy &MatchInfo) {
  LLT OpRHSTy = MRI.getType(OpRHS);
  MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);

  if (OpLHSDef->getOpcode() != Opc)
    return false;

  MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
  Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
  Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();

  // If the inner op is (X op C), pull the constant out so it can be folded with
  // other constants in the expression tree. Folding is not guaranteed so we
  // might have (C1 op C2). In that case do not pull a constant out because it
  // won't help and can lead to infinite loops.
  if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
      !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
    if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
      // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
      MatchInfo = [=](MachineIRBuilder &B) {
        auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
        B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
      };
      return true;
    }
    if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      // iff (op x, c1) has one use
      MatchInfo = [=](MachineIRBuilder &B) {
        auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
        B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
      };
      return true;
    }
  }

  return false;
}

bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
                                           BuildFnTy &MatchInfo) {
  // We don't check if the reassociation will break a legal addressing mode
  // here since pointer arithmetic is handled by G_PTR_ADD.
  unsigned Opc = MI.getOpcode();
  Register DstReg = MI.getOperand(0).getReg();
  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();

  if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
    return true;
  if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
    return true;
  return false;
}

bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI,
                                             APInt &MatchInfo) {
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  Register SrcOp = MI.getOperand(1).getReg();

  if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
    MatchInfo = *MaybeCst;
    return true;
  }

  return false;
}

bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI,
                                            APInt &MatchInfo) {
  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
  if (!MaybeCst)
    return false;
  MatchInfo = *MaybeCst;
  return true;
}

bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI,
                                              ConstantFP *&MatchInfo) {
  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
  if (!MaybeCst)
    return false;
  MatchInfo =
      ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
  return true;
}

bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
                                          ConstantFP *&MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FMA ||
         MI.getOpcode() == TargetOpcode::G_FMAD);
  auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();

  const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
  if (!Op3Cst)
    return false;

  const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
  if (!Op2Cst)
    return false;

  const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
  if (!Op1Cst)
    return false;

  APFloat Op1F = Op1Cst->getValueAPF();
  Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
                        APFloat::rmNearestTiesToEven);
  MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
  return true;
}

bool CombinerHelper::matchNarrowBinopFeedingAnd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  // Look for a binop feeding into an AND with a mask:
  //
  // %add = G_ADD %lhs, %rhs
  // %and = G_AND %add, 000...11111111
  //
  // Check if it's possible to perform the binop at a narrower width and zext
  // back to the original width like so:
  //
  // %narrow_lhs = G_TRUNC %lhs
  // %narrow_rhs = G_TRUNC %rhs
  // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
  // %new_add = G_ZEXT %narrow_add
  // %and = G_AND %new_add, 000...11111111
  //
  // This can allow later combines to eliminate the G_AND if it turns out
  // that the mask is irrelevant.
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  Register Dst = MI.getOperand(0).getReg();
  Register AndLHS = MI.getOperand(1).getReg();
  Register AndRHS = MI.getOperand(2).getReg();
  LLT WideTy = MRI.getType(Dst);

  // If the potential binop has more than one use, then it's possible that one
  // of those uses will need its full width.
  if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
    return false;

  // Check if the LHS feeding the AND is impacted by the high bits that we're
  // going to truncate.
  //
  // e.g. for 64-bit x, y:
  //
  // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
  MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
  if (!LHSInst)
    return false;
  unsigned LHSOpc = LHSInst->getOpcode();
  switch (LHSOpc) {
  default:
    return false;
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    break;
  }

  // Find the mask on the RHS.
  auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
  if (!Cst)
    return false;
  auto Mask = Cst->Value;
  if (!Mask.isMask())
    return false;

  // No point in combining if there's nothing to truncate.
  unsigned NarrowWidth = Mask.countr_one();
  if (NarrowWidth == WideTy.getSizeInBits())
    return false;
  LLT NarrowTy = LLT::scalar(NarrowWidth);

  // Check if adding the zext + truncates could be harmful.
  auto &MF = *MI.getMF();
  const auto &TLI = getTargetLowering();
  LLVMContext &Ctx = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
      !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
    return false;
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
      !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
    return false;
  Register BinOpLHS = LHSInst->getOperand(1).getReg();
  Register BinOpRHS = LHSInst->getOperand(2).getReg();
  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
    auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
    auto NarrowBinOp =
        Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
    auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(Ext.getReg(0));
    Observer.changedInstr(MI);
  };
  return true;
}

bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);

  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    Observer.changingInstr(MI);
    unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
                                                   : TargetOpcode::G_SADDO;
    MI.setDesc(Builder.getTII().get(NewOpc));
    MI.getOperand(3).setReg(MI.getOperand(2).getReg());
    Observer.changedInstr(MI);
  };
  return true;
}

bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
  // (G_*MULO x, 0) -> 0 + no carry out
  assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
         MI.getOpcode() == TargetOpcode::G_SMULO);
  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
    return false;
  Register Dst = MI.getOperand(0).getReg();
  Register Carry = MI.getOperand(1).getReg();
  if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
      !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
    return false;
  MatchInfo = [=](MachineIRBuilder &B) {
    B.buildConstant(Dst, 0);
    B.buildConstant(Carry, 0);
  };
  return true;
}

bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
  // (G_*ADDO x, 0) -> x + no carry out
  assert(MI.getOpcode() == TargetOpcode::G_UADDO ||
         MI.getOpcode() == TargetOpcode::G_SADDO);
  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
    return false;
  Register Carry = MI.getOperand(1).getReg();
  if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
    return false;
  Register Dst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(2).getReg();
  MatchInfo = [=](MachineIRBuilder &B) {
    B.buildCopy(Dst, LHS);
    B.buildConstant(Carry, 0);
  };
  return true;
}

bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
  // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
  // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
  assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
         MI.getOpcode() == TargetOpcode::G_SADDE ||
         MI.getOpcode() == TargetOpcode::G_USUBE ||
         MI.getOpcode() == TargetOpcode::G_SSUBE);
  if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
    return false;
  MatchInfo = [&](MachineIRBuilder &B) {
    unsigned NewOpcode;
    switch (MI.getOpcode()) {
    case TargetOpcode::G_UADDE:
      NewOpcode = TargetOpcode::G_UADDO;
      break;
    case TargetOpcode::G_SADDE:
      NewOpcode = TargetOpcode::G_SADDO;
      break;
    case TargetOpcode::G_USUBE:
      NewOpcode = TargetOpcode::G_USUBO;
      break;
    case TargetOpcode::G_SSUBE:
      NewOpcode = TargetOpcode::G_SSUBO;
      break;
    }
    Observer.changingInstr(MI);
    MI.setDesc(B.getTII().get(NewOpcode));
    MI.removeOperand(4);
    Observer.changedInstr(MI);
  };
  return true;
}

bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
                                        BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SUB);
  Register Dst = MI.getOperand(0).getReg();
  // (x + y) - z -> x (if y == z)
  // (x + y) - z -> y (if x == z)
  Register X, Y, Z;
  if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
    Register ReplaceReg;
    int64_t CstX, CstY;
    if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
                   mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
      ReplaceReg = X;
    else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
                        mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
      ReplaceReg = Y;
    if (ReplaceReg) {
      MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
      return true;
    }
  }

  // x - (y + z) -> 0 - y (if x == z)
  // x - (y + z) -> 0 - z (if x == y)
  if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
    Register ReplaceReg;
    int64_t CstX;
    if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
                   mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
      ReplaceReg = Y;
    else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
                        mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
      ReplaceReg = Z;
    if (ReplaceReg) {
      MatchInfo = [=](MachineIRBuilder &B) {
        auto Zero = B.buildConstant(MRI.getType(Dst), 0);
        B.buildSub(Dst, Zero, ReplaceReg);
      };
      return true;
    }
  }
  return false;
}

MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UDIV);
  auto &UDiv = cast<GenericMachineInstr>(MI);
  Register Dst = UDiv.getReg(0);
  Register LHS = UDiv.getReg(1);
  Register RHS = UDiv.getReg(2);
  LLT Ty = MRI.getType(Dst);
  LLT ScalarTy = Ty.getScalarType();
  const unsigned EltBits = ScalarTy.getScalarSizeInBits();
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
  auto &MIB = Builder;
  MIB.setInstrAndDebugLoc(MI);

  bool UseNPQ = false;
  SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  auto BuildUDIVPattern = [&](const Constant *C) {
    auto *CI = cast<ConstantInt>(C);
    const APInt &Divisor = CI->getValue();

    bool SelNPQ = false;
    APInt Magic(Divisor.getBitWidth(), 0);
    unsigned PreShift = 0, PostShift = 0;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    // TODO: Use undef values for divisor of 1.
    if (!Divisor.isOne()) {
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(Divisor);

      Magic = std::move(magics.Magic);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
      PreShift = magics.PreShift;
      PostShift = magics.PostShift;
      SelNPQ = magics.IsAdd;
    }

    PreShifts.push_back(
        MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
    MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
    NPQFactors.push_back(
        MIB.buildConstant(ScalarTy,
                          SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
                                 : APInt::getZero(EltBits))
            .getReg(0));
    PostShifts.push_back(
        MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
    UseNPQ |= SelNPQ;
    return true;
  };

  // Collect the shifts/magic values from each element.
  bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
  (void)Matched;
  assert(Matched && "Expected unary predicate match to succeed");

  Register PreShift, PostShift, MagicFactor, NPQFactor;
  auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
  if (RHSDef) {
    PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
    MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
    NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
    PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
  } else {
    assert(MRI.getType(RHS).isScalar() &&
           "Non-build_vector operation should have been a scalar");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  Register Q = LHS;
  Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);

  // Multiply the numerator (operand 0) by the magic value.
  Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);

  if (UseNPQ) {
    Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (Ty.isVector())
      NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
    else
      NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);

    Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
  }

  Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
  auto One = MIB.buildConstant(Ty, 1);
  auto IsOne = MIB.buildICmp(
      CmpInst::Predicate::ICMP_EQ,
      Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
  return MIB.buildSelect(Ty, IsOne, LHS, Q);
}

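// Rough shape of the sequence emitted above for a scalar G_UDIV %n, %d with a
// constant, non-one divisor (illustrative only; the actual constants come from
// UnsignedDivisionByConstantInfo):
//   %q = G_LSHR %n, PreShift
//   %q = G_UMULH %q, MagicFactor
//   ;; only when the "add"/NPQ path is required:
//   %npq = G_SUB %n, %q ; %npq = G_LSHR %npq, 1 ; %q = G_ADD %npq, %q
//   %q = G_LSHR %q, PostShift
//   %res = G_SELECT (%d == 1), %n, %q
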
bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UDIV);
  Register Dst = MI.getOperand(0).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(Dst);
  auto *RHSDef = MRI.getVRegDef(RHS);
  if (!isConstantOrConstantVector(*RHSDef, MRI))
    return false;

  auto &MF = *MI.getMF();
  AttributeList Attr = MF.getFunction().getAttributes();
  const auto &TLI = getTargetLowering();
  LLVMContext &Ctx = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
    return false;

  // Don't do this for minsize because the instruction sequence is usually
  // bigger.
  if (MF.getFunction().hasMinSize())
    return false;

  // Don't do this if the types are not going to be legal.
  if (LI) {
    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
      return false;
    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
      return false;
    if (!isLegalOrBeforeLegalizer(
            {TargetOpcode::G_ICMP,
             {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
              DstTy}}))
      return false;
  }

  auto CheckEltValue = [&](const Constant *C) {
    if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
      return !CI->isZero();
    return false;
  };
  return matchUnaryPredicate(MRI, RHS, CheckEltValue);
}

void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
  auto *NewMI = buildUDivUsingMul(MI);
  replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
}

bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
  Register Dst = MI.getOperand(0).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(Dst);

  auto &MF = *MI.getMF();
  AttributeList Attr = MF.getFunction().getAttributes();
  const auto &TLI = getTargetLowering();
  LLVMContext &Ctx = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
    return false;

  // Don't do this for minsize because the instruction sequence is usually
  // bigger.
  if (MF.getFunction().hasMinSize())
    return false;

  // If the sdiv has an 'exact' flag we can use a simpler lowering.
  if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
    return matchUnaryPredicate(
        MRI, RHS, [](const Constant *C) { return C && !C->isZeroValue(); });
  }

  // Don't support the general case for now.
  return false;
}

void CombinerHelper::applySDivByConst(MachineInstr &MI) {
  auto *NewMI = buildSDivUsingMul(MI);
  replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
}

MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
  auto &SDiv = cast<GenericMachineInstr>(MI);
  Register Dst = SDiv.getReg(0);
  Register LHS = SDiv.getReg(1);
  Register RHS = SDiv.getReg(2);
  LLT Ty = MRI.getType(Dst);
  LLT ScalarTy = Ty.getScalarType();
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
  auto &MIB = Builder;
  MIB.setInstrAndDebugLoc(MI);

  bool UseSRA = false;
  SmallVector<Register, 16> Shifts, Factors;

  auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
  bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();

  auto BuildSDIVPattern = [&](const Constant *C) {
    // Don't recompute inverses for each splat element.
    if (IsSplat && !Factors.empty()) {
      Shifts.push_back(Shifts[0]);
      Factors.push_back(Factors[0]);
      return true;
    }

    auto *CI = cast<ConstantInt>(C);
    APInt Divisor = CI->getValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }

    // Calculate the multiplicative inverse modulo BW.
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = Divisor.getBitWidth();
    APInt Factor = Divisor.zext(W + 1)
                       .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                       .trunc(W);
    Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
    Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
    return true;
  };

  // Collect all magic values from the build vector.
  bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
  (void)Matched;
  assert(Matched && "Expected unary predicate match to succeed");

  Register Shift, Factor;
  if (Ty.isVector()) {
    Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
    Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
  } else {
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  Register Res = LHS;

  if (UseSRA)
    Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);

  return MIB.buildMul(Ty, Res, Factor);
}

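// Rough shape of what is emitted above for an 'exact' G_SDIV by a constant
// (illustrative): the divisor is written as 2^Shift * OddD; the power of two
// is stripped with an exact arithmetic shift, and OddD is replaced by its
// multiplicative inverse modulo 2^W:
//   %t = G_ASHR exact %x, Shift   ; only present when Shift != 0
//   %q = G_MUL %t, Factor         ; Factor == inverse of OddD mod 2^W
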
bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UMULH);
  Register RHS = MI.getOperand(2).getReg();
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  auto MatchPow2ExceptOne = [&](const Constant *C) {
    if (auto *CI = dyn_cast<ConstantInt>(C))
      return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
    return false;
  };
  if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
    return false;
  return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
}

void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  unsigned NumEltBits = Ty.getScalarSizeInBits();

  Builder.setInstrAndDebugLoc(MI);
  auto LogBase2 = buildLogBase2(RHS, Builder);
  auto ShiftAmt =
      Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
  auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
  Builder.buildLShr(Dst, LHS, Trunc);
  MI.eraseFromParent();
}

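// Example of the rewrite applied above, assuming a 32-bit scalar type:
//   %hi = G_UMULH %x, 8   ==>   %hi = G_LSHR %x, 29
// since umulh(x, 2^k) == (x * 2^k) >> 32 == x >> (32 - k), with k == 3 here.
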
bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
                                               BuildFnTy &MatchInfo) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
         Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
         Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);

  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  LLT Type = MRI.getType(Dst);

  // fold (fadd x, fneg(y)) -> (fsub x, y)
  // fold (fadd fneg(y), x) -> (fsub x, y)
  // G_ADD is commutative so both cases are checked by m_GFAdd
  if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
      isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
    Opc = TargetOpcode::G_FSUB;
  }
  /// fold (fsub x, fneg(y)) -> (fadd x, y)
  else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
           isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
    Opc = TargetOpcode::G_FADD;
  }
  // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
  // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
  // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
  // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
  else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
            Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
           mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
           mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
    // The opcode is unchanged; only the negated operands are replaced.
  } else
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    Observer.changingInstr(MI);
    MI.setDesc(B.getTII().get(Opc));
    MI.getOperand(1).setReg(X);
    MI.getOperand(2).setReg(Y);
    Observer.changedInstr(MI);
  };
  return true;
}

bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  Register LHS = MI.getOperand(1).getReg();
  MatchInfo = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());

  const auto LHSCst = Ty.isVector()
                          ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
                          : getFConstantVRegValWithLookThrough(LHS, MRI);
  if (!LHSCst)
    return false;

  // -0.0 is always allowed
  if (LHSCst->Value.isNegZero())
    return true;

  // +0.0 is only allowed if nsz is set.
  if (LHSCst->Value.isPosZero())
    return MI.getFlag(MachineInstr::FmNsz);

  return false;
}

void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  Register Dst = MI.getOperand(0).getReg();
  Builder.buildFNeg(
      Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
  eraseInst(MI);
}

/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
/// due to global flags or MachineInstr flags.
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
  if (MI.getOpcode() != TargetOpcode::G_FMUL)
    return false;
  return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
}

static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
                        const MachineRegisterInfo &MRI) {
  return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
                       MRI.use_instr_nodbg_end()) >
         std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
                       MRI.use_instr_nodbg_end());
}

bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
                                         bool &AllowFusionGlobally,
                                         bool &HasFMAD, bool &Aggressive,
                                         bool CanReassociate) {

  auto *MF = MI.getMF();
  const auto &TLI = *MF->getSubtarget().getTargetLowering();
  const TargetOptions &Options = MF->getTarget().Options;
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());

  if (CanReassociate &&
      !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
    return false;

  // Floating-point multiply-add with intermediate rounding.
  HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
                isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return false;

  AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
                        Options.UnsafeFPMath || HasFMAD;
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
    return false;

  Aggressive = TLI.enableAggressiveFMAFusion(DstType);
  return true;
}

bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FADD);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
      std::swap(LHS, RHS);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {LHS.MI->getOperand(1).getReg(),
                    LHS.MI->getOperand(2).getReg(), RHS.Reg});
    };
    return true;
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {RHS.MI->getOperand(1).getReg(),
                    RHS.MI->getOperand(2).getReg(), LHS.Reg});
    };
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FADD);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
      std::swap(LHS, RHS);
  }

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  MachineInstr *FpExtSrc;
  if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
      isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
      auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
    };
    return true;
  }

  // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
  // Note: Commutes FADD operands.
  if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
      isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
      auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
    };
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FADD);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
    return false;

  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
      std::swap(LHS, RHS);
  }

  MachineInstr *FMA = nullptr;
  Register Z;
  // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
  if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
      (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
       TargetOpcode::G_FMUL) &&
      MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
      MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
    FMA = LHS.MI;
    Z = RHS.Reg;
  }
  // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
  else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
           (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
            TargetOpcode::G_FMUL) &&
           MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
           MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
    Z = LHS.Reg;
    FMA = RHS.MI;
  }

  if (FMA) {
    MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
    Register X = FMA->getOperand(1).getReg();
    Register Y = FMA->getOperand(2).getReg();
    Register U = FMulMI->getOperand(1).getReg();
    Register V = FMulMI->getOperand(2).getReg();

    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
      B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {X, Y, InnerFMA});
    };
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FADD);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  if (!Aggressive)
    return false;

  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
      std::swap(LHS, RHS);
  }

  // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
  auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
                                 Register Y, MachineIRBuilder &B) {
    Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
    Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
    Register InnerFMA =
        B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
            .getReg(0);
    B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                 {X, Y, InnerFMA});
  };

  MachineInstr *FMulMI, *FMAMI;
  // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
  //   -> (fma x, y, (fma (fpext u), (fpext v), z))
  if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
      mi_match(LHS.MI->getOperand(3).getReg(), MRI,
               m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=](MachineIRBuilder &B) {
      buildMatchInfo(FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), RHS.Reg,
                     LHS.MI->getOperand(1).getReg(),
                     LHS.MI->getOperand(2).getReg(), B);
    };
    return true;
  }

  // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
  //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
  // FIXME: This turns two single-precision and one double-precision
  // operation into two double-precision operations, which might not be
  // interesting for all targets, especially GPUs.
  if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
      FMAMI->getOpcode() == PreferredFusedOpcode) {
    MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
    if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
        TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                            MRI.getType(FMAMI->getOperand(0).getReg()))) {
      MatchInfo = [=](MachineIRBuilder &B) {
        Register X = FMAMI->getOperand(1).getReg();
        Register Y = FMAMI->getOperand(2).getReg();
        X = B.buildFPExt(DstType, X).getReg(0);
        Y = B.buildFPExt(DstType, Y).getReg(0);
        buildMatchInfo(FMulMI->getOperand(1).getReg(),
                       FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
      };
      return true;
    }
  }

  // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
  //   -> (fma x, y, (fma (fpext u), (fpext v), z))
  if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
      mi_match(RHS.MI->getOperand(3).getReg(), MRI,
               m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=](MachineIRBuilder &B) {
      buildMatchInfo(FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), LHS.Reg,
                     RHS.MI->getOperand(1).getReg(),
                     RHS.MI->getOperand(2).getReg(), B);
    };
    return true;
  }

  // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
  //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
  // FIXME: This turns two single-precision and one double-precision
  // operation into two double-precision operations, which might not be
  // interesting for all targets, especially GPUs.
  if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
      FMAMI->getOpcode() == PreferredFusedOpcode) {
    MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
    if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
        TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                            MRI.getType(FMAMI->getOperand(0).getReg()))) {
      MatchInfo = [=](MachineIRBuilder &B) {
        Register X = FMAMI->getOperand(1).getReg();
        Register Y = FMAMI->getOperand(2).getReg();
        X = B.buildFPExt(DstType, X).getReg(0);
        Y = B.buildFPExt(DstType, Y).getReg(0);
        buildMatchInfo(FMulMI->getOperand(1).getReg(),
                       FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
      };
      return true;
    }
  }

  return false;
}

bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  int FirstMulHasFewerUses = true;
  if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
      hasMoreUses(*LHS.MI, *RHS.MI, MRI))
    FirstMulHasFewerUses = false;

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
  if (FirstMulHasFewerUses &&
      (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
       (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {LHS.MI->getOperand(1).getReg(),
                    LHS.MI->getOperand(2).getReg(), NegZ});
    };
    return true;
  }
  // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
  else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
            (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegY =
          B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
    };
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  MachineInstr *FMulMI;
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
      (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
                      MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally)) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegX =
          B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegX, FMulMI->getOperand(2).getReg(), NegZ});
    };
    return true;
  }

  // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
  if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
      (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
                      MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally)) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FMulMI->getOperand(1).getReg(),
                    FMulMI->getOperand(2).getReg(), LHSReg});
    };
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  MachineInstr *FMulMI;
  // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
  if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FpExtX =
          B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register FpExtY =
          B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
      Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FpExtX, FpExtY, NegZ});
    };
    return true;
  }

  // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
  if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FpExtY =
          B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
      Register FpExtZ =
          B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegY, FpExtZ, LHSReg});
    };
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
                            MachineIRBuilder &B) {
    Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
    Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
    B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
  };

  MachineInstr *FMulMI;
  // fold (fsub (fpext (fneg (fmul x, y))), z) ->
  //      (fneg (fma (fpext x), (fpext y), z))
  // fold (fsub (fneg (fpext (fmul x, y))), z) ->
  //      (fneg (fma (fpext x), (fpext y), z))
  if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
       mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
      buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), RHSReg, B);
      B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
    };
    return true;
  }

  // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
  // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
  if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
       mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), LHSReg, B);
    };
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
                                            unsigned &IdxToPropagate) {
  bool PropagateNaN;
  switch (MI.getOpcode()) {
  default:
    return false;
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
    PropagateNaN = false;
    break;
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
    PropagateNaN = true;
    break;
  }

  auto MatchNaN = [&](unsigned Idx) {
    Register MaybeNaNReg = MI.getOperand(Idx).getReg();
    const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
    if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
      return false;
    IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
    return true;
  };

  return MatchNaN(1) || MatchNaN(2);
}

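// Example of the NaN rule applied above, with %nan a constant NaN operand:
//   G_FMINNUM  %x, %nan --> %x    (IdxToPropagate names the non-NaN operand)
//   G_FMINIMUM %x, %nan --> %nan  (IdxToPropagate names the NaN operand)
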
bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
  assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();

  // Helper lambda to check for opportunities for
  // (X + (Y - X)) -> Y
  // ((Y - X) + X) -> Y
  auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
    Register Reg;
    return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
           Reg == MaybeSameReg;
  };
  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}

bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
                                                  Register &MatchInfo) {
  // This combine folds the following patterns:
  //
  //  G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
  //  G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
  //    into
  //      x
  //    if
  //      k == sizeof(VecEltTy)/2
  //      type(x) == type(dst)
  //
  //  G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
  //    into
  //      x
  //    if
  //      type(x) == type(dst)

  LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
  LLT DstEltTy = DstVecTy.getElementType();

  Register Lo, Hi;

  if (mi_match(
          MI, MRI,
          m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
    MatchInfo = Lo;
    return MRI.getType(MatchInfo) == DstVecTy;
  }

  std::optional<ValueAndVReg> ShiftAmount;
  const auto LoPattern = m_GBitcast(m_Reg(Lo));
  const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
  if (mi_match(
          MI, MRI,
          m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
                   m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
    if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
      MatchInfo = Lo;
      return MRI.getType(MatchInfo) == DstVecTy;
    }
  }

  return false;
}

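// Example of the pattern folded above (illustrative), with %x:_(<2 x s32>)
// viewed through an s64 bitcast:
//   %cast:_(s64) = G_BITCAST %x
//   %hi:_(s64) = G_LSHR %cast, 32
//   %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %cast, %hi
// folds back to %x, because the shift equals the 32-bit element size and
// the bitcast source already has the destination vector type.
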
bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
                                               Register &MatchInfo) {
  // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
  // if type(x) == type(G_TRUNC)
  if (!mi_match(MI.getOperand(1).getReg(), MRI,
                m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
    return false;

  return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
}

bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
                                                   Register &MatchInfo) {
  // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
  //    y if K == size of vector element type
  std::optional<ValueAndVReg> ShiftAmt;
  if (!mi_match(MI.getOperand(1).getReg(), MRI,
                m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
                        m_GCst(ShiftAmt))))
    return false;

  LLT MatchTy = MRI.getType(MatchInfo);
  return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
         MatchTy == MRI.getType(MI.getOperand(0).getReg());
}

unsigned CombinerHelper::getFPMinMaxOpcForSelect(
    CmpInst::Predicate Pred, LLT DstTy,
    SelectPatternNaNBehaviour VsNaNRetVal) const {
  assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
         "Expected a NaN behaviour?");
  // Choose an opcode based off of legality or the behaviour when one of the
  // LHS/RHS may be NaN.
  switch (Pred) {
  default:
    return 0;
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE:
    if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
      return TargetOpcode::G_FMAXNUM;
    if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
      return TargetOpcode::G_FMAXIMUM;
    if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
      return TargetOpcode::G_FMAXNUM;
    if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
      return TargetOpcode::G_FMAXIMUM;
    return 0;
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_OLE:
    if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
      return TargetOpcode::G_FMINNUM;
    if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
      return TargetOpcode::G_FMINIMUM;
    if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
      return TargetOpcode::G_FMINNUM;
    if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
      return 0;
    return TargetOpcode::G_FMINIMUM;
  }
}

CombinerHelper::SelectPatternNaNBehaviour
CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
                                        bool IsOrderedComparison) const {
  bool LHSSafe = isKnownNeverNaN(LHS, MRI);
  bool RHSSafe = isKnownNeverNaN(RHS, MRI);
  // Completely unsafe.
  if (!LHSSafe && !RHSSafe)
    return SelectPatternNaNBehaviour::NOT_APPLICABLE;
  if (LHSSafe && RHSSafe)
    return SelectPatternNaNBehaviour::RETURNS_ANY;
  // An ordered comparison will return false when given a NaN, so it
  // returns the RHS.
  if (IsOrderedComparison)
    return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
                   : SelectPatternNaNBehaviour::RETURNS_OTHER;
  // An unordered comparison will return true when given a NaN, so it
  // returns the LHS.
  return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
                 : SelectPatternNaNBehaviour::RETURNS_NAN;
}

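// Example of the classification above (illustrative): for
//   select (fcmp olt %x, %y), %x, %y
// with only %x known never-NaN, an ordered compare is false when %y is NaN,
// so the select would yield %y; the helper therefore reports RETURNS_NAN,
// i.e. the possibly-NaN side is the one returned.
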
bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
                                           Register TrueVal, Register FalseVal,
                                           BuildFnTy &MatchInfo) {
  // Match: select (fcmp cond x, y) x, y
  //        select (fcmp cond x, y) y, x
  // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
  LLT DstTy = MRI.getType(Dst);
  // Bail out early on pointers, since we'll never want to fold to a min/max.
  if (DstTy.isPointer())
    return false;
  // Match a floating point compare with a less-than/greater-than predicate.
  // TODO: Allow multiple users of the compare if they are all selects.
  CmpInst::Predicate Pred;
  Register CmpLHS, CmpRHS;
  if (!mi_match(Cond, MRI,
                m_OneNonDBGUse(
                    m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
      CmpInst::isEquality(Pred))
    return false;
  SelectPatternNaNBehaviour ResWithKnownNaNInfo =
      computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
  if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
    return false;
  if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
    std::swap(CmpLHS, CmpRHS);
    Pred = CmpInst::getSwappedPredicate(Pred);
    if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
      ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
    else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
      ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
  }
  if (TrueVal != CmpLHS || FalseVal != CmpRHS)
    return false;
  // Decide what type of max/min this should be based off of the predicate.
  unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
  if (!Opc || !isLegal({Opc, {DstTy}}))
    return false;
  // Comparisons between signed zero and zero may have different results...
  // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
  if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
    // We don't know if a comparison between two 0s will give us a consistent
    // result. Be conservative and only proceed if at least one side is
    // known non-zero.
    auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
    if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
      KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
      if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
        return false;
    }
  }
  MatchInfo = [=](MachineIRBuilder &B) {
    B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
  };
  return true;
}

bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
                                                 BuildFnTy &MatchInfo) {
  // TODO: Handle integer cases.
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  // Condition may be fed by a truncated compare.
  Register Cond = MI.getOperand(1).getReg();
  Register MaybeTrunc;
  if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
    Cond = MaybeTrunc;
  Register Dst = MI.getOperand(0).getReg();
  Register TrueVal = MI.getOperand(2).getReg();
  Register FalseVal = MI.getOperand(3).getReg();
  return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
}

bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
                                                   BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  // (X + Y) == X --> Y == 0
  // (X + Y) != X --> Y != 0
  // (X - Y) == X --> Y == 0
  // (X - Y) != X --> Y != 0
  // (X ^ Y) == X --> Y == 0
  // (X ^ Y) != X --> Y != 0
  Register Dst = MI.getOperand(0).getReg();
  CmpInst::Predicate Pred;
  Register X, Y, OpLHS, OpRHS;
  bool MatchedSub = mi_match(
      Dst, MRI,
      m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
  if (MatchedSub && X != OpLHS)
    return false;
  if (!MatchedSub) {
    if (!mi_match(Dst, MRI,
                  m_c_GICmp(m_Pred(Pred), m_Reg(X),
                            m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
                                     m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
      return false;
    Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
  }
  MatchInfo = [=](MachineIRBuilder &B) {
    auto Zero = B.buildConstant(MRI.getType(Y), 0);
    B.buildICmp(Pred, Dst, Y, Zero);
  };
  return CmpInst::isEquality(Pred) && Y.isValid();
}

bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
  Register ShiftReg = MI.getOperand(2).getReg();
  LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
  auto IsShiftTooBig = [&](const Constant *C) {
    auto *CI = dyn_cast<ConstantInt>(C);
    return CI && CI->uge(ResTy.getScalarSizeInBits());
  };
  return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
}

bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto *LHSDef = MRI.getVRegDef(LHS);
  if (getIConstantVRegVal(LHS, MRI).has_value())
    return true;

  // LHS may be a G_CONSTANT_FOLD_BARRIER. If so we commute
  // as long as we don't already have a constant on the RHS.
  if (LHSDef->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER)
    return false;
  return MRI.getVRegDef(RHS)->getOpcode() !=
             TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
         !getIConstantVRegVal(RHS, MRI);
}

bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  std::optional<FPValueAndVReg> ValAndVReg;
  if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
    return false;
  return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
}

void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
  Observer.changingInstr(MI);
  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();
  MI.getOperand(1).setReg(RHSReg);
  MI.getOperand(2).setReg(LHSReg);
  Observer.changedInstr(MI);
}

bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
  LLT SrcTy = MRI.getType(Src);
  if (SrcTy.isFixedVector())
    return isConstantSplatVector(Src, 1, AllowUndefs);
  if (SrcTy.isScalar()) {
    if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
      return true;
    auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
    return IConstant && IConstant->Value == 1;
  }
  return false; // scalable vector
}

bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
  LLT SrcTy = MRI.getType(Src);
  if (SrcTy.isFixedVector())
    return isConstantSplatVector(Src, 0, AllowUndefs);
  if (SrcTy.isScalar()) {
    if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
      return true;
    auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
    return IConstant && IConstant->Value == 0;
  }
  return false; // scalable vector
}

// Ignores COPYs during conformance checks.
// FIXME scalable vectors.
bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
                                           bool AllowUndefs) {
  GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
  if (!BuildVector)
    return false;
  unsigned NumSources = BuildVector->getNumSources();

  for (unsigned I = 0; I < NumSources; ++I) {
    GImplicitDef *ImplicitDef =
        getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
    if (ImplicitDef && AllowUndefs)
      continue;
    if (ImplicitDef && !AllowUndefs)
      return false;
    std::optional<ValueAndVReg> IConstant =
        getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
    if (IConstant && IConstant->Value == SplatValue)
      continue;
    return false;
  }
  return true;
}

// Ignores COPYs during lookups.
// FIXME scalable vectors
std::optional<APInt>
CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
  auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
  if (IConstant)
    return IConstant->Value;

  GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
  if (!BuildVector)
    return std::nullopt;
  unsigned NumSources = BuildVector->getNumSources();

  std::optional<APInt> Value = std::nullopt;
  for (unsigned I = 0; I < NumSources; ++I) {
    std::optional<ValueAndVReg> IConstant =
        getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
    if (!IConstant)
      return std::nullopt;
    if (!Value)
      Value = IConstant->Value;
    else if (*Value != IConstant->Value)
      return std::nullopt;
  }
  return Value;
}

// TODO: use knownbits to determine zeros
bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
                                              BuildFnTy &MatchInfo) {
  uint32_t Flags = Select->getFlags();
  Register Dest = Select->getReg(0);
  Register Cond = Select->getCondReg();
  Register True = Select->getTrueReg();
  Register False = Select->getFalseReg();
  LLT CondTy = MRI.getType(Select->getCondReg());
  LLT TrueTy = MRI.getType(Select->getTrueReg());

  // We only do this combine for scalar boolean conditions.
  if (CondTy != LLT::scalar(1))
    return false;

  // Both are scalars.
  std::optional<ValueAndVReg> TrueOpt =
      getIConstantVRegValWithLookThrough(True, MRI);
  std::optional<ValueAndVReg> FalseOpt =
      getIConstantVRegValWithLookThrough(False, MRI);

  if (!TrueOpt || !FalseOpt)
    return false;

  APInt TrueValue = TrueOpt->Value;
  APInt FalseValue = FalseOpt->Value;

  // select Cond, 1, 0 --> zext (Cond)
  if (TrueValue.isOne() && FalseValue.isZero()) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      B.buildZExtOrTrunc(Dest, Cond);
    };
    return true;
  }

  // select Cond, -1, 0 --> sext (Cond)
  if (TrueValue.isAllOnes() && FalseValue.isZero()) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      B.buildSExtOrTrunc(Dest, Cond);
    };
    return true;
  }

  // select Cond, 0, 1 --> zext (!Cond)
  if (TrueValue.isZero() && FalseValue.isOne()) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      Register Inner = MRI.createGenericVirtualRegister(CondTy);
      B.buildNot(Inner, Cond);
      B.buildZExtOrTrunc(Dest, Inner);
    };
    return true;
  }

  // select Cond, 0, -1 --> sext (!Cond)
  if (TrueValue.isZero() && FalseValue.isAllOnes()) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      Register Inner = MRI.createGenericVirtualRegister(CondTy);
      B.buildNot(Inner, Cond);
      B.buildSExtOrTrunc(Dest, Inner);
    };
    return true;
  }

  // select Cond, C1, C1-1 --> add (zext Cond), C1-1
  if (TrueValue - 1 == FalseValue) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      Register Inner = MRI.createGenericVirtualRegister(TrueTy);
      B.buildZExtOrTrunc(Inner, Cond);
      B.buildAdd(Dest, Inner, False);
    };
    return true;
  }

  // select Cond, C1, C1+1 --> add (sext Cond), C1+1
  if (TrueValue + 1 == FalseValue) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      Register Inner = MRI.createGenericVirtualRegister(TrueTy);
      B.buildSExtOrTrunc(Inner, Cond);
      B.buildAdd(Dest, Inner, False);
    };
    return true;
  }

  // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
  if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      Register Inner = MRI.createGenericVirtualRegister(TrueTy);
      B.buildZExtOrTrunc(Inner, Cond);
      // The shift amount must be scalar.
      LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
      auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
      B.buildShl(Dest, Inner, ShAmtC, Flags);
    };
    return true;
  }
  // select Cond, -1, C --> or (sext Cond), C
  if (TrueValue.isAllOnes()) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      Register Inner = MRI.createGenericVirtualRegister(TrueTy);
      B.buildSExtOrTrunc(Inner, Cond);
      B.buildOr(Dest, Inner, False, Flags);
    };
    return true;
  }

  // select Cond, C, -1 --> or (sext (not Cond)), C
  if (FalseValue.isAllOnes()) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      Register Not = MRI.createGenericVirtualRegister(CondTy);
      B.buildNot(Not, Cond);
      Register Inner = MRI.createGenericVirtualRegister(TrueTy);
      B.buildSExtOrTrunc(Inner, Not);
      B.buildOr(Dest, Inner, True, Flags);
    };
    return true;
  }

  return false;
}

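// Worked example for the constant-select folds above (illustrative): with a
// boolean %c:_(s1),
//   %r:_(s32) = G_SELECT %c, 10, 9
// hits the "C1, C1-1" case and becomes
//   %z:_(s32) = G_ZEXT %c ; %r:_(s32) = G_ADD %z, 9
// where the add's second operand is the original register holding 9.
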
// TODO: use knownbits to determine zeros
bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
                                              BuildFnTy &MatchInfo) {
  uint32_t Flags = Select->getFlags();
  Register DstReg = Select->getReg(0);
  Register Cond = Select->getCondReg();
  Register True = Select->getTrueReg();
  Register False = Select->getFalseReg();
  LLT CondTy = MRI.getType(Select->getCondReg());
  LLT TrueTy = MRI.getType(Select->getTrueReg());

  // Boolean or fixed vector of booleans.
  if (CondTy.isScalableVector() ||
      (CondTy.isFixedVector() &&
       CondTy.getElementType().getScalarSizeInBits() != 1) ||
      CondTy.getScalarSizeInBits() != 1)
    return false;

  if (CondTy != TrueTy)
    return false;

  // select Cond, Cond, F --> or Cond, F
  // select Cond, 1, F --> or Cond, F
  if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      Register Ext = MRI.createGenericVirtualRegister(TrueTy);
      B.buildZExtOrTrunc(Ext, Cond);
      B.buildOr(DstReg, Ext, False, Flags);
    };
    return true;
  }

  // select Cond, T, Cond --> and Cond, T
  // select Cond, T, 0 --> and Cond, T
  if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      Register Ext = MRI.createGenericVirtualRegister(TrueTy);
      B.buildZExtOrTrunc(Ext, Cond);
      B.buildAnd(DstReg, Ext, True);
    };
    return true;
  }

  // select Cond, T, 1 --> or (not Cond), T
  if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      // First the not.
      Register Inner = MRI.createGenericVirtualRegister(CondTy);
      B.buildNot(Inner, Cond);
      // Then an ext to match the destination register.
      Register Ext = MRI.createGenericVirtualRegister(TrueTy);
      B.buildZExtOrTrunc(Ext, Inner);
      B.buildOr(DstReg, Ext, True, Flags);
    };
    return true;
  }

  // select Cond, 0, F --> and (not Cond), F
  if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.setInstrAndDebugLoc(*Select);
      // First the not.
      Register Inner = MRI.createGenericVirtualRegister(CondTy);
      B.buildNot(Inner, Cond);
      // Then an ext to match the destination register.
      Register Ext = MRI.createGenericVirtualRegister(TrueTy);
      B.buildZExtOrTrunc(Ext, Inner);
      B.buildAnd(DstReg, Ext, False);
    };
    return true;
  }

  return false;
}

bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
                                              BuildFnTy &MatchInfo) {
  Register DstReg = Select->getReg(0);
  Register Cond = Select->getCondReg();
  Register True = Select->getTrueReg();
  Register False = Select->getFalseReg();
  LLT DstTy = MRI.getType(DstReg);

  if (DstTy.isPointer())
    return false;

  // We need an G_ICMP on the condition register.
  GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI);
  if (!Cmp)
    return false;

  // We want to fold the icmp and replace the select.
  if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
    return false;

  CmpInst::Predicate Pred = Cmp->getCond();
  // We need a larger or smaller predicate for
  // canonicalization.
  if (CmpInst::isEquality(Pred))
    return false;

  Register CmpLHS = Cmp->getLHSReg();
  Register CmpRHS = Cmp->getRHSReg();

  // We can swap CmpLHS and CmpRHS for higher hitrate.
  if (True == CmpRHS && False == CmpLHS) {
    std::swap(CmpLHS, CmpRHS);
    Pred = CmpInst::getSwappedPredicate(Pred);
  }

  // (icmp X, Y) ? X : Y -> integer minmax.
  // see matchSelectPattern in ValueTracking.
  // Legality between G_SELECT and integer minmax can differ.
  if (True == CmpLHS && False == CmpRHS) {
    switch (Pred) {
    case ICmpInst::ICMP_UGT:
    case ICmpInst::ICMP_UGE: {
      if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
        return false;
      MatchInfo = [=](MachineIRBuilder &B) {
        B.buildUMax(DstReg, True, False);
      };
      return true;
    }
    case ICmpInst::ICMP_SGT:
    case ICmpInst::ICMP_SGE: {
      if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
        return false;
      MatchInfo = [=](MachineIRBuilder &B) {
        B.buildSMax(DstReg, True, False);
      };
      return true;
    }
    case ICmpInst::ICMP_ULT:
    case ICmpInst::ICMP_ULE: {
      if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
        return false;
      MatchInfo = [=](MachineIRBuilder &B) {
        B.buildUMin(DstReg, True, False);
      };
      return true;
    }
    case ICmpInst::ICMP_SLT:
    case ICmpInst::ICMP_SLE: {
      if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
        return false;
      MatchInfo = [=](MachineIRBuilder &B) {
        B.buildSMin(DstReg, True, False);
      };
      return true;
    }
    default:
      return false;
    }
  }

  return false;
}

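// Example of the icmp+select canonicalization above (illustrative):
//   %c:_(s1) = G_ICMP intpred(ugt), %x, %y
//   %r = G_SELECT %c, %x, %y
// becomes %r = G_UMAX %x, %y when G_UMAX is legal (or before legalization),
// and the now-dead compare is expected to be cleaned up separately.
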
bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
  GSelect *Select = cast<GSelect>(&MI);

  if (tryFoldSelectOfConstants(Select, MatchInfo))
    return true;

  if (tryFoldBoolSelectToLogic(Select, MatchInfo))
    return true;

  if (tryFoldSelectToIntMinMax(Select, MatchInfo))