//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gi-combiner"
using namespace llvm;
using namespace MIPatternMatch;

// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
                       cl::desc("Force all indexed operations to be "
                                "legal for the GlobalISel combiner"));
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                               MachineIRBuilder &B, GISelKnownBits *KB,
                               MachineDominatorTree *MDT,
                               const LegalizerInfo *LI)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
      MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
      TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
}
const TargetLowering &CombinerHelper::getTargetLowering() const {
  return *Builder.getMF().getSubtarget().getTargetLowering();
}
/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return I;
}
/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(V);
  auto Ctlz = MIB.buildCTLZ(Ty, V);
  auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
  return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
}
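// Worked example (illustrative): for V = 8 in an s32 type, ctlz(8) = 28 and
// EltBits - 1 = 31, so the emitted G_SUB computes 31 - 28 = 3 == log2(8).
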
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return ByteWidth - I - 1;
}
/// Given a map from byte offsets in memory to indices in a load/store,
/// determine if that map corresponds to a little or big endian byte pattern.
///
/// \param MemOffset2Idx maps memory offsets to address offsets.
/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
///
/// \returns true if the map corresponds to a big endian byte pattern, false
/// if it corresponds to a little endian byte pattern, and None otherwise.
///
/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
/// are as follows:
///
/// AddrOffset   Little endian    Big endian
/// 0            0                3
/// 1            1                2
/// 2            2                1
/// 3            3                0
static Optional<bool>
isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
            int64_t LowestIdx) {
  // Need at least two byte positions to decide on endianness.
  unsigned Width = MemOffset2Idx.size();
  if (Width < 2)
    return None;
  bool BigEndian = true, LittleEndian = true;
  for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
    auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
    if (MemOffsetAndIdx == MemOffset2Idx.end())
      return None;
    const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
    assert(Idx >= 0 && "Expected non-negative byte offset?");
    LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
    BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
    if (!BigEndian && !LittleEndian)
      return None;
  }

  assert((BigEndian != LittleEndian) &&
         "Pattern cannot be both big and little endian!");
  return BigEndian;
}
bool CombinerHelper::isPreLegalize() const { return !LI; }
bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
  assert(LI && "Must have LegalizerInfo to query isLegal!");
  return LI->getAction(Query).Action == LegalizeActions::Legal;
}
bool CombinerHelper::isLegalOrBeforeLegalizer(
    const LegalityQuery &Query) const {
  return isPreLegalize() || isLegal(Query);
}
bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
  if (!Ty.isVector())
    return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
  // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
  if (isPreLegalize())
    return true;
  LLT EltTy = Ty.getElementType();
  return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
         isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
}
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                                    Register ToReg) const {
  Observer.changingAllUsesOfReg(MRI, FromReg);

  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(ToReg, FromReg);

  Observer.finishedChangingAllUsesOfReg();
}
void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
                                      MachineOperand &FromRegOp,
                                      Register ToReg) const {
  assert(FromRegOp.getParent() && "Expected an operand in an MI");
  Observer.changingInstr(*FromRegOp.getParent());

  FromRegOp.setReg(ToReg);

  Observer.changedInstr(*FromRegOp.getParent());
}
void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
                                       unsigned ToOpcode) const {
  Observer.changingInstr(FromMI);

  FromMI.setDesc(Builder.getTII().get(ToOpcode));

  Observer.changedInstr(FromMI);
}
const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
  return RBI->getRegBank(Reg, MRI, *TRI);
}
void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
  if (RegBank)
    MRI.setRegBank(Reg, *RegBank);
}
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return canReplaceReg(DstReg, SrcReg, MRI);
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, SrcReg);
}
bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
  bool IsUndef = false;
  SmallVector<Register, 4> Ops;
  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
    applyCombineConcatVectors(MI, IsUndef, Ops);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
  IsUndef = true;
  MachineInstr *Undef = nullptr;

  // Walk over all the operands of concat vectors and check if they are
  // build_vector themselves or undef.
  // Then collect their operands in Ops.
  for (const MachineOperand &MO : MI.uses()) {
    Register Reg = MO.getReg();
    MachineInstr *Def = MRI.getVRegDef(Reg);
    assert(Def && "Operand not defined");
    switch (Def->getOpcode()) {
    case TargetOpcode::G_BUILD_VECTOR:
      IsUndef = false;
      // Remember the operands of the build_vector to fold
      // them into the yet-to-build flattened concat vectors.
      for (const MachineOperand &BuildVecMO : Def->uses())
        Ops.push_back(BuildVecMO.getReg());
      break;
    case TargetOpcode::G_IMPLICIT_DEF: {
      LLT OpType = MRI.getType(Reg);
      // Keep one undef value for all the undef operands.
      if (!Undef) {
        Builder.setInsertPt(*MI.getParent(), MI);
        Undef = Builder.buildUndef(OpType.getScalarType());
      }
      assert(MRI.getType(Undef->getOperand(0).getReg()) ==
                 OpType.getScalarType() &&
             "All undefs should have the same type");
      // Break the undef vector in as many scalar elements as needed
      // for the flattening.
      for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
           EltIdx != EltEnd; ++EltIdx)
        Ops.push_back(Undef->getOperand(0).getReg());
      break;
    }
    default:
      return false;
    }
  }
  return true;
}
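// Illustrative MIR (register names assumed for the example):
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x, %y
//   %b:_(<2 x s32>) = G_IMPLICIT_DEF
//   %v:_(<4 x s32>) = G_CONCAT_VECTORS %a, %b
// matches with Ops = {%x, %y, %undef, %undef}, so the apply step below emits
//   %v:_(<4 x s32>) = G_BUILD_VECTOR %x, %y, %undef, %undef
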
void CombinerHelper::applyCombineConcatVectors(
    MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
  // Generate the flattened build_vector.
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef. Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up. For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
  if (IsUndef)
    Builder.buildUndef(NewDstReg);
  else
    Builder.buildBuildVector(NewDstReg, Ops);
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}
bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Invalid instruction kind");
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(Src1);
  // As bizarre as it may look, shuffle vector can actually produce
  // scalar! This is because at the IR level a <1 x ty> shuffle
  // vector is perfectly valid.
  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;

  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector into a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  //       extract_vector_elt and so on. It is less clear that would
  //       be better though, so don't bother for now.
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match
  //       we could still emit an extract vector element in that case.
  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
    return false;

  // Check that the shuffle mask can be broken evenly between the
  // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;

  // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
  unsigned NumConcat = DstNumElts / SrcNumElts;
  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;
    // Ensure the indices in each SrcType sized piece are sequential and that
    // the same source is used for the whole piece.
    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
        (ConcatSrcs[i / SrcNumElts] >= 0 &&
         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
      return false;
    // Remember which source this index came from.
    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
  }

  // The shuffle is concatenating multiple vectors together.
  // Collect the different operands for that.
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (auto Src : ConcatSrcs) {
    if (Src < 0) {
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}
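// Illustrative example: a G_SHUFFLE_VECTOR of two <2 x s32> sources with the
// mask <0, 1, 2, 3> is a pure concatenation, so Ops ends up as {Src1, Src2}
// and the apply step below merges them into the destination vector.
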
void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                               const ArrayRef<Register> Ops) {
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  if (Ops.size() == 1)
    Builder.buildCopy(NewDstReg, Ops[0]);
  else
    Builder.buildMerge(NewDstReg, Ops);

  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}
namespace {

/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
                                  const LLT TyForCandidate,
                                  unsigned OpcodeForCandidate,
                                  MachineInstr *MIForCandidate) {
  if (!CurrentUse.Ty.isValid()) {
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
        CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }

  // We permit the extend to hoist through basic blocks but this is only
  // sensible if the target has extending loads. If you end up lowering back
  // into a load and extend during the legalizer then the end result is
  // hoisting the extend up to the load.

  // Prefer defined extensions to undefined extensions as these are more
  // likely to reduce the number of instructions.
  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
           OpcodeForCandidate != TargetOpcode::G_ANYEXT)
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};

  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive.
  if (CurrentUse.Ty == TyForCandidate) {
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }

  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have a reduced number of larger registers than smaller
  // registers and this choice potentially increases the live-range for the
  // larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}
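// Illustrative ordering: with a current preference of (s32, G_SEXT), a
// candidate (s32, G_ZEXT) is rejected because sign extensions win at equal
// size, while a candidate (s64, G_ZEXT) is taken because the larger type wins.
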
/// Find a suitable place to insert some instructions and insert them. This
/// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHI's is to duplicate the
/// instructions for each predecessor. However, while that's ok for G_TRUNC
/// on most targets since it generally requires no code, other targets/cases may
/// want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();

  MachineBasicBlock *InsertBB = UseMI.getParent();

  // If the use is a PHI then we want the predecessor block instead.
  if (UseMI.isPHI()) {
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }

  // If the block is the same block as the def then we want to insert just after
  // the def instead of at the start of the block.
  if (InsertBB == DefMI.getParent()) {
    MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt), UseMO);
    return;
  }

  // Otherwise we want the start of the BB
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  PreferredTuple Preferred;
  if (matchCombineExtendingLoads(MI, Preferred)) {
    applyCombineExtendingLoads(MI, Preferred);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load for the volatile case or just
  // for performance.
  GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
  if (!LoadMI)
    return false;

  Register LoadReg = LoadMI->getDstReg();

  LLT LoadValueTy = MRI.getType(LoadReg);
  if (!LoadValueTy.isScalar())
    return false;

  // Most architectures are going to legalize <s8 loads into at least a 1 byte
  // load, and the MMOs can only describe memory accesses in multiples of bytes.
  // If we try to perform extload combining on those, we can end up with
  // %a(s8) = extload %ptr (load 1 byte from %ptr)
  // ... which is an illegal extload instruction.
  if (LoadValueTy.getSizeInBits() < 8)
    return false;

  // For non power-of-2 types, they will very likely be legalized into multiple
  // loads. Don't bother trying to match them into extending loads.
  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
    return false;

  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode =
      isa<GLoad>(&MI)
          ? TargetOpcode::G_ANYEXT
          : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Preferred = {LLT(), PreferredOpcode, nullptr};
  for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
      const auto &MMO = LoadMI->getMMO();
      // For atomics, only form anyextending loads.
      if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
        continue;
      // Check for legality.
      if (LI) {
        LegalityQuery::MemDesc MMDesc(MMO);
        LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
        LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
        if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
                .Action != LegalizeActions::Legal)
          continue;
      }
      Preferred = ChoosePreferredUse(Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");

  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}
void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // Rewrite the load to the chosen extending load.
  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();

  // Inserter to insert a truncate back to the original type at a given point
  // with some basic CSE to limit truncate duplication to one per BB.
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
                           MachineBasicBlock::iterator InsertBefore,
                           MachineOperand &UseMO) {
    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
    if (PreviouslyEmitted) {
      Observer.changingInstr(*UseMO.getParent());
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }

    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
    EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };

  Observer.changingInstr(MI);
  MI.setDesc(
      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
                               ? TargetOpcode::G_SEXTLOAD
                               : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
                                     ? TargetOpcode::G_ZEXTLOAD
                                     : TargetOpcode::G_LOAD));

  // Rewrite all the uses to fix up the types.
  auto &LoadValue = MI.getOperand(0);
  SmallVector<MachineOperand *, 4> Uses;
  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
    Uses.push_back(&UseMO);

  for (auto *UseMO : Uses) {
    MachineInstr *UseMI = UseMO->getParent();

    // If the extend is compatible with the preferred extend then we should fix
    // up the type and extend so that it uses the preferred use.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      Register UseDstReg = UseMI->getOperand(0).getReg();
      MachineOperand &UseSrcMO = UseMI->getOperand(1);
      const LLT UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ANYEXT %1(s8)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          // with all uses of %3 replaced by %2.
          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
          Observer.erasingInstr(*UseMO->getParent());
          UseMO->getParent()->eraseFromParent();
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s64) = G_ANYEXT %1(s8)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    %3:_(s64) = G_ANYEXT %2:_(s32)
          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the old extend.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }

    // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }

  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}
bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
                                                 BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  // If we have the following code:
  //  %mask = G_CONSTANT 255
  //  %ld   = G_LOAD %ptr, (load s16)
  //  %and  = G_AND %ld, %mask
  //
  // Try to fold it into
  //   %ld = G_ZEXTLOAD %ptr, (load s8)

  Register Dst = MI.getOperand(0).getReg();
  if (MRI.getType(Dst).isVector())
    return false;

  auto MaybeMask =
      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeMask)
    return false;

  APInt MaskVal = MaybeMask->Value;

  if (!MaskVal.isMask())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
      !LoadMI->isSimple())
    return false;

  Register LoadReg = LoadMI->getDstReg();
  LLT LoadTy = MRI.getType(LoadReg);
  Register PtrReg = LoadMI->getPointerReg();
  uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
  unsigned MaskSizeBits = MaskVal.countTrailingOnes();

  // The mask may not be larger than the in-memory type, as it might cover sign
  // extended bits.
  if (MaskSizeBits > LoadSizeBits)
    return false;

  // If the mask covers the whole destination register, there's nothing to
  // extend.
  if (MaskSizeBits >= LoadTy.getSizeInBits())
    return false;

  // Most targets cannot deal with loads of size < 8 and need to re-legalize to
  // at least byte loads. Avoid creating such loads here.
  if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
    return false;

  const MachineMemOperand &MMO = LoadMI->getMMO();
  LegalityQuery::MemDesc MemDesc(MMO);
  MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    B.setInstrAndDebugLoc(*LoadMI);
    auto &MF = B.getMF();
    auto PtrInfo = MMO.getPointerInfo();
    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
    B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
  };
  return true;
}
bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
                                   const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return true;
  const MachineBasicBlock &MBB = *DefMI.getParent();
  auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
    return &MI == &DefMI || &MI == &UseMI;
  });
  if (DefOrUse == MBB.end())
    llvm_unreachable("Block must contain both DefMI and UseMI!");
  return &*DefOrUse == &DefMI;
}
bool CombinerHelper::dominates(const MachineInstr &DefMI,
                               const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    return false;

  return isPredecessor(DefMI, UseMI);
}
bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register SrcReg = MI.getOperand(1).getReg();
  Register LoadUser = SrcReg;

  if (MRI.getType(SrcReg).isVector())
    return false;

  Register TruncSrc;
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
    LoadUser = TruncSrc;

  uint64_t SizeInBits = MI.getOperand(2).getImm();
  // If the source is a G_SEXTLOAD from the same bit width, then we don't
  // need any extend at all, just a truncate.
  if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
    // If truncating more than the original extended value, abort.
    auto LoadSizeBits = LoadMI->getMemSizeInBits();
    if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
      return false;
    if (LoadSizeBits == SizeInBits)
      return true;
  }
  return false;
}
void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Builder.setInstrAndDebugLoc(MI);
  Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
}
bool CombinerHelper::matchSextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);

  // Only supports scalars for now.
  if (MRI.getType(MI.getOperand(0).getReg()).isVector())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
  if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) ||
      !LoadDef->isSimple())
    return false;

  // If the sign extend extends from a narrower width than the load's width,
  // then we can narrow the load width when we combine to a G_SEXTLOAD.
  // Avoid widening the load at all.
  unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(),
                                  LoadDef->getMemSizeInBits());

  // Don't generate G_SEXTLOADs with a < 1 byte width.
  if (NewSizeBits < 8)
    return false;
  // Don't bother creating a non-power-2 sextload, it will likely be broken up
  // anyway for most targets.
  if (!isPowerOf2_32(NewSizeBits))
    return false;

  const MachineMemOperand &MMO = LoadDef->getMMO();
  LegalityQuery::MemDesc MMDesc(MMO);
  MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
                                 {MRI.getType(LoadDef->getDstReg()),
                                  MRI.getType(LoadDef->getPointerReg())},
                                 {MMDesc}}))
    return false;

  MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
  return true;
}
void CombinerHelper::applySextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register LoadReg;
  unsigned ScalarSizeBits;
  std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
  GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));

  // If we have the following:
  // %ld = G_LOAD %ptr, (load 2)
  // %ext = G_SEXT_INREG %ld, 8
  //    ==>
  // %ld = G_SEXTLOAD %ptr (load 1)
  auto &MMO = LoadDef->getMMO();
  Builder.setInstrAndDebugLoc(*LoadDef);
  auto &MF = Builder.getMF();
  auto PtrInfo = MMO.getPointerInfo();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
  Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
                         LoadDef->getPointerReg(), *NewMMO);
  MI.eraseFromParent();
}
bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
                                            Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);

  Base = MI.getOperand(1).getReg();
  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return false;

  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
  // FIXME: The following use traversal needs a bail out for pathological cases.
  for (auto &Use : MRI.use_nodbg_instructions(Base)) {
    if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
      continue;

    Offset = Use.getOperand(2).getReg();
    if (!ForceLegalIndexing &&
        !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
                        << Use);
      continue;
    }

    // Make sure the offset calculation is before the potentially indexed op.
    // FIXME: we really care about dependency here. The offset calculation might
    // be movable.
    MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
    if (!OffsetDef || !dominates(*OffsetDef, MI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
                        << Use);
      continue;
    }

    // FIXME: check whether all uses of Base are load/store with foldable
    // addressing modes. If so, using the normal addr-modes is better than
    // forming an indexed one.

    bool MemOpDominatesAddrUses = true;
    for (auto &PtrAddUse :
         MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
      if (!dominates(MI, PtrAddUse)) {
        MemOpDominatesAddrUses = false;
        break;
      }
    }

    if (!MemOpDominatesAddrUses) {
      LLVM_DEBUG(
          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
                 << Use);
      continue;
    }

    LLVM_DEBUG(dbgs() << "    Found match: " << Use);
    Addr = Use.getOperand(0).getReg();
    return true;
  }

  return false;
}
bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
                                           Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);

  Addr = MI.getOperand(1).getReg();
  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
  if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
    return false;

  Base = AddrDef->getOperand(1).getReg();
  Offset = AddrDef->getOperand(2).getReg();

  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);

  if (!ForceLegalIndexing &&
      !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
    return false;
  }

  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
    return false;
  }

  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    // Would require a copy.
    if (Base == MI.getOperand(0).getReg()) {
      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
      return false;
    }

    // We're expecting one use of Addr in MI, but it could also be the
    // value stored, which isn't actually dominated by the instruction.
    if (MI.getOperand(0).getReg() == Addr) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
      return false;
    }
  }

  // FIXME: check whether all uses of the base pointer are constant PtrAdds.
  // That might allow us to end base's liveness here by adjusting the constant.

  for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
    if (!dominates(MI, UseMI)) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
      return false;
    }
  }

  return true;
}
bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
  IndexedLoadStoreMatchInfo MatchInfo;
  if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
    applyCombineIndexedLoadStore(MI, MatchInfo);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
    return false;

  // For now, no targets actually support these opcodes so don't waste time
  // running these unless we're forced to for testing.
  if (!ForceLegalIndexing)
    return false;

  MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                                          MatchInfo.Offset);
  if (!MatchInfo.IsPre &&
      !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                              MatchInfo.Offset))
    return false;

  return true;
}
void CombinerHelper::applyCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
  MachineIRBuilder MIRBuilder(MI);
  unsigned Opcode = MI.getOpcode();
  bool IsStore = Opcode == TargetOpcode::G_STORE;
  unsigned NewOpcode;
  switch (Opcode) {
  case TargetOpcode::G_LOAD:
    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
    break;
  case TargetOpcode::G_SEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
    break;
  case TargetOpcode::G_STORE:
    NewOpcode = TargetOpcode::G_INDEXED_STORE;
    break;
  default:
    llvm_unreachable("Unknown load/store opcode");
  }

  auto MIB = MIRBuilder.buildInstr(NewOpcode);
  if (IsStore) {
    MIB.addDef(MatchInfo.Addr);
    MIB.addUse(MI.getOperand(0).getReg());
  } else {
    MIB.addDef(MI.getOperand(0).getReg());
    MIB.addDef(MatchInfo.Addr);
  }

  MIB.addUse(MatchInfo.Base);
  MIB.addUse(MatchInfo.Offset);
  MIB.addImm(MatchInfo.IsPre);
  MI.eraseFromParent();
  AddrDef.eraseFromParent();

  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
}
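// Illustrative shape of a post-indexed load (register names assumed, and
// target support forced via -force-legal-indexing since no target marks these
// legal yet):
//   %v:_(s32) = G_LOAD %base
//   %addr:_(p0) = G_PTR_ADD %base, %off
// becomes
//   %v:_(s32), %addr:_(p0) = G_INDEXED_LOAD %base, %off, 0
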
bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) {
  unsigned Opcode = MI.getOpcode();
  bool IsDiv, IsSigned;

  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV: {
    IsDiv = true;
    IsSigned = Opcode == TargetOpcode::G_SDIV;
    break;
  }
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    IsDiv = false;
    IsSigned = Opcode == TargetOpcode::G_SREM;
    break;
  }
  }

  Register Src1 = MI.getOperand(1).getReg();
  unsigned DivOpcode, RemOpcode, DivremOpcode;
  if (IsSigned) {
    DivOpcode = TargetOpcode::G_SDIV;
    RemOpcode = TargetOpcode::G_SREM;
    DivremOpcode = TargetOpcode::G_SDIVREM;
  } else {
    DivOpcode = TargetOpcode::G_UDIV;
    RemOpcode = TargetOpcode::G_UREM;
    DivremOpcode = TargetOpcode::G_UDIVREM;
  }

  if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
    return false;

  // Try to combine:
  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
  // into:
  //   %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
  //
  // and:
  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
  // into:
  //   %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
  for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
    if (MI.getParent() == UseMI.getParent() &&
        ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
         (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
        matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) {
      OtherMI = &UseMI;
      return true;
    }
  }

  return false;
}
void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) {
  unsigned Opcode = MI.getOpcode();
  assert(OtherMI && "OtherMI shouldn't be empty.");

  Register DestDivReg, DestRemReg;
  if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
    DestDivReg = MI.getOperand(0).getReg();
    DestRemReg = OtherMI->getOperand(0).getReg();
  } else {
    DestDivReg = OtherMI->getOperand(0).getReg();
    DestRemReg = MI.getOperand(0).getReg();
  }

  bool IsSigned =
      Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;

  // Check which instruction is first in the block so we don't break def-use
  // deps by "moving" the instruction incorrectly.
  if (dominates(MI, *OtherMI))
    Builder.setInstrAndDebugLoc(MI);
  else
    Builder.setInstrAndDebugLoc(*OtherMI);

  Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
                              : TargetOpcode::G_UDIVREM,
                     {DestDivReg, DestRemReg},
                     {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
  MI.eraseFromParent();
  OtherMI->eraseFromParent();
}
bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
                                                   MachineInstr *&BrCond) {
  assert(MI.getOpcode() == TargetOpcode::G_BR);

  // Try to match the following:
  // bb1:
  //   G_BRCOND %c1, %bb2
  //   G_BR %bb3
  // bb2:
  // ...
  // bb3:
  //
  // The above pattern does not have a fall through to the successor bb2, always
  // resulting in a branch no matter which path is taken. Here we try to find
  // and replace that pattern with conditional branch to bb3 and otherwise
  // fallthrough to bb2. This is generally better for branch predictors.

  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator BrIt(MI);
  if (BrIt == MBB->begin())
    return false;
  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");

  BrCond = &*std::prev(BrIt);
  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Check that the next block is the conditional branch target. Also make sure
  // that it isn't the same as the G_BR's target (otherwise, this will loop.)
  MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
  return BrCondTarget != MI.getOperand(0).getMBB() &&
         MBB->isLayoutSuccessor(BrCondTarget);
}
void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
                                                   MachineInstr *&BrCond) {
  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  Builder.setInstrAndDebugLoc(*BrCond);
  LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
  // FIXME: Does int/fp matter for this? If so, we might need to restrict
  // this to i1 only since we might not know for sure what kind of
  // compare generated the condition value.
  auto True = Builder.buildConstant(
      Ty, getICmpTrueVal(getTargetLowering(), false, false));
  auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);

  auto *FallthroughBB = BrCond->getOperand(1).getMBB();
  Observer.changingInstr(MI);
  MI.getOperand(0).setMBB(FallthroughBB);
  Observer.changedInstr(MI);

  // Change the conditional branch to use the inverted condition and
  // new target block.
  Observer.changingInstr(*BrCond);
  BrCond->getOperand(0).setReg(Xor.getReg(0));
  BrCond->getOperand(1).setMBB(BrTarget);
  Observer.changedInstr(*BrCond);
}
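// Illustrative result: for "G_BRCOND %c, %bb2; G_BR %bb3" with %bb2 as the
// layout successor, this rewrites to "G_BRCOND (G_XOR %c, true), %bb3;
// G_BR %bb2", so the unconditional branch now targets the fallthrough block.
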
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  return Helper.lowerMemcpyInline(MI) ==
         LegalizerHelper::LegalizeResult::Legalized;
}
bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  return Helper.lowerMemCpyFamily(MI, MaxLen) ==
         LegalizerHelper::LegalizeResult::Legalized;
}
static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
                                             const Register Op,
                                             const MachineRegisterInfo &MRI) {
  const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
  if (!MaybeCst)
    return None;

  APFloat V = MaybeCst->getValueAPF();
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_FNEG: {
    V.changeSign();
    return V;
  }
  case TargetOpcode::G_FABS: {
    V.clearSign();
    return V;
  }
  case TargetOpcode::G_FPTRUNC:
    break;
  case TargetOpcode::G_FSQRT: {
    bool Unused;
    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
    V = APFloat(sqrt(V.convertToDouble()));
    break;
  }
  case TargetOpcode::G_FLOG2: {
    bool Unused;
    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
    V = APFloat(log2(V.convertToDouble()));
    break;
  }
  }
  // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
  // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
  // and `G_FLOG2` reach here.
  bool Unused;
  V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
  return V;
}
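// Illustrative example: a G_FSQRT whose operand is the f32 constant 4.0 is
// folded by converting to double precision, taking sqrt(4.0) = 2.0, and then
// converting the result back to the semantics of the f32 destination type.
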
bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
                                                     Optional<APFloat> &Cst) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
  return Cst.hasValue();
}
void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
                                                     Optional<APFloat> &Cst) {
  assert(Cst.hasValue() && "Optional is unexpectedly empty!");
  Builder.setInstrAndDebugLoc(MI);
  MachineFunction &MF = Builder.getMF();
  auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
  Register DstReg = MI.getOperand(0).getReg();
  Builder.buildFConstant(DstReg, *FPVal);
  MI.eraseFromParent();
}
bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  // We're trying to match the following pattern:
  //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
  //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
  // -->
  //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)

  if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Add2 = MI.getOperand(1).getReg();
  Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  MachineInstr *Add2Def = MRI.getVRegDef(Add2);
  if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Base = Add2Def->getOperand(1).getReg();
  Register Imm2 = Add2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Check if the new combined immediate forms an illegal addressing mode.
  // Do not combine if it was legal before but would get illegal.
  // To do so, we need to find a load/store user of the pointer to get
  // the access type.
  Type *AccessTy = nullptr;
  auto &MF = *MI.getMF();
  for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
    if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
      AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
                               MF.getFunction().getContext());
      break;
    }
  }
  TargetLoweringBase::AddrMode AMNew;
  APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
  AMNew.BaseOffs = CombinedImm.getSExtValue();
  if (AccessTy) {
    AMNew.HasBaseReg = true;
    TargetLoweringBase::AddrMode AMOld;
    AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
    AMOld.HasBaseReg = true;
    unsigned AS = MRI.getType(Add2).getAddressSpace();
    const auto &TLI = *MF.getSubtarget().getTargetLowering();
    if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
        !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
      return false;
  }

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm = AMNew.BaseOffs;
  MatchInfo.Base = Base;
  MatchInfo.Bank = getRegBank(Imm2);
  return true;
}
void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  MachineIRBuilder MIB(MI);
  LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
  auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
  setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Base);
  MI.getOperand(2).setReg(NewOffset.getReg(0));
  Observer.changedInstr(MI);
}
bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
                                          RegisterImmPair &MatchInfo) {
  // We're trying to match the following pattern with any of
  // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
  //   %t1 = SHIFT %base, G_CONSTANT imm1
  //   %root = SHIFT %t1, G_CONSTANT imm2
  // -->
  //   %root = SHIFT %base, G_CONSTANT (imm1 + imm2)

  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
          Opcode == TargetOpcode::G_USHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");

  Register Shl2 = MI.getOperand(1).getReg();
  Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
  if (Shl2Def->getOpcode() != Opcode)
    return false;

  Register Base = Shl2Def->getOperand(1).getReg();
  Register Imm2 = Shl2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm =
      (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
  MatchInfo.Reg = Base;

  // There is no simple replacement for a saturating unsigned left shift that
  // exceeds the scalar size.
  if (Opcode == TargetOpcode::G_USHLSAT &&
      MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
    return false;

  return true;
}
void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
                                          RegisterImmPair &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
          Opcode == TargetOpcode::G_USHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");

  Builder.setInstrAndDebugLoc(MI);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
  auto Imm = MatchInfo.Imm;

  if (Imm >= ScalarSizeInBits) {
    // Any logical shift that exceeds scalar size will produce zero.
    if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
      Builder.buildConstant(MI.getOperand(0), 0);
      MI.eraseFromParent();
      return;
    }
    // Arithmetic shift and saturating signed left shift have no effect beyond
    // scalar size.
    Imm = ScalarSizeInBits - 1;
  }

  LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
  Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Reg);
  MI.getOperand(2).setReg(NewImm);
  Observer.changedInstr(MI);
}
bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
                                              ShiftOfShiftedLogic &MatchInfo) {
  // We're trying to match the following pattern with any of
  // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
  // with any of G_AND/G_OR/G_XOR logic instructions.
  //   %t1 = SHIFT %X, G_CONSTANT C0
  //   %t2 = LOGIC %t1, %Y
  //   %root = SHIFT %t2, G_CONSTANT C1
  // -->
  //   %t3 = SHIFT %X, G_CONSTANT (C0+C1)
  //   %t4 = SHIFT %Y, G_CONSTANT C1
  //   %root = LOGIC %t3, %t4
  unsigned ShiftOpcode = MI.getOpcode();
  assert((ShiftOpcode == TargetOpcode::G_SHL ||
          ShiftOpcode == TargetOpcode::G_ASHR ||
          ShiftOpcode == TargetOpcode::G_LSHR ||
          ShiftOpcode == TargetOpcode::G_USHLSAT ||
          ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");

  // Match a one-use bitwise logic op.
  Register LogicDest = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LogicDest))
    return false;

  MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
  unsigned LogicOpcode = LogicMI->getOpcode();
  if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
      LogicOpcode != TargetOpcode::G_XOR)
    return false;

  // Find a matching one-use shift by constant.
  const Register C1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
  if (!MaybeImmVal)
    return false;

  const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();

  auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
    // Shift should match previous one and should be a one-use.
    if (MI->getOpcode() != ShiftOpcode ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      return false;

    // Must be a constant.
    auto MaybeImmVal =
        getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
    if (!MaybeImmVal)
      return false;

    ShiftVal = MaybeImmVal->Value.getSExtValue();
    return true;
  };

  // Logic ops are commutative, so check each operand for a match.
  Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
  MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
  Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
  MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
  uint64_t C0Val;

  if (matchFirstShift(LogicMIOp1, C0Val)) {
    MatchInfo.LogicNonShiftReg = LogicMIReg2;
    MatchInfo.Shift2 = LogicMIOp1;
  } else if (matchFirstShift(LogicMIOp2, C0Val)) {
    MatchInfo.LogicNonShiftReg = LogicMIReg1;
    MatchInfo.Shift2 = LogicMIOp2;
  } else
    return false;

  MatchInfo.ValSum = C0Val + C1Val;

  // The fold is not valid if the sum of the shift values exceeds bitwidth.
  if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
    return false;

  MatchInfo.Logic = LogicMI;
  return true;
}
void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
                                              ShiftOfShiftedLogic &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
          Opcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");

  LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
  LLT DestType = MRI.getType(MI.getOperand(0).getReg());
  Builder.setInstrAndDebugLoc(MI);

  Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);

  Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
  Register Shift1 =
      Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);

  Register Shift2Const = MI.getOperand(2).getReg();
  Register Shift2 = Builder
                        .buildInstr(Opcode, {DestType},
                                    {MatchInfo.LogicNonShiftReg, Shift2Const})
                        .getReg(0);

  Register Dest = MI.getOperand(0).getReg();
  Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});

  // These were one use so it's safe to remove them.
  MatchInfo.Shift2->eraseFromParent();
  MatchInfo.Logic->eraseFromParent();

  MI.eraseFromParent();
}
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  auto MaybeImmVal =
      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value.exactLogBase2();
  return (static_cast<int32_t>(ShiftVal) != -1);
}
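// Illustrative example: G_MUL %x, 8 matches with ShiftVal = 3 and the apply
// step below rewrites it in place to G_SHL %x, 3; a multiplier of 6 is
// rejected because exactLogBase2() yields -1 for non-powers of two.
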
void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  MachineIRBuilder MIB(MI);
  LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
  auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
  Observer.changingInstr(MI);
  MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
  MI.getOperand(2).setReg(ShiftCst.getReg(0));
  Observer.changedInstr(MI);
}
// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
                                             RegisterImmPair &MatchData) {
  assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);

  Register LHS = MI.getOperand(1).getReg();

  Register ExtSrc;
  if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
      !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
      !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
    return false;

  // TODO: Should handle vector splat.
  Register RHS = MI.getOperand(2).getReg();
  auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!MaybeShiftAmtVal)
    return false;

  if (LI) {
    LLT SrcTy = MRI.getType(ExtSrc);

    // We only really care about the legality with the shifted value. We can
    // pick any type for the constant shift amount, so ask the target what to
    // use. Otherwise we would have to guess and hope it is reported as legal.
    LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
      return false;
  }

  int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
  MatchData.Reg = ExtSrc;
  MatchData.Imm = ShiftAmt;

  unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
  return MinLeadingZeros >= ShiftAmt;
}
void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
                                             const RegisterImmPair &MatchData) {
  Register ExtSrcReg = MatchData.Reg;
  int64_t ShiftAmtVal = MatchData.Imm;

  LLT ExtSrcTy = MRI.getType(ExtSrcReg);
  Builder.setInstrAndDebugLoc(MI);
  auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
  auto NarrowShift =
      Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
  Builder.buildZExt(MI.getOperand(0), NarrowShift);
  MI.eraseFromParent();
}
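// Illustrative example: with %x:_(s8) having at least its top two bits known
// zero, G_SHL (G_ZEXT %x to s32), 2 becomes G_ZEXT (G_SHL %x:_(s8), 2) to s32,
// since the shifted value still fits in the narrow source type.
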
bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
                                              Register &MatchInfo) {
  GMerge &Merge = cast<GMerge>(MI);
  SmallVector<Register, 16> MergedValues;
  for (unsigned I = 0; I < Merge.getNumSources(); ++I)
    MergedValues.emplace_back(Merge.getSourceReg(I));

  auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
  if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
    return false;

  for (unsigned I = 0; I < MergedValues.size(); ++I)
    if (MergedValues[I] != Unmerge->getReg(I))
      return false;

  MatchInfo = Unmerge->getSourceReg();
  return true;
}
static Register peekThroughBitcast(Register Reg,
                                   const MachineRegisterInfo &MRI) {
  while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
    ;

  return Reg;
}
1681 bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
1682 MachineInstr
&MI
, SmallVectorImpl
<Register
> &Operands
) {
1683 assert(MI
.getOpcode() == TargetOpcode::G_UNMERGE_VALUES
&&
1684 "Expected an unmerge");
1685 auto &Unmerge
= cast
<GUnmerge
>(MI
);
1686 Register SrcReg
= peekThroughBitcast(Unmerge
.getSourceReg(), MRI
);
1688 auto *SrcInstr
= getOpcodeDef
<GMergeLikeOp
>(SrcReg
, MRI
);
1692 // Check the source type of the merge.
1693 LLT SrcMergeTy
= MRI
.getType(SrcInstr
->getSourceReg(0));
1694 LLT Dst0Ty
= MRI
.getType(Unmerge
.getReg(0));
1695 bool SameSize
= Dst0Ty
.getSizeInBits() == SrcMergeTy
.getSizeInBits();
1696 if (SrcMergeTy
!= Dst0Ty
&& !SameSize
)
1698 // They are the same now (modulo a bitcast).
1699 // We can collect all the src registers.
1700 for (unsigned Idx
= 0; Idx
< SrcInstr
->getNumSources(); ++Idx
)
1701 Operands
.push_back(SrcInstr
->getSourceReg(Idx
));
void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  assert((MI.getNumOperands() - 1 == Operands.size()) &&
         "Not enough operands to replace all defs");
  unsigned NumElems = MI.getNumOperands() - 1;

  LLT SrcTy = MRI.getType(Operands[0]);
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  bool CanReuseInputDirectly = DstTy == SrcTy;
  Builder.setInstrAndDebugLoc(MI);
  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
    Register DstReg = MI.getOperand(Idx).getReg();
    Register SrcReg = Operands[Idx];
    if (CanReuseInputDirectly)
      replaceRegWith(MRI, DstReg, SrcReg);
    else
      Builder.buildCast(DstReg, SrcReg);
  }
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
                                                 SmallVectorImpl<APInt> &Csts) {
  unsigned SrcIdx = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(SrcIdx).getReg();
  MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
  if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
      SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
    return false;
  // Break down the big constant in smaller ones.
  const MachineOperand &CstVal = SrcInstr->getOperand(1);
  APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
                  ? CstVal.getCImm()->getValue()
                  : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();

  LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned ShiftAmt = Dst0Ty.getSizeInBits();
  // Unmerge a constant.
  for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
    Csts.emplace_back(Val.trunc(ShiftAmt));
    Val = Val.lshr(ShiftAmt);
  }

  return true;
}

void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
                                                 SmallVectorImpl<APInt> &Csts) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  assert((MI.getNumOperands() - 1 == Csts.size()) &&
         "Not enough operands to replace all defs");
  unsigned NumElems = MI.getNumOperands() - 1;
  Builder.setInstrAndDebugLoc(MI);
  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
    Register DstReg = MI.getOperand(Idx).getReg();
    Builder.buildConstant(DstReg, Csts[Idx]);
  }

  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineUnmergeUndef(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  unsigned SrcIdx = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(SrcIdx).getReg();
  MatchInfo = [&MI](MachineIRBuilder &B) {
    unsigned NumElems = MI.getNumOperands() - 1;
    for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
      Register DstReg = MI.getOperand(Idx).getReg();
      B.buildUndef(DstReg);
    }
  };
  return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
}

bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  // Check that all the lanes are dead except the first one.
  for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
    if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
      return false;
  }
  return true;
}

void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
  Builder.setInstrAndDebugLoc(MI);
  Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  // Truncating a vector is going to truncate every single lane,
  // whereas we want the full lowbits.
  // Do the operation on a scalar instead.
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    SrcReg =
        Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  if (Dst0Ty.isVector()) {
    auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
    Builder.buildCast(Dst0Reg, MIB);
  } else
    Builder.buildTrunc(Dst0Reg, SrcReg);
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  // G_ZEXT on vector applies to each lane, so it will
  // affect all destinations. Therefore we won't be able
  // to simplify the unmerge to just the first definition.
  if (Dst0Ty.isVector())
    return false;
  Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return false;

  Register ZExtSrcReg;
  if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
    return false;

  // Finally we can replace the first definition with
  // a zext of the source if the definition is big enough to hold
  // all of ZExtSrc bits.
  LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
  return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
}

void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");

  Register Dst0Reg = MI.getOperand(0).getReg();

  MachineInstr *ZExtInstr =
      MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
  assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
         "Expecting a G_ZEXT");

  Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);

  Builder.setInstrAndDebugLoc(MI);

  if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
    Builder.buildZExt(Dst0Reg, ZExtSrcReg);
  } else {
    assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
           "ZExt src doesn't fit in destination");
    replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
  }

  Register ZeroReg;
  for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
    if (!ZeroReg)
      ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
    replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
  }
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
                                                unsigned TargetShiftSize,
                                                unsigned &ShiftVal) {
  assert((MI.getOpcode() == TargetOpcode::G_SHL ||
          MI.getOpcode() == TargetOpcode::G_LSHR ||
          MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO:
    return false;

  // Don't narrow further than the requested size.
  unsigned Size = Ty.getSizeInBits();
  if (Size <= TargetShiftSize)
    return false;

  auto MaybeImmVal =
      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value.getSExtValue();
  return ShiftVal >= Size / 2 && ShiftVal < Size;
}

void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
                                                const unsigned &ShiftVal) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  unsigned Size = Ty.getSizeInBits();
  unsigned HalfSize = Size / 2;
  assert(ShiftVal >= HalfSize);

  LLT HalfTy = LLT::scalar(HalfSize);

  Builder.setInstr(MI);
  auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
  unsigned NarrowShiftAmt = ShiftVal - HalfSize;

  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    Register Narrowed = Unmerge.getReg(1);

    //  dst = G_LSHR s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0

    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildLShr(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Narrowed, Zero });
  } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
    Register Narrowed = Unmerge.getReg(0);
    //  dst = G_SHL s64:x, C for C >= 32
    // =>
    //   lo, hi = G_UNMERGE_VALUES x
    //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
    if (NarrowShiftAmt != 0) {
      Narrowed = Builder.buildShl(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }

    auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Zero, Narrowed });
  } else {
    assert(MI.getOpcode() == TargetOpcode::G_ASHR);
    auto Hi = Builder.buildAShr(
      HalfTy, Unmerge.getReg(1),
      Builder.buildConstant(HalfTy, HalfSize - 1));

    if (ShiftVal == HalfSize) {
      // (G_ASHR i64:x, 32) ->
      //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
    } else if (ShiftVal == Size - 1) {
      // Don't need a second shift.
      // (G_ASHR i64:x, 63) ->
      //   %narrowed = (G_ASHR hi_32(x), 31)
      //   G_MERGE_VALUES %narrowed, %narrowed
      Builder.buildMerge(DstReg, { Hi, Hi });
    } else {
      auto Lo = Builder.buildAShr(
        HalfTy, Unmerge.getReg(1),
        Builder.buildConstant(HalfTy, ShiftVal - HalfSize));

      // (G_ASHR i64:x, C) ->, for C >= 32
      //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Lo, Hi });
    }
  }

  MI.eraseFromParent();
}

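// Illustrative instance (not in the original source; register names are
// hypothetical): for a 64-bit
//   %r:_(s64) = G_LSHR %x, 48
// the combine above produces
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
//   %s:_(s32) = G_LSHR %hi, 16
//   %zero:_(s32) = G_CONSTANT i32 0
//   %r:_(s64) = G_MERGE_VALUES %s, %zero
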
bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
                                              unsigned TargetShiftAmount) {
  unsigned ShiftAmt;
  if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
    applyCombineShiftToUnmerge(MI, ShiftAmt);
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = MI.getOperand(1).getReg();
  return mi_match(SrcReg, MRI,
                  m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
}

void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInstr(MI);
  Builder.buildCopy(DstReg, Reg);
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
  Register SrcReg = MI.getOperand(1).getReg();
  return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
}

void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInstr(MI);
  Builder.buildZExtOrTrunc(DstReg, Reg);
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineAddP2IToPtrAdd(
    MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
  assert(MI.getOpcode() == TargetOpcode::G_ADD);
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT IntTy = MRI.getType(LHS);

  // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
  // instruction.
  PtrReg.second = false;
  for (Register SrcReg : {LHS, RHS}) {
    if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
      // Don't handle cases where the integer is implicitly converted to the
      // pointer width.
      LLT PtrTy = MRI.getType(PtrReg.first);
      if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
        return true;
    }

    PtrReg.second = true;
  }

  return false;
}

void CombinerHelper::applyCombineAddP2IToPtrAdd(
    MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
  Register Dst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();

  const bool DoCommute = PtrReg.second;
  if (DoCommute)
    std::swap(LHS, RHS);
  LHS = PtrReg.first;

  LLT PtrTy = MRI.getType(LHS);

  Builder.setInstrAndDebugLoc(MI);
  auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
  Builder.buildPtrToInt(Dst, PtrAdd);
  MI.eraseFromParent();
}

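// Illustrative MIR sketch (not in the original source; register names are
// hypothetical): the combine above rewrites integer arithmetic on a converted
// pointer, e.g.
//   %i:_(s64) = G_PTRTOINT %p(p0)
//   %r:_(s64) = G_ADD %i, %off
// into
//   %q:_(p0) = G_PTR_ADD %p, %off(s64)
//   %r:_(s64) = G_PTRTOINT %q(p0)
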
bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
                                                  APInt &NewCst) {
  auto &PtrAdd = cast<GPtrAdd>(MI);
  Register LHS = PtrAdd.getBaseReg();
  Register RHS = PtrAdd.getOffsetReg();
  MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();

  if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
    APInt Cst;
    if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
      auto DstTy = MRI.getType(PtrAdd.getReg(0));
      // G_INTTOPTR uses zero-extension
      NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
      NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
      return true;
    }
  }

  return false;
}

void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
                                                  APInt &NewCst) {
  auto &PtrAdd = cast<GPtrAdd>(MI);
  Register Dst = PtrAdd.getReg(0);

  Builder.setInstrAndDebugLoc(MI);
  Builder.buildConstant(Dst, NewCst);
  PtrAdd.eraseFromParent();
}

bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  return mi_match(SrcReg, MRI,
                  m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
}

bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (mi_match(SrcReg, MRI,
               m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
    unsigned DstSize = DstTy.getScalarSizeInBits();
    unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
    return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
  }
  return false;
}

bool CombinerHelper::matchCombineExtOfExt(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
          MI.getOpcode() == TargetOpcode::G_SEXT ||
          MI.getOpcode() == TargetOpcode::G_ZEXT) &&
         "Expected a G_[ASZ]EXT");
  Register SrcReg = MI.getOperand(1).getReg();
  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
  // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
  unsigned Opc = MI.getOpcode();
  unsigned SrcOpc = SrcMI->getOpcode();
  if (Opc == SrcOpc ||
      (Opc == TargetOpcode::G_ANYEXT &&
       (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
      (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
    MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
    return true;
  }
  return false;
}

void CombinerHelper::applyCombineExtOfExt(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
          MI.getOpcode() == TargetOpcode::G_SEXT ||
          MI.getOpcode() == TargetOpcode::G_ZEXT) &&
         "Expected a G_[ASZ]EXT");

  Register Reg = std::get<0>(MatchInfo);
  unsigned SrcExtOp = std::get<1>(MatchInfo);

  // Combine exts with the same opcode.
  if (MI.getOpcode() == SrcExtOp) {
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(Reg);
    Observer.changedInstr(MI);
    return;
  }

  // Combine:
  // - anyext([sz]ext x) to [sz]ext x
  // - sext(zext x) to zext x
  if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
      (MI.getOpcode() == TargetOpcode::G_SEXT &&
       SrcExtOp == TargetOpcode::G_ZEXT)) {
    Register DstReg = MI.getOperand(0).getReg();
    Builder.setInstrAndDebugLoc(MI);
    Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
    MI.eraseFromParent();
  }
}

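// Illustrative MIR sketch (not in the original source; register names are
// hypothetical): because sext(zext x) extends with the zeros introduced by the
// inner zext, the combine above folds
//   %z:_(s32) = G_ZEXT %x(s8)
//   %r:_(s64) = G_SEXT %z(s32)
// into
//   %r:_(s64) = G_ZEXT %x(s8)
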
void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);

  Builder.setInstrAndDebugLoc(MI);
  Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
                   MI.getFlags());
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
  Register SrcReg = MI.getOperand(1).getReg();
  return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
}

bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
  assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
  Src = MI.getOperand(1).getReg();
  Register AbsSrc;
  return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
}

bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
                                            BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
  Register Src = MI.getOperand(1).getReg();
  Register NegSrc;

  if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc))))
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(NegSrc);
    Observer.changedInstr(MI);
  };
  return true;
}

bool CombinerHelper::matchCombineTruncOfExt(
    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register SrcReg = MI.getOperand(1).getReg();
  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
  unsigned SrcOpc = SrcMI->getOpcode();
  if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
      SrcOpc == TargetOpcode::G_ZEXT) {
    MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
    return true;
  }
  return false;
}

void CombinerHelper::applyCombineTruncOfExt(
    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register SrcReg = MatchInfo.first;
  unsigned SrcExtOp = MatchInfo.second;
  Register DstReg = MI.getOperand(0).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  LLT DstTy = MRI.getType(DstReg);
  if (SrcTy == DstTy) {
    MI.eraseFromParent();
    replaceRegWith(MRI, DstReg, SrcReg);
    return;
  }
  Builder.setInstrAndDebugLoc(MI);
  if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
    Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
  else
    Builder.buildTrunc(DstReg, SrcReg);
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineTruncOfShl(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register ShiftSrc;
  Register ShiftAmt;

  if (MRI.hasOneNonDBGUse(SrcReg) &&
      mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
      isLegalOrBeforeLegalizer(
          {TargetOpcode::G_SHL,
           {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
    KnownBits Known = KB->getKnownBits(ShiftAmt);
    unsigned Size = DstTy.getSizeInBits();
    if (Known.countMaxActiveBits() <= Log2_32(Size)) {
      MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
      return true;
    }
  }
  return false;
}

void CombinerHelper::applyCombineTruncOfShl(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);

  Register ShiftSrc = MatchInfo.first;
  Register ShiftAmt = MatchInfo.second;
  Builder.setInstrAndDebugLoc(MI);
  auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
  Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
  MI.eraseFromParent();
}

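// Illustrative MIR sketch (not in the original source; register names are
// hypothetical): when the shift amount is known to be smaller than the narrow
// bit width, the combine above rewrites
//   %s:_(s64) = G_SHL %x, %amt
//   %r:_(s32) = G_TRUNC %s(s64)
// into
//   %t:_(s32) = G_TRUNC %x(s64)
//   %r:_(s32) = G_SHL %t, %amt
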
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
  return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return MO.isReg() &&
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
  return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
    return !MO.isReg() ||
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}

bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  return all_of(Mask, [](int Elt) { return Elt < 0; });
}

bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
                      MRI);
}

bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
                      MRI);
}

bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
  GSelect &SelMI = cast<GSelect>(MI);
  auto Cst =
      isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
  if (!Cst)
    return false;
  OpIdx = Cst->isZero() ? 3 : 2;
  return true;
}

bool CombinerHelper::eraseInst(MachineInstr &MI) {
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
                                    const MachineOperand &MOP2) {
  if (!MOP1.isReg() || !MOP2.isReg())
    return false;
  auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
  if (!InstAndDef1)
    return false;
  auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
  if (!InstAndDef2)
    return false;
  MachineInstr *I1 = InstAndDef1->MI;
  MachineInstr *I2 = InstAndDef2->MI;

  // Handle a case like this:
  //
  // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
  //
  // Even though %0 and %1 are produced by the same instruction they are not
  // the same values.
  if (I1 == I2)
    return MOP1.getReg() == MOP2.getReg();

  // If we have an instruction which loads or stores, we can't guarantee that
  // it is identical.
  //
  // For example, we may have
  //
  // %x1 = G_LOAD %addr (load N from @somewhere)
  // ...
  // call @foo
  // ...
  // %x2 = G_LOAD %addr (load N from @somewhere)
  // ...
  // %or = G_OR %x1, %x2
  //
  // It's possible that @foo will modify whatever lives at the address we're
  // loading from. To be safe, let's just assume that all loads and stores
  // are different (unless we have something which is guaranteed to not
  // change.)
  if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
    return false;

  // If both instructions are loads or stores, they are equal only if both
  // are dereferenceable invariant loads with the same number of bits.
  if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
    GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
    GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
    if (!LS1 || !LS2)
      return false;

    if (!I2->isDereferenceableInvariantLoad(nullptr) ||
        (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
      return false;
  }

  // Check for physical registers on the instructions first to avoid cases
  // like this:
  //
  // %a = COPY $physreg
  // ...
  // SOMETHING implicit-def $physreg
  // ...
  // %b = COPY $physreg
  //
  // These copies are not equivalent.
  if (any_of(I1->uses(), [](const MachineOperand &MO) {
        return MO.isReg() && MO.getReg().isPhysical();
      })) {
    // Check if we have a case like this:
    //
    // %a = COPY $physreg
    // %b = COPY %a
    //
    // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
    // From that, we know that they must have the same value, since they must
    // have come from the same COPY.
    return I1->isIdenticalTo(*I2);
  }

  // We don't have any physical registers, so we don't necessarily need the
  // same vreg defs.
  //
  // On the off-chance that there's some target instruction feeding into the
  // instruction, let's use produceSameValue instead of isIdenticalTo.
  if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
    // Handle instructions with multiple defs that produce same values. Values
    // are same for operands with same index.
    // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
    // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
    // I1 and I2 are different instructions but produce same values,
    // %1 and %6 are same, %1 and %7 are not the same value.
    return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
           I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
  }
  return false;
}

bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
  if (!MOP.isReg())
    return false;
  auto *MI = MRI.getVRegDef(MOP.getReg());
  auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
  return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 &&
         MaybeCst->getSExtValue() == C;
}

bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                     unsigned OpIdx) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  Register Replacement = MI.getOperand(OpIdx).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}

bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
                                                 Register Replacement) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  Register OldReg = MI.getOperand(0).getReg();
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}

bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  // Match (cond ? x : x)
  return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
                       MRI);
}

bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
  return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                       MRI);
}

bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
  return matchConstantOp(MI.getOperand(OpIdx), 0) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
                       MRI);
}

bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  return MO.isReg() &&
         getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
}

bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
                                                        unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
}

bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildFConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  Builder.setInstr(MI);
  Builder.buildUndef(MI.getOperand(0));
  MI.eraseFromParent();
  return true;
}

bool CombinerHelper::matchSimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  Register &NewLHS = std::get<0>(MatchInfo);
  Register &NewRHS = std::get<1>(MatchInfo);

  // Helper lambda to check for opportunities for
  // ((0-A) + B) -> B - A
  // (A + (0-B)) -> A - B
  auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
    if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
      return false;
    NewLHS = MaybeNewLHS;
    return true;
  };

  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}

bool CombinerHelper::matchCombineInsertVecElts(
    MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
         "Invalid opcode");
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
  unsigned NumElts = DstTy.getNumElements();
  // If this MI is part of a sequence of insert_vec_elts, then
  // don't do the combine in the middle of the sequence.
  if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
                                   TargetOpcode::G_INSERT_VECTOR_ELT)
    return false;
  MachineInstr *CurrInst = &MI;
  MachineInstr *TmpInst;
  int64_t IntImm;
  Register TmpReg;
  MatchInfo.resize(NumElts);
  while (mi_match(
      CurrInst->getOperand(0).getReg(), MRI,
      m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
    if (IntImm >= NumElts)
      return false;
    if (!MatchInfo[IntImm])
      MatchInfo[IntImm] = TmpReg;
    CurrInst = TmpInst;
  }
  // Variable index.
  if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    return false;
  if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
    for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
      if (!MatchInfo[I - 1].isValid())
        MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
    }
    return true;
  }
  // If we didn't end in a G_IMPLICIT_DEF, bail out.
  return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
}

void CombinerHelper::applyCombineInsertVecElts(
    MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
  Builder.setInstr(MI);
  Register UndefReg;
  auto GetUndef = [&]() {
    if (UndefReg)
      return UndefReg;
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
    return UndefReg;
  };
  for (unsigned I = 0; I < MatchInfo.size(); ++I) {
    if (!MatchInfo[I])
      MatchInfo[I] = GetUndef();
  }
  Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
  MI.eraseFromParent();
}

void CombinerHelper::applySimplifyAddToSub(
    MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  Builder.setInstr(MI);
  Register SubLHS, SubRHS;
  std::tie(SubLHS, SubRHS) = MatchInfo;
  Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
  MI.eraseFromParent();
}

bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
    MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
  // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
  //
  // Creates the new hand + logic instruction (but does not insert them.)
  //
  // On success, MatchInfo is populated with the new instructions. These are
  // inserted in applyHoistLogicOpWithSameOpcodeHands.
  unsigned LogicOpcode = MI.getOpcode();
  assert(LogicOpcode == TargetOpcode::G_AND ||
         LogicOpcode == TargetOpcode::G_OR ||
         LogicOpcode == TargetOpcode::G_XOR);
  MachineIRBuilder MIB(MI);
  Register Dst = MI.getOperand(0).getReg();
  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();

  // Don't recompute anything.
  if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
    return false;

  // Make sure we have (hand x, ...), (hand y, ...)
  MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
  MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
  if (!LeftHandInst || !RightHandInst)
    return false;
  unsigned HandOpcode = LeftHandInst->getOpcode();
  if (HandOpcode != RightHandInst->getOpcode())
    return false;
  if (!LeftHandInst->getOperand(1).isReg() ||
      !RightHandInst->getOperand(1).isReg())
    return false;

  // Make sure the types match up, and if we're doing this post-legalization,
  // we end up with legal types.
  Register X = LeftHandInst->getOperand(1).getReg();
  Register Y = RightHandInst->getOperand(1).getReg();
  LLT XTy = MRI.getType(X);
  LLT YTy = MRI.getType(Y);
  if (XTy != YTy)
    return false;
  if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
    return false;

  // Optional extra source register.
  Register ExtraHandOpSrcReg;
  switch (HandOpcode) {
  default:
    return false;
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT: {
    // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
    break;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_SHL: {
    // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
    MachineOperand &ZOp = LeftHandInst->getOperand(2);
    if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
      return false;
    ExtraHandOpSrcReg = ZOp.getReg();
    break;
  }
  }

  // Record the steps to build the new instructions.
  //
  // Steps to build (logic x, y)
  auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
  OperandBuildSteps LogicBuildSteps = {
      [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
  InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);

  // Steps to build hand (logic x, y), ...z
  OperandBuildSteps HandBuildSteps = {
      [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
  if (ExtraHandOpSrcReg.isValid())
    HandBuildSteps.push_back(
        [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
  InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);

  MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
  return true;
}

void CombinerHelper::applyBuildInstructionSteps(
    MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
  assert(MatchInfo.InstrsToBuild.size() &&
         "Expected at least one instr to build?");
  Builder.setInstr(MI);
  for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
    assert(InstrToBuild.Opcode && "Expected a valid opcode?");
    assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
    MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
    for (auto &OperandFn : InstrToBuild.OperandFns)
      OperandFn(Instr);
  }
  MI.eraseFromParent();
}

bool CombinerHelper::matchAshrShlToSextInreg(
    MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR);
  int64_t ShlCst, AshrCst;
  Register Src;
  // FIXME: detect splat constant vectors.
  if (!mi_match(MI.getOperand(0).getReg(), MRI,
                m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
    return false;
  if (ShlCst != AshrCst)
    return false;
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
    return false;
  MatchInfo = std::make_tuple(Src, ShlCst);
  return true;
}

void CombinerHelper::applyAshShlToSextInreg(
    MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR);
  Register Src;
  int64_t ShiftAmt;
  std::tie(Src, ShiftAmt) = MatchInfo;
  unsigned Size = MRI.getType(Src).getScalarSizeInBits();
  Builder.setInstrAndDebugLoc(MI);
  Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
  MI.eraseFromParent();
}

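// Illustrative MIR sketch (not in the original source; register names are
// hypothetical): with matching shift amounts, the ashr/shl pair above is a
// sign-extension of the low bits, e.g. on s32:
//   %s:_(s32) = G_SHL %x, 24
//   %r:_(s32) = G_ASHR %s, 24
// becomes
//   %r:_(s32) = G_SEXT_INREG %x, 8
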
/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
bool CombinerHelper::matchOverlappingAnd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);

  Register R;
  int64_t C1;
  int64_t C2;
  if (!mi_match(
          Dst, MRI,
          m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    if (C1 & C2) {
      B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
      return;
    }
    auto Zero = B.buildConstant(Ty, 0);
    replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
  };
  return true;
}

bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
                                       Register &Replacement) {
  // Given
  //
  // %y:_(sN) = G_SOMETHING
  // %x:_(sN) = G_SOMETHING
  // %res:_(sN) = G_AND %x, %y
  //
  // Eliminate the G_AND when it is known that x & y == x or x & y == y.
  //
  // Patterns like this can appear as a result of legalization. E.g.
  //
  // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
  // %one:_(s32) = G_CONSTANT i32 1
  // %and:_(s32) = G_AND %cmp, %one
  //
  // In this case, G_ICMP only produces a single bit, so x & 1 == x.
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  if (!KB)
    return false;

  Register AndDst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(AndDst);

  // FIXME: This should be removed once GISelKnownBits supports vectors.
  if (DstTy.isVector())
    return false;

  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  KnownBits LHSBits = KB->getKnownBits(LHS);
  KnownBits RHSBits = KB->getKnownBits(RHS);

  // Check that x & Mask == x.
  // x & 1 == x, always
  // x & 0 == x, only if x is also 0
  // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
  //
  // Check if we can replace AndDst with the LHS of the G_AND
  if (canReplaceReg(AndDst, LHS, MRI) &&
      (LHSBits.Zero | RHSBits.One).isAllOnes()) {
    Replacement = LHS;
    return true;
  }

  // Check if we can replace AndDst with the RHS of the G_AND
  if (canReplaceReg(AndDst, RHS, MRI) &&
      (LHSBits.One | RHSBits.Zero).isAllOnes()) {
    Replacement = RHS;
    return true;
  }

  return false;
}

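// Illustrative instance (not in the original source; register names are
// hypothetical): if %c is the result of a G_ICMP (only a single bit can be
// set), then in
//   %one:_(s32) = G_CONSTANT i32 1
//   %r:_(s32) = G_AND %c, %one
// every bit is either zero in %c or one in the mask, so the match above lets
// %r be replaced by %c.
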
bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
  // Given
  //
  // %y:_(sN) = G_SOMETHING
  // %x:_(sN) = G_SOMETHING
  // %res:_(sN) = G_OR %x, %y
  //
  // Eliminate the G_OR when it is known that x | y == x or x | y == y.
  assert(MI.getOpcode() == TargetOpcode::G_OR);
  if (!KB)
    return false;

  Register OrDst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(OrDst);

  // FIXME: This should be removed once GISelKnownBits supports vectors.
  if (DstTy.isVector())
    return false;

  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  KnownBits LHSBits = KB->getKnownBits(LHS);
  KnownBits RHSBits = KB->getKnownBits(RHS);

  // Check that x | Mask == x.
  // x | 0 == x, always
  // x | 1 == x, only if x is also 1
  // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
  //
  // Check if we can replace OrDst with the LHS of the G_OR
  if (canReplaceReg(OrDst, LHS, MRI) &&
      (LHSBits.One | RHSBits.Zero).isAllOnes()) {
    Replacement = LHS;
    return true;
  }

  // Check if we can replace OrDst with the RHS of the G_OR
  if (canReplaceReg(OrDst, RHS, MRI) &&
      (LHSBits.Zero | RHSBits.One).isAllOnes()) {
    Replacement = RHS;
    return true;
  }

  return false;
}

bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
  // If the input is already sign extended, just drop the extension.
  Register Src = MI.getOperand(1).getReg();
  unsigned ExtBits = MI.getOperand(2).getImm();
  unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
  return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
}

static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
                             int64_t Cst, bool IsVector, bool IsFP) {
  // For i1, Cst will always be -1 regardless of boolean contents.
  return (ScalarSizeBits == 1 && Cst == -1) ||
         isConstTrueVal(TLI, Cst, IsVector, IsFP);
}

bool CombinerHelper::matchNotCmp(MachineInstr &MI,
                                 SmallVectorImpl<Register> &RegsToNegate) {
  assert(MI.getOpcode() == TargetOpcode::G_XOR);
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
  Register XorSrc;
  Register CstReg;
  // We match xor(src, true) here.
  if (!mi_match(MI.getOperand(0).getReg(), MRI,
                m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
    return false;

  if (!MRI.hasOneNonDBGUse(XorSrc))
    return false;

  // Check that XorSrc is the root of a tree of comparisons combined with ANDs
  // and ORs. The suffix of RegsToNegate starting from index I is used as a
  // work list of tree nodes to visit.
  RegsToNegate.push_back(XorSrc);
  // Remember whether the comparisons are all integer or all floating point.
  bool IsInt = false;
  bool IsFP = false;
  for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
    Register Reg = RegsToNegate[I];
    if (!MRI.hasOneNonDBGUse(Reg))
      return false;
    MachineInstr *Def = MRI.getVRegDef(Reg);
    switch (Def->getOpcode()) {
    default:
      // Don't match if the tree contains anything other than ANDs, ORs and
      // comparisons.
      return false;
    case TargetOpcode::G_ICMP:
      if (IsFP)
        return false;
      IsInt = true;
      // When we apply the combine we will invert the predicate.
      break;
    case TargetOpcode::G_FCMP:
      if (IsInt)
        return false;
      IsFP = true;
      // When we apply the combine we will invert the predicate.
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_OR:
      // Implement De Morgan's laws:
      // ~(x & y) -> ~x | ~y
      // ~(x | y) -> ~x & ~y
      // When we apply the combine we will change the opcode and recursively
      // negate the operands.
      RegsToNegate.push_back(Def->getOperand(1).getReg());
      RegsToNegate.push_back(Def->getOperand(2).getReg());
      break;
    }
  }

  // Now we know whether the comparisons are integer or floating point, check
  // the constant in the xor.
  int64_t Cst;
  if (Ty.isVector()) {
    MachineInstr *CstDef = MRI.getVRegDef(CstReg);
    auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
    if (!MaybeCst)
      return false;
    if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
      return false;
  } else {
    if (!mi_match(CstReg, MRI, m_ICst(Cst)))
      return false;
    if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
      return false;
  }

  return true;
}

void CombinerHelper::applyNotCmp(MachineInstr &MI,
                                 SmallVectorImpl<Register> &RegsToNegate) {
  for (Register Reg : RegsToNegate) {
    MachineInstr *Def = MRI.getVRegDef(Reg);
    Observer.changingInstr(*Def);
    // For each comparison, invert the opcode. For each AND and OR, change the
    // opcode.
    switch (Def->getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    case TargetOpcode::G_ICMP:
    case TargetOpcode::G_FCMP: {
      MachineOperand &PredOp = Def->getOperand(1);
      CmpInst::Predicate NewP = CmpInst::getInversePredicate(
          (CmpInst::Predicate)PredOp.getPredicate());
      PredOp.setPredicate(NewP);
      break;
    }
    case TargetOpcode::G_AND:
      Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
      break;
    case TargetOpcode::G_OR:
      Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
      break;
    }
    Observer.changedInstr(*Def);
  }

  replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
}

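// Illustrative instance (not in the original source; register names are
// hypothetical): for
//   %a:_(s1) = G_ICMP intpred(eq), %x, %y
//   %b:_(s1) = G_ICMP intpred(slt), %x, %z
//   %o:_(s1) = G_OR %a, %b
//   %true:_(s1) = G_CONSTANT i1 true
//   %r:_(s1) = G_XOR %o, %true
// the apply step above inverts both predicates (ne, sge), turns the G_OR into
// a G_AND, and replaces %r with the negated tree, dropping the G_XOR.
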
bool CombinerHelper::matchXorOfAndWithSameReg(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  // Match (xor (and x, y), y) (or any of its commuted cases)
  assert(MI.getOpcode() == TargetOpcode::G_XOR);
  Register &X = MatchInfo.first;
  Register &Y = MatchInfo.second;
  Register AndReg = MI.getOperand(1).getReg();
  Register SharedReg = MI.getOperand(2).getReg();

  // Find a G_AND on either side of the G_XOR.
  // Look for one of
  //
  // (xor (and x, y), SharedReg)
  // (xor SharedReg, (and x, y))
  if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
    std::swap(AndReg, SharedReg);
    if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
      return false;
  }

  // Only do this if we'll eliminate the G_AND.
  if (!MRI.hasOneNonDBGUse(AndReg))
    return false;

  // We can combine if SharedReg is the same as either the LHS or RHS of the
  // G_AND.
  if (Y != SharedReg)
    std::swap(X, Y);
  return Y == SharedReg;
}

void CombinerHelper::applyXorOfAndWithSameReg(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  // Fold (xor (and x, y), y) -> (and (not x), y)
  Builder.setInstrAndDebugLoc(MI);
  Register X, Y;
  std::tie(X, Y) = MatchInfo;
  auto Not = Builder.buildNot(MRI.getType(X), X);
  Observer.changingInstr(MI);
  MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
  MI.getOperand(1).setReg(Not->getOperand(0).getReg());
  MI.getOperand(2).setReg(Y);
  Observer.changedInstr(MI);
}

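// Illustrative MIR sketch (not in the original source; register names are
// hypothetical): the fold above turns
//   %a:_(s32) = G_AND %x, %y
//   %r:_(s32) = G_XOR %a, %y
// into
//   %n:_(s32) = G_XOR %x, -1   ; all-ones XOR, i.e. a NOT of %x
//   %r:_(s32) = G_AND %n, %y
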
bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
  auto &PtrAdd = cast<GPtrAdd>(MI);
  Register DstReg = PtrAdd.getReg(0);
  LLT Ty = MRI.getType(DstReg);
  const DataLayout &DL = Builder.getMF().getDataLayout();

  if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
    return false;

  if (Ty.isPointer()) {
    auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
    return ConstVal && *ConstVal == 0;
  }

  assert(Ty.isVector() && "Expecting a vector type");
  const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
  return isBuildVectorAllZeros(*VecMI, MRI);
}

void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
  auto &PtrAdd = cast<GPtrAdd>(MI);
  Builder.setInstrAndDebugLoc(PtrAdd);
  Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
  PtrAdd.eraseFromParent();
}

/// The second source operand is known to be a power of 2.
void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Pow2Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(DstReg);
  Builder.setInstrAndDebugLoc(MI);

  // Fold (urem x, pow2) -> (and x, pow2-1)
  auto NegOne = Builder.buildConstant(Ty, -1);
  auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
  Builder.buildAnd(DstReg, Src0, Add);
  MI.eraseFromParent();
}

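// Illustrative instance (not in the original source; register names are
// hypothetical): when %p is known to be a power of two, e.g. 16, the rewrite
// above turns
//   %r:_(s32) = G_UREM %x, %p
// into
//   %m:_(s32) = G_ADD %p, -1    ; 15, the low-bit mask
//   %r:_(s32) = G_AND %x, %m
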
bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
                                              unsigned &SelectOpNo) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();

  Register OtherOperandReg = RHS;
  SelectOpNo = 1;
  MachineInstr *Select = MRI.getVRegDef(LHS);

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  if (Select->getOpcode() != TargetOpcode::G_SELECT ||
      !MRI.hasOneNonDBGUse(LHS)) {
    OtherOperandReg = LHS;
    SelectOpNo = 2;
    Select = MRI.getVRegDef(RHS);
    if (Select->getOpcode() != TargetOpcode::G_SELECT ||
        !MRI.hasOneNonDBGUse(RHS))
      return false;
  }

  MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
  MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());

  if (!isConstantOrConstantVector(*SelectLHS, MRI,
                                  /*AllowFP*/ true,
                                  /*AllowOpaqueConstants*/ false))
    return false;
  if (!isConstantOrConstantVector(*SelectRHS, MRI,
                                  /*AllowFP*/ true,
                                  /*AllowOpaqueConstants*/ false))
    return false;

  unsigned BinOpcode = MI.getOpcode();

  // We now know one of the operands is a select of constants. Now verify that
  // the other binary operator operand is either a constant, or we can handle a
  // variable.
  bool CanFoldNonConst =
      (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
      (isNullOrNullSplat(*SelectLHS, MRI) ||
       isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
      (isNullOrNullSplat(*SelectRHS, MRI) ||
       isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
  if (CanFoldNonConst)
    return true;

  return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
                                    /*AllowFP*/ true,
                                    /*AllowOpaqueConstants*/ false);
}

/// \p SelectOperand is the operand in binary operator \p MI that is the select
/// to fold.
bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
                                              const unsigned &SelectOperand) {
  Builder.setInstrAndDebugLoc(MI);

  Register Dst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());

  Register SelectCond = Select->getOperand(1).getReg();
  Register SelectTrue = Select->getOperand(2).getReg();
  Register SelectFalse = Select->getOperand(3).getReg();

  LLT Ty = MRI.getType(Dst);
  unsigned BinOpcode = MI.getOpcode();

  Register FoldTrue, FoldFalse;

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  if (SelectOperand == 1) {
    // TODO: SelectionDAG verifies this actually constant folds before
    // committing to the combine.

    FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
    FoldFalse =
        Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
  } else {
    FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
    FoldFalse =
        Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
  }

  Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
  Observer.erasingInstr(*Select);
  Select->eraseFromParent();
  MI.eraseFromParent();

  return true;
}

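// Illustrative instance (not in the original source; register names are
// hypothetical): for
//   %s:_(s32) = G_SELECT %cond, %five, %zero
//   %r:_(s32) = G_ADD %s, %ten
// the fold above produces
//   %t:_(s32) = G_ADD %five, %ten
//   %f:_(s32) = G_ADD %zero, %ten
//   %r:_(s32) = G_SELECT %cond, %t, %f
// and erases both the original G_ADD and the G_SELECT.
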
Optional<SmallVector<Register, 8>>
CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
  assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
  // We want to detect if Root is part of a tree which represents a bunch
  // of loads being merged into a larger load. We'll try to recognize patterns
  // like, for example:
  //
  //  Reg   Reg
  //   \    /
  //    OR_1   Reg
  //     \    /
  //      OR_2
  //        \     Reg
  //         .. /
  //        Root
  //
  //  Reg   Reg   Reg   Reg
  //     \ /       \   /
  //     OR_1      OR_2
  //       \       /
  //        \    /
  //         ...
  //         Root
  //
  // Each "Reg" may have been produced by a load + some arithmetic. This
  // function will save each of them.
  SmallVector<Register, 8> RegsToVisit;
  SmallVector<const MachineInstr *, 7> Ors = {Root};

  // In the "worst" case, we're dealing with a load for each byte. So, there
  // are at most #bytes - 1 ORs.
  const unsigned MaxIter =
      MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
  for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
    if (Ors.empty())
      break;
    const MachineInstr *Curr = Ors.pop_back_val();
    Register OrLHS = Curr->getOperand(1).getReg();
    Register OrRHS = Curr->getOperand(2).getReg();

    // In the combine, we want to eliminate the entire tree.
    if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
      return None;

    // If it's a G_OR, save it and continue to walk. If it's not, then it's
    // something that may be a load + arithmetic.
    if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
      Ors.push_back(Or);
    else
      RegsToVisit.push_back(OrLHS);
    if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
      Ors.push_back(Or);
    else
      RegsToVisit.push_back(OrRHS);
  }

  // We're going to try and merge each register into a wider power-of-2 type,
  // so we ought to have an even number of registers.
  if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
    return None;
  return RegsToVisit;
}

/// Helper function for findLoadOffsetsForLoadOrCombine.
///
/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
/// and then moving that value into a specific byte offset.
///
/// e.g. x[i] << 24
///
/// \returns The load instruction and the byte offset it is moved into.
static Optional<std::pair<GZExtLoad *, int64_t>>
matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
                         const MachineRegisterInfo &MRI) {
  assert(MRI.hasOneNonDBGUse(Reg) &&
         "Expected Reg to only have one non-debug use?");
  Register MaybeLoad;
  int64_t Shift;
  if (!mi_match(Reg, MRI,
                m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
    Shift = 0;
    MaybeLoad = Reg;
  }

  if (Shift % MemSizeInBits != 0)
    return None;

  // TODO: Handle other types of loads.
  auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
  if (!Load)
    return None;

  if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
    return None;

  return std::make_pair(Load, Shift / MemSizeInBits);
}

3277 Optional
<std::tuple
<GZExtLoad
*, int64_t, GZExtLoad
*>>
3278 CombinerHelper::findLoadOffsetsForLoadOrCombine(
3279 SmallDenseMap
<int64_t, int64_t, 8> &MemOffset2Idx
,
3280 const SmallVector
<Register
, 8> &RegsToVisit
, const unsigned MemSizeInBits
) {
3282 // Each load found for the pattern. There should be one for each RegsToVisit.
3283 SmallSetVector
<const MachineInstr
*, 8> Loads
;
3285 // The lowest index used in any load. (The lowest "i" for each x[i].)
3286 int64_t LowestIdx
= INT64_MAX
;
3288 // The load which uses the lowest index.
3289 GZExtLoad
*LowestIdxLoad
= nullptr;
3291 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3292 SmallSet
<int64_t, 8> SeenIdx
;
3294 // Ensure each load is in the same MBB.
3295 // TODO: Support multiple MachineBasicBlocks.
3296 MachineBasicBlock
*MBB
= nullptr;
3297 const MachineMemOperand
*MMO
= nullptr;
3299 // Earliest instruction-order load in the pattern.
3300 GZExtLoad
*EarliestLoad
= nullptr;
3302 // Latest instruction-order load in the pattern.
3303 GZExtLoad
*LatestLoad
= nullptr;
  // Base pointer which every load should share.
  Register BasePtr;

  // We want to find a load for each register. Each load should have some
  // appropriate bit twiddling arithmetic. During this loop, we will also keep
  // track of the load which uses the lowest index. Later, we will check if we
  // can use its pointer in the final, combined load.
  for (auto Reg : RegsToVisit) {
    // Find the load, and find the position that it will end up in (e.g. a
    // shifted) value.
    auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
    if (!LoadAndPos)
      return None;
    GZExtLoad *Load;
    int64_t DstPos;
    std::tie(Load, DstPos) = *LoadAndPos;

    // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
    // it is difficult to check for stores/calls/etc between loads.
    MachineBasicBlock *LoadMBB = Load->getParent();
    if (!MBB)
      MBB = LoadMBB;
    if (LoadMBB != MBB)
      return None;

    // Make sure that the MachineMemOperands of every seen load are compatible.
    auto &LoadMMO = Load->getMMO();
    if (!MMO)
      MMO = &LoadMMO;
    if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
      return None;

    // Find out what the base pointer and index for the load is.
    Register LoadPtr;
    int64_t Idx;
    if (!mi_match(Load->getOperand(1).getReg(), MRI,
                  m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
      LoadPtr = Load->getOperand(1).getReg();
      Idx = 0;
    }

    // Don't combine things like a[i], a[i] -> a bigger load.
    if (!SeenIdx.insert(Idx).second)
      return None;

    // Every load must share the same base pointer; don't combine things like:
    //
    // a[i], b[i + 1] -> a bigger load.
    if (!BasePtr.isValid())
      BasePtr = LoadPtr;
    if (BasePtr != LoadPtr)
      return None;

    if (Idx < LowestIdx) {
      LowestIdx = Idx;
      LowestIdxLoad = Load;
    }

    // Keep track of the byte offset that this load ends up at. If we have seen
    // the byte offset, then stop here. We do not want to combine:
    //
    // a[i] << 16, a[i + k] << 16 -> a bigger load.
    if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
      return None;
    Loads.insert(Load);

    // Keep track of the position of the earliest/latest loads in the pattern.
    // We will check that there are no load fold barriers between them later
    // on.
    //
    // FIXME: Is there a better way to check for load fold barriers?
    if (!EarliestLoad || dominates(*Load, *EarliestLoad))
      EarliestLoad = Load;
    if (!LatestLoad || dominates(*LatestLoad, *Load))
      LatestLoad = Load;
  }

  // We found a load for each register. Let's check if each load satisfies the
  // pattern.
  assert(Loads.size() == RegsToVisit.size() &&
         "Expected to find a load for each register?");
  assert(EarliestLoad != LatestLoad && EarliestLoad &&
         LatestLoad && "Expected at least two loads?");

  // Check if there are any stores, calls, etc. between any of the loads. If
  // there are, then we can't safely perform the combine.
  //
  // MaxIter is chosen based off the (worst case) number of iterations it
  // typically takes to succeed in the LLVM test suite plus some padding.
  //
  // FIXME: Is there a better way to check for load fold barriers?
  const unsigned MaxIter = 20;
  unsigned Iter = 0;
  for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
                                                 LatestLoad->getIterator())) {
    if (Loads.count(&MI))
      continue;
    if (MI.isLoadFoldBarrier())
      return None;
    if (Iter++ == MaxIter)
      return None;
  }

  return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
}
bool CombinerHelper::matchLoadOrCombine(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_OR);
  MachineFunction &MF = *MI.getMF();
  // Assuming a little-endian target, transform:
  //  s8 *a = ...
  //  s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
  // =>
  //  s32 val = *((i32)a)
  //
  //  s8 *a = ...
  //  s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
  // =>
  //  s32 val = BSWAP(*((s32)a))
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  if (Ty.isVector())
    return false;

  // We need to combine at least two loads into this type. Since the smallest
  // possible load is into a byte, we need at least a 16-bit wide type.
  const unsigned WideMemSizeInBits = Ty.getSizeInBits();
  if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
    return false;

  // Match a collection of non-OR instructions in the pattern.
  auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
  if (!RegsToVisit)
    return false;

  // We have a collection of non-OR instructions. Figure out how wide each of
  // the small loads should be based off of the number of potential loads we
  // found.
  const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
  if (NarrowMemSizeInBits % 8 != 0)
    return false;

  // Check if each register feeding into each OR is a load from the same
  // base pointer + some arithmetic.
  //
  // e.g. a[0], a[1] << 8, a[2] << 16, etc.
  //
  // Also verify that each of these ends up putting a[i] into the same memory
  // offset as a load into a wide type would.
  SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
  GZExtLoad *LowestIdxLoad, *LatestLoad;
  int64_t LowestIdx;
  auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
      MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
  if (!MaybeLoadInfo)
    return false;
  std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;

  // We have a bunch of loads being OR'd together. Using the addresses + offsets
  // we found before, check if this corresponds to a big or little endian byte
  // pattern. If it does, then we can represent it using a load + possibly a
  // BSWAP.
  bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
  Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
  if (!IsBigEndian.hasValue())
    return false;
  bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
  if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
    return false;

  // Make sure that the load from the lowest index produces offset 0 in the
  // final value.
  //
  // This ensures that we won't combine something like this:
  //
  // load x[i] -> byte 2
  // load x[i+1] -> byte 0 ---> wide_load x[i]
  // load x[i+2] -> byte 1
  const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
  const unsigned ZeroByteOffset =
      *IsBigEndian
          ? bigEndianByteAt(NumLoadsInTy, 0)
          : littleEndianByteAt(NumLoadsInTy, 0);
  auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
  if (ZeroOffsetIdx == MemOffset2Idx.end() ||
      ZeroOffsetIdx->second != LowestIdx)
    return false;

  // We will reuse the pointer from the load which ends up at byte offset 0. It
  // may not use index 0.
  Register Ptr = LowestIdxLoad->getPointerReg();
  const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
  LegalityQuery::MemDesc MMDesc(MMO);
  MMDesc.MemoryTy = Ty;
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
    return false;
  auto PtrInfo = MMO.getPointerInfo();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);

  // Load must be allowed and fast on the target.
  LLVMContext &C = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  bool Fast = false;
  if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
      !Fast)
    return false;

  MatchInfo = [=](MachineIRBuilder &MIB) {
    MIB.setInstrAndDebugLoc(*LatestLoad);
    Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
    MIB.buildLoad(LoadDst, Ptr, *NewMMO);
    if (NeedsBSwap)
      MIB.buildBSwap(Dst, LoadDst);
  };
  return true;
}
/// Check if the store \p Store is a truncstore that can be merged. That is,
/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
/// Register then it does not need to match and SrcVal is set to the source
/// value found.
/// On match, returns the start byte offset of the \p SrcVal that is being
/// stored.
static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
                                                 MachineRegisterInfo &MRI) {
  Register TruncVal;
  if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
    return None;

  // The shift amount must be a constant multiple of the narrow type.
  // It is translated to the offset address in the wide source value "y".
  //
  // x = G_LSHR y, ShiftAmtC
  // s8 z = G_TRUNC x
  // store z, ...
  Register FoundSrcVal;
  int64_t ShiftAmt;
  if (!mi_match(TruncVal, MRI,
                m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
                         m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
    if (!SrcVal.isValid() || TruncVal == SrcVal) {
      if (!SrcVal.isValid())
        SrcVal = TruncVal;
      return 0; // If it's the lowest index store.
    }
    return None;
  }

  unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
  if (ShiftAmt % NarrowBits != 0)
    return None;
  const unsigned Offset = ShiftAmt / NarrowBits;

  if (SrcVal.isValid() && FoundSrcVal != SrcVal)
    return None;

  if (!SrcVal.isValid())
    SrcVal = FoundSrcVal;
  else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
    return None;
  return Offset;
}
/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the targets
/// supports it.
///
/// Assuming little endian target:
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 0) & 0xFF;
///  p[1] = (val >> 8) & 0xFF;
///  p[2] = (val >> 16) & 0xFF;
///  p[3] = (val >> 24) & 0xFF;
/// =>
///  *((i32)p) = val;
///
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 24) & 0xFF;
///  p[1] = (val >> 16) & 0xFF;
///  p[2] = (val >> 8) & 0xFF;
///  p[3] = (val >> 0) & 0xFF;
/// =>
///  *((i32)p) = BSWAP(val);
bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
                                          MergeTruncStoresInfo &MatchInfo) {
  auto &StoreMI = cast<GStore>(MI);
  LLT MemTy = StoreMI.getMMO().getMemoryType();

  // We only handle merging simple stores of 1-4 bytes.
  if (!MemTy.isScalar())
    return false;
  switch (MemTy.getSizeInBits()) {
  case 8:
  case 16:
  case 32:
    break;
  default:
    return false;
  }
  if (!StoreMI.isSimple())
    return false;

  // We do a simple search for mergeable stores prior to this one.
  // Any potential alias hazard along the way terminates the search.
  SmallVector<GStore *> FoundStores;

  // We're looking for:
  // 1) a (store(trunc(...)))
  // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
  //    the partial value stored.
  // 3) where the offsets form either a little or big-endian sequence.

  auto &LastStore = StoreMI;

  // The single base pointer that all stores must use.
  Register BaseReg;
  int64_t LastOffset;
  if (!mi_match(LastStore.getPointerReg(), MRI,
                m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
    BaseReg = LastStore.getPointerReg();
    LastOffset = 0;
  }

  GStore *LowestIdxStore = &LastStore;
  int64_t LowestIdxOffset = LastOffset;

  Register WideSrcVal;
  auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
  if (!LowestShiftAmt)
    return false; // Didn't match a trunc.
  assert(WideSrcVal.isValid());

  LLT WideStoreTy = MRI.getType(WideSrcVal);
  // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
  if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
    return false;
  const unsigned NumStoresRequired =
      WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();

  SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
  OffsetMap[*LowestShiftAmt] = LastOffset;
  FoundStores.emplace_back(&LastStore);

  // Search the block up for more stores.
  // We use a search threshold of 10 instructions here because the combiner
  // works top-down within a block, and we don't want to search an unbounded
  // number of predecessor instructions trying to find matching stores.
  // If we moved this optimization into a separate pass then we could probably
  // use a more efficient search without having a hard-coded threshold.
  const int MaxInstsToCheck = 10;
  int NumInstsChecked = 0;
  for (auto II = ++LastStore.getReverseIterator();
       II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
       ++II) {
    NumInstsChecked++;
    GStore *NewStore;
    if ((NewStore = dyn_cast<GStore>(&*II))) {
      if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
        break;
    } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
      break;
    } else {
      continue; // This is a safe instruction we can look past.
    }

    Register NewBaseReg;
    int64_t MemOffset;
    // Check we're storing to the same base + some offset.
    if (!mi_match(NewStore->getPointerReg(), MRI,
                  m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
      NewBaseReg = NewStore->getPointerReg();
      MemOffset = 0;
    }
    if (BaseReg != NewBaseReg)
      break;

    auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
    if (!ShiftByteOffset)
      break;
    if (MemOffset < LowestIdxOffset) {
      LowestIdxOffset = MemOffset;
      LowestIdxStore = NewStore;
    }

    // Map the offset in the store and the offset in the combined value, and
    // early return if it has been set before.
    if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
        OffsetMap[*ShiftByteOffset] != INT64_MAX)
      break;
    OffsetMap[*ShiftByteOffset] = MemOffset;

    FoundStores.emplace_back(NewStore);
    // Reset counter since we've found a matching inst.
    NumInstsChecked = 0;
    if (FoundStores.size() == NumStoresRequired)
      break;
  }

  if (FoundStores.size() != NumStoresRequired) {
    return false;
  }

  const auto &DL = LastStore.getMF()->getDataLayout();
  auto &C = LastStore.getMF()->getFunction().getContext();
  // Check that a store of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed = getTargetLowering().allowsMemoryAccess(
      C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
  if (!Allowed || !Fast)
    return false;

  // Check if the pieces of the value are going to the expected places in memory
  // to merge the stores.
  unsigned NarrowBits = MemTy.getScalarSizeInBits();
  auto checkOffsets = [&](bool MatchLittleEndian) {
    if (MatchLittleEndian) {
      for (unsigned i = 0; i != NumStoresRequired; ++i)
        if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
          return false;
    } else { // MatchBigEndian by reversing loop counter.
      for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
           ++i, --j)
        if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
          return false;
    }
    return true;
  };

  // Check if the offsets line up for the native data layout of this target.
  bool NeedBswap = false;
  bool NeedRotate = false;
  if (!checkOffsets(DL.isLittleEndian())) {
    // Special-case: check if byte offsets line up for the opposite endian.
    if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
      NeedBswap = true;
    else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
      NeedRotate = true;
    else
      return false;
  }

  if (NeedBswap &&
      !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
    return false;
  if (NeedRotate &&
      !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
    return false;

  MatchInfo.NeedBSwap = NeedBswap;
  MatchInfo.NeedRotate = NeedRotate;
  MatchInfo.LowestIdxStore = LowestIdxStore;
  MatchInfo.WideSrcVal = WideSrcVal;
  MatchInfo.FoundStores = std::move(FoundStores);
  return true;
}
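// Illustrative example (p and val are arbitrary names): storing an s32 val as
//   p[0] = trunc(val);       p[1] = trunc(val >> 8);
//   p[2] = trunc(val >> 16); p[3] = trunc(val >> 24);
// fills OffsetMap with {0, 1, 2, 3}, which checkOffsets accepts for a
// little-endian layout, so neither a bswap nor a rotate is needed and the
// four narrow stores collapse into a single s32 store at p.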
void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
                                          MergeTruncStoresInfo &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  Register WideSrcVal = MatchInfo.WideSrcVal;
  LLT WideStoreTy = MRI.getType(WideSrcVal);

  if (MatchInfo.NeedBSwap) {
    WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
  } else if (MatchInfo.NeedRotate) {
    assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
           "Unexpected type for rotate");
    auto RotAmt =
        Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
    WideSrcVal =
        Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
  }

  Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
                     MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
                     MatchInfo.LowestIdxStore->getMMO().getAlign());

  // Erase the old stores.
  for (auto *ST : MatchInfo.FoundStores)
    ST->eraseFromParent();
}
bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
                                            MachineInstr *&ExtMI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI);

  Register DstReg = MI.getOperand(0).getReg();

  // TODO: Extending a vector may be expensive, don't do this until heuristics
  // are better.
  if (MRI.getType(DstReg).isVector())
    return false;

  // Try to match a phi, whose only use is an extend.
  if (!MRI.hasOneNonDBGUse(DstReg))
    return false;
  ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
  switch (ExtMI->getOpcode()) {
  case TargetOpcode::G_ANYEXT:
    return true; // G_ANYEXT is usually free.
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT:
    break;
  default:
    return false;
  }

  // If the target is likely to fold this extend away, don't propagate.
  if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
    return false;

  // We don't want to propagate the extends unless there's a good chance that
  // they'll be optimized in some way.
  // Collect the unique incoming values.
  SmallPtrSet<MachineInstr *, 4> InSrcs;
  for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
    auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
    switch (DefMI->getOpcode()) {
    case TargetOpcode::G_LOAD:
    case TargetOpcode::G_TRUNC:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_ZEXT:
    case TargetOpcode::G_ANYEXT:
    case TargetOpcode::G_CONSTANT:
      InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
      // Don't try to propagate if there are too many places to create new
      // extends, chances are it'll increase code size.
      if (InSrcs.size() > 2)
        return false;
      break;
    default:
      return false;
    }
  }
  return true;
}
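// Illustrative example: a G_PHI of two G_LOADs whose only user is a G_SEXT
// matches here; applyExtendThroughPhis below then sinks the extend into each
// predecessor, where the target may be able to fold it into the load as a
// sign-extending load.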
void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
                                            MachineInstr *&ExtMI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI);
  Register DstReg = ExtMI->getOperand(0).getReg();
  LLT ExtTy = MRI.getType(DstReg);

  // Propagate the extension into the block of each incoming reg's block.
  // Use a SetVector here because PHIs can have duplicate edges, and we want
  // deterministic iteration order.
  SmallSetVector<MachineInstr *, 8> SrcMIs;
  SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
  for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
    auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
    if (!SrcMIs.insert(SrcMI))
      continue;

    // Build an extend after each src inst.
    auto *MBB = SrcMI->getParent();
    MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
    if (InsertPt != MBB->end() && InsertPt->isPHI())
      InsertPt = MBB->getFirstNonPHI();

    Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
    Builder.setDebugLoc(MI.getDebugLoc());
    auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
                                          SrcMI->getOperand(0).getReg());
    OldToNewSrcMap[SrcMI] = NewExt;
  }

  // Create a new phi with the extended inputs.
  Builder.setInstrAndDebugLoc(MI);
  auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
  NewPhi.addDef(DstReg);
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    if (!MO.isReg()) {
      NewPhi.addMBB(MO.getMBB());
      continue;
    }
    auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
    NewPhi.addUse(NewSrc->getOperand(0).getReg());
  }
  Builder.insertInstr(NewPhi);
  ExtMI->eraseFromParent();
}
bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
                                                Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
  // If we have a constant index, look for a G_BUILD_VECTOR source
  // and find the source register that the index maps to.
  Register SrcVec = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcVec);
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
    return false;

  auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
    return false;

  unsigned VecIdx = Cst->Value.getZExtValue();
  MachineInstr *BuildVecMI =
      getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI);
  if (!BuildVecMI) {
    BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI);
    if (!BuildVecMI)
      return false;
    LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg());
    if (!isLegalOrBeforeLegalizer(
            {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}}))
      return false;
  }

  EVT Ty(getMVTForLLT(SrcTy));
  if (!MRI.hasOneNonDBGUse(SrcVec) &&
      !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
    return false;

  Reg = BuildVecMI->getOperand(VecIdx + 1).getReg();
  return true;
}

void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
                                                Register &Reg) {
  // Check the type of the register, since it may have come from a
  // G_BUILD_VECTOR_TRUNC.
  LLT ScalarTy = MRI.getType(Reg);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  Builder.setInstrAndDebugLoc(MI);
  if (ScalarTy != DstTy) {
    assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
    Builder.buildTrunc(DstReg, Reg);
    MI.eraseFromParent();
    return;
  }
  replaceSingleDefInstWithReg(MI, Reg);
}
bool CombinerHelper::matchExtractAllEltsFromBuildVector(
    MachineInstr &MI,
    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // This combine tries to find build_vector's which have every source element
  // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
  // the masked load scalarization is run late in the pipeline. There's already
  // a combine for a similar pattern starting from the extract, but that
  // doesn't attempt to do it if there are multiple uses of the build_vector,
  // which in this case is true. Starting the combine from the build_vector
  // feels more natural than trying to find sibling nodes of extracts.
  //
  // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
  // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
  // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
  // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
  // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
  // ==>
  // replace ext{1,2,3,4} with %s{1,2,3,4}

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  unsigned NumElts = DstTy.getNumElements();

  SmallBitVector ExtractedElts(NumElts);
  for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
    if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
      return false;
    auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
    if (!Cst)
      return false;
    unsigned Idx = Cst.getValue().getZExtValue();
    if (Idx >= NumElts)
      return false; // Out of range.
    ExtractedElts.set(Idx);
    SrcDstPairs.emplace_back(
        std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
  }
  // Match if every element was extracted.
  return ExtractedElts.all();
}

void CombinerHelper::applyExtractAllEltsFromBuildVector(
    MachineInstr &MI,
    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  for (auto &Pair : SrcDstPairs) {
    auto *ExtMI = Pair.second;
    replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
    ExtMI->eraseFromParent();
  }
  MI.eraseFromParent();
}
void CombinerHelper::applyBuildFn(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  MatchInfo(Builder);
  MI.eraseFromParent();
}

void CombinerHelper::applyBuildFnNoErase(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  MatchInfo(Builder);
}
bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
                                               BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_OR);

  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  unsigned BitWidth = Ty.getScalarSizeInBits();

  Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
  unsigned FshOpc = 0;

  // Match (or (shl ...), (lshr ...)).
  if (!mi_match(Dst, MRI,
                // m_GOr() handles the commuted version as well.
                m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
                      m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
    return false;

  // Given constants C0 and C1 such that C0 + C1 is bit-width:
  // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
  // TODO: Match constant splat.
  int64_t CstShlAmt, CstLShrAmt;
  if (mi_match(ShlAmt, MRI, m_ICst(CstShlAmt)) &&
      mi_match(LShrAmt, MRI, m_ICst(CstLShrAmt)) &&
      CstShlAmt + CstLShrAmt == BitWidth) {
    FshOpc = TargetOpcode::G_FSHR;
    Amt = LShrAmt;

  } else if (mi_match(LShrAmt, MRI,
                      m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
             ShlAmt == Amt) {
    // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
    FshOpc = TargetOpcode::G_FSHL;

  } else if (mi_match(ShlAmt, MRI,
                      m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
             LShrAmt == Amt) {
    // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
    FshOpc = TargetOpcode::G_FSHR;

  } else {
    return false;
  }

  LLT AmtTy = MRI.getType(Amt);
  if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
  };
  return true;
}
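// Illustrative example: with 32-bit operands,
//   %or = G_OR (G_SHL %x, 24), (G_LSHR %y, 8)
// has C0 + C1 == 32, so it is rewritten to G_FSHR %x, %y, 8 (equivalent to
// G_FSHL %x, %y, 24).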
/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  if (X != Y)
    return false;
  unsigned RotateOpc =
      Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
  return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
}

void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
  bool IsFSHL = Opc == TargetOpcode::G_FSHL;
  Observer.changingInstr(MI);
  MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
                                         : TargetOpcode::G_ROTR));
  MI.RemoveOperand(2);
  Observer.changedInstr(MI);
}
// Fold (rot x, c) -> (rot x, c % BitSize)
bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
         MI.getOpcode() == TargetOpcode::G_ROTR);
  unsigned Bitsize =
      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
  Register AmtReg = MI.getOperand(2).getReg();
  bool OutOfRange = false;
  auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
    if (auto *CI = dyn_cast<ConstantInt>(C))
      OutOfRange |= CI->getValue().uge(Bitsize);
    return true;
  };
  return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
}

void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
         MI.getOpcode() == TargetOpcode::G_ROTR);
  unsigned Bitsize =
      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
  Builder.setInstrAndDebugLoc(MI);
  Register Amt = MI.getOperand(2).getReg();
  LLT AmtTy = MRI.getType(Amt);
  auto Bits = Builder.buildConstant(AmtTy, Bitsize);
  Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(Amt);
  Observer.changedInstr(MI);
}
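// Illustrative example: a 32-bit G_ROTL by 37 is out of range, so the amount
// operand is replaced with G_UREM(37, 32); once that constant folds, the
// rotate becomes G_ROTL %x, 5.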
4123 bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr
&MI
,
4124 int64_t &MatchInfo
) {
4125 assert(MI
.getOpcode() == TargetOpcode::G_ICMP
);
4126 auto Pred
= static_cast<CmpInst::Predicate
>(MI
.getOperand(1).getPredicate());
4127 auto KnownLHS
= KB
->getKnownBits(MI
.getOperand(2).getReg());
4128 auto KnownRHS
= KB
->getKnownBits(MI
.getOperand(3).getReg());
4129 Optional
<bool> KnownVal
;
4132 llvm_unreachable("Unexpected G_ICMP predicate?");
4133 case CmpInst::ICMP_EQ
:
4134 KnownVal
= KnownBits::eq(KnownLHS
, KnownRHS
);
4136 case CmpInst::ICMP_NE
:
4137 KnownVal
= KnownBits::ne(KnownLHS
, KnownRHS
);
4139 case CmpInst::ICMP_SGE
:
4140 KnownVal
= KnownBits::sge(KnownLHS
, KnownRHS
);
4142 case CmpInst::ICMP_SGT
:
4143 KnownVal
= KnownBits::sgt(KnownLHS
, KnownRHS
);
4145 case CmpInst::ICMP_SLE
:
4146 KnownVal
= KnownBits::sle(KnownLHS
, KnownRHS
);
4148 case CmpInst::ICMP_SLT
:
4149 KnownVal
= KnownBits::slt(KnownLHS
, KnownRHS
);
4151 case CmpInst::ICMP_UGE
:
4152 KnownVal
= KnownBits::uge(KnownLHS
, KnownRHS
);
4154 case CmpInst::ICMP_UGT
:
4155 KnownVal
= KnownBits::ugt(KnownLHS
, KnownRHS
);
4157 case CmpInst::ICMP_ULE
:
4158 KnownVal
= KnownBits::ule(KnownLHS
, KnownRHS
);
4160 case CmpInst::ICMP_ULT
:
4161 KnownVal
= KnownBits::ult(KnownLHS
, KnownRHS
);
4168 ? getICmpTrueVal(getTargetLowering(),
4170 MRI
.getType(MI
.getOperand(0).getReg()).isVector(),
4176 bool CombinerHelper::matchICmpToLHSKnownBits(
4177 MachineInstr
&MI
, std::function
<void(MachineIRBuilder
&)> &MatchInfo
) {
4178 assert(MI
.getOpcode() == TargetOpcode::G_ICMP
);
4181 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4182 // %cmp = G_ICMP ne %x, 0
4186 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4187 // %cmp = G_ICMP eq %x, 1
4189 // We can replace %cmp with %x assuming true is 1 on the target.
4190 auto Pred
= static_cast<CmpInst::Predicate
>(MI
.getOperand(1).getPredicate());
4191 if (!CmpInst::isEquality(Pred
))
4193 Register Dst
= MI
.getOperand(0).getReg();
4194 LLT DstTy
= MRI
.getType(Dst
);
4195 if (getICmpTrueVal(getTargetLowering(), DstTy
.isVector(),
4196 /* IsFP = */ false) != 1)
4198 int64_t OneOrZero
= Pred
== CmpInst::ICMP_EQ
;
4199 if (!mi_match(MI
.getOperand(3).getReg(), MRI
, m_SpecificICst(OneOrZero
)))
4201 Register LHS
= MI
.getOperand(2).getReg();
4202 auto KnownLHS
= KB
->getKnownBits(LHS
);
4203 if (KnownLHS
.getMinValue() != 0 || KnownLHS
.getMaxValue() != 1)
4205 // Make sure replacing Dst with the LHS is a legal operation.
4206 LLT LHSTy
= MRI
.getType(LHS
);
4207 unsigned LHSSize
= LHSTy
.getSizeInBits();
4208 unsigned DstSize
= DstTy
.getSizeInBits();
4209 unsigned Op
= TargetOpcode::COPY
;
4210 if (DstSize
!= LHSSize
)
4211 Op
= DstSize
< LHSSize
? TargetOpcode::G_TRUNC
: TargetOpcode::G_ZEXT
;
4212 if (!isLegalOrBeforeLegalizer({Op
, {DstTy
, LHSTy
}}))
4214 MatchInfo
= [=](MachineIRBuilder
&B
) { B
.buildInstr(Op
, {Dst
}, {LHS
}); };
// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
bool CombinerHelper::matchAndOrDisjointMask(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  // Ignore vector types to simplify matching the two constants.
  // TODO: do this for vectors and scalars via a demanded bits analysis.
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector())
    return false;

  Register Src;
  int64_t MaskAnd;
  int64_t MaskOr;
  if (!mi_match(MI, MRI,
                m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd))))
    return false;

  // Check if MaskOr could turn on any bits in Src.
  if (MaskAnd & MaskOr)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(Src);
    Observer.changedInstr(MI);
  };
  return true;
}
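// Illustrative example: in
//   %and = G_AND (G_OR %x, 0xFF00), 0x00FF
// the two masks are disjoint (0xFF00 & 0x00FF == 0), so the G_OR cannot set
// any bit that survives the G_AND, and the instruction is rewritten to
//   %and = G_AND %x, 0x00FF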
4248 /// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4249 bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4250 MachineInstr
&MI
, std::function
<void(MachineIRBuilder
&)> &MatchInfo
) {
4251 assert(MI
.getOpcode() == TargetOpcode::G_SEXT_INREG
);
4252 Register Dst
= MI
.getOperand(0).getReg();
4253 Register Src
= MI
.getOperand(1).getReg();
4254 LLT Ty
= MRI
.getType(Src
);
4255 LLT ExtractTy
= getTargetLowering().getPreferredShiftAmountTy(Ty
);
4256 if (!LI
|| !LI
->isLegalOrCustom({TargetOpcode::G_SBFX
, {Ty
, ExtractTy
}}))
4258 int64_t Width
= MI
.getOperand(2).getImm();
4263 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc
), m_ICst(ShiftImm
)),
4264 m_GLShr(m_Reg(ShiftSrc
), m_ICst(ShiftImm
))))))
4266 if (ShiftImm
< 0 || ShiftImm
+ Width
> Ty
.getScalarSizeInBits())
4269 MatchInfo
= [=](MachineIRBuilder
&B
) {
4270 auto Cst1
= B
.buildConstant(ExtractTy
, ShiftImm
);
4271 auto Cst2
= B
.buildConstant(ExtractTy
, Width
);
4272 B
.buildSbfx(Dst
, ShiftSrc
, Cst1
, Cst2
);
4277 /// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4278 bool CombinerHelper::matchBitfieldExtractFromAnd(
4279 MachineInstr
&MI
, std::function
<void(MachineIRBuilder
&)> &MatchInfo
) {
4280 assert(MI
.getOpcode() == TargetOpcode::G_AND
);
4281 Register Dst
= MI
.getOperand(0).getReg();
4282 LLT Ty
= MRI
.getType(Dst
);
4283 LLT ExtractTy
= getTargetLowering().getPreferredShiftAmountTy(Ty
);
4284 if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
4285 TargetOpcode::G_UBFX
, Ty
, ExtractTy
))
4288 int64_t AndImm
, LSBImm
;
4290 const unsigned Size
= Ty
.getScalarSizeInBits();
4291 if (!mi_match(MI
.getOperand(0).getReg(), MRI
,
4292 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc
), m_ICst(LSBImm
))),
4296 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4297 auto MaybeMask
= static_cast<uint64_t>(AndImm
);
4298 if (MaybeMask
& (MaybeMask
+ 1))
4301 // LSB must fit within the register.
4302 if (static_cast<uint64_t>(LSBImm
) >= Size
)
4305 uint64_t Width
= APInt(Size
, AndImm
).countTrailingOnes();
4306 MatchInfo
= [=](MachineIRBuilder
&B
) {
4307 auto WidthCst
= B
.buildConstant(ExtractTy
, Width
);
4308 auto LSBCst
= B
.buildConstant(ExtractTy
, LSBImm
);
4309 B
.buildInstr(TargetOpcode::G_UBFX
, {Dst
}, {ShiftSrc
, LSBCst
, WidthCst
});
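// Illustrative example: for
//   %and = G_AND (G_LSHR %x, 4), 0x3F
// the mask has six trailing ones, so LSBImm == 4 and Width == 6, and the
// combine builds G_UBFX %x, 4, 6 (extract 6 bits starting at bit 4).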
4314 bool CombinerHelper::matchBitfieldExtractFromShr(
4315 MachineInstr
&MI
, std::function
<void(MachineIRBuilder
&)> &MatchInfo
) {
4316 const unsigned Opcode
= MI
.getOpcode();
4317 assert(Opcode
== TargetOpcode::G_ASHR
|| Opcode
== TargetOpcode::G_LSHR
);
4319 const Register Dst
= MI
.getOperand(0).getReg();
4321 const unsigned ExtrOpcode
= Opcode
== TargetOpcode::G_ASHR
4322 ? TargetOpcode::G_SBFX
4323 : TargetOpcode::G_UBFX
;
4325 // Check if the type we would use for the extract is legal
4326 LLT Ty
= MRI
.getType(Dst
);
4327 LLT ExtractTy
= getTargetLowering().getPreferredShiftAmountTy(Ty
);
4328 if (!LI
|| !LI
->isLegalOrCustom({ExtrOpcode
, {Ty
, ExtractTy
}}))
4334 const unsigned Size
= Ty
.getScalarSizeInBits();
4336 // Try to match shr (shl x, c1), c2
4337 if (!mi_match(Dst
, MRI
,
4339 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc
), m_ICst(ShlAmt
))),
4343 // Make sure that the shift sizes can fit a bitfield extract
4344 if (ShlAmt
< 0 || ShlAmt
> ShrAmt
|| ShrAmt
>= Size
)
4347 // Skip this combine if the G_SEXT_INREG combine could handle it
4348 if (Opcode
== TargetOpcode::G_ASHR
&& ShlAmt
== ShrAmt
)
4351 // Calculate start position and width of the extract
4352 const int64_t Pos
= ShrAmt
- ShlAmt
;
4353 const int64_t Width
= Size
- ShrAmt
;
4355 MatchInfo
= [=](MachineIRBuilder
&B
) {
4356 auto WidthCst
= B
.buildConstant(ExtractTy
, Width
);
4357 auto PosCst
= B
.buildConstant(ExtractTy
, Pos
);
4358 B
.buildInstr(ExtrOpcode
, {Dst
}, {ShlSrc
, PosCst
, WidthCst
});
4363 bool CombinerHelper::matchBitfieldExtractFromShrAnd(
4364 MachineInstr
&MI
, std::function
<void(MachineIRBuilder
&)> &MatchInfo
) {
4365 const unsigned Opcode
= MI
.getOpcode();
4366 assert(Opcode
== TargetOpcode::G_LSHR
|| Opcode
== TargetOpcode::G_ASHR
);
4368 const Register Dst
= MI
.getOperand(0).getReg();
4369 LLT Ty
= MRI
.getType(Dst
);
4370 LLT ExtractTy
= getTargetLowering().getPreferredShiftAmountTy(Ty
);
4371 if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
4372 TargetOpcode::G_UBFX
, Ty
, ExtractTy
))
4375 // Try to match shr (and x, c1), c2
4379 if (!mi_match(Dst
, MRI
,
4381 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc
), m_ICst(SMask
))),
4385 const unsigned Size
= Ty
.getScalarSizeInBits();
4386 if (ShrAmt
< 0 || ShrAmt
>= Size
)
4389 // Check that ubfx can do the extraction, with no holes in the mask.
4390 uint64_t UMask
= SMask
;
4391 UMask
|= maskTrailingOnes
<uint64_t>(ShrAmt
);
4392 UMask
&= maskTrailingOnes
<uint64_t>(Size
);
4393 if (!isMask_64(UMask
))
4396 // Calculate start position and width of the extract.
4397 const int64_t Pos
= ShrAmt
;
4398 const int64_t Width
= countTrailingOnes(UMask
) - ShrAmt
;
4400 // It's preferable to keep the shift, rather than form G_SBFX.
4401 // TODO: remove the G_AND via demanded bits analysis.
4402 if (Opcode
== TargetOpcode::G_ASHR
&& Width
+ ShrAmt
== Size
)
4405 MatchInfo
= [=](MachineIRBuilder
&B
) {
4406 auto WidthCst
= B
.buildConstant(ExtractTy
, Width
);
4407 auto PosCst
= B
.buildConstant(ExtractTy
, Pos
);
4408 B
.buildInstr(TargetOpcode::G_UBFX
, {Dst
}, {AndSrc
, PosCst
, WidthCst
});
4413 bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4414 MachineInstr
&PtrAdd
) {
4415 assert(PtrAdd
.getOpcode() == TargetOpcode::G_PTR_ADD
);
4417 Register Src1Reg
= PtrAdd
.getOperand(1).getReg();
4418 MachineInstr
*Src1Def
= getOpcodeDef(TargetOpcode::G_PTR_ADD
, Src1Reg
, MRI
);
4422 Register Src2Reg
= PtrAdd
.getOperand(2).getReg();
4424 if (MRI
.hasOneNonDBGUse(Src1Reg
))
4427 auto C1
= getIConstantVRegVal(Src1Def
->getOperand(2).getReg(), MRI
);
4430 auto C2
= getIConstantVRegVal(Src2Reg
, MRI
);
4434 const APInt
&C1APIntVal
= *C1
;
4435 const APInt
&C2APIntVal
= *C2
;
4436 const int64_t CombinedValue
= (C1APIntVal
+ C2APIntVal
).getSExtValue();
4438 for (auto &UseMI
: MRI
.use_nodbg_instructions(Src1Reg
)) {
4439 // This combine may end up running before ptrtoint/inttoptr combines
4440 // manage to eliminate redundant conversions, so try to look through them.
4441 MachineInstr
*ConvUseMI
= &UseMI
;
4442 unsigned ConvUseOpc
= ConvUseMI
->getOpcode();
4443 while (ConvUseOpc
== TargetOpcode::G_INTTOPTR
||
4444 ConvUseOpc
== TargetOpcode::G_PTRTOINT
) {
4445 Register DefReg
= ConvUseMI
->getOperand(0).getReg();
4446 if (!MRI
.hasOneNonDBGUse(DefReg
))
4448 ConvUseMI
= &*MRI
.use_instr_nodbg_begin(DefReg
);
4449 ConvUseOpc
= ConvUseMI
->getOpcode();
4451 auto LoadStore
= ConvUseOpc
== TargetOpcode::G_LOAD
||
4452 ConvUseOpc
== TargetOpcode::G_STORE
;
4455 // Is x[offset2] already not a legal addressing mode? If so then
4456 // reassociating the constants breaks nothing (we test offset2 because
4457 // that's the one we hope to fold into the load or store).
4458 TargetLoweringBase::AddrMode AM
;
4459 AM
.HasBaseReg
= true;
4460 AM
.BaseOffs
= C2APIntVal
.getSExtValue();
4462 MRI
.getType(ConvUseMI
->getOperand(1).getReg()).getAddressSpace();
4464 getTypeForLLT(MRI
.getType(ConvUseMI
->getOperand(0).getReg()),
4465 PtrAdd
.getMF()->getFunction().getContext());
4466 const auto &TLI
= *PtrAdd
.getMF()->getSubtarget().getTargetLowering();
4467 if (!TLI
.isLegalAddressingMode(PtrAdd
.getMF()->getDataLayout(), AM
,
4471 // Would x[offset1+offset2] still be a legal addressing mode?
4472 AM
.BaseOffs
= CombinedValue
;
4473 if (!TLI
.isLegalAddressingMode(PtrAdd
.getMF()->getDataLayout(), AM
,
4481 bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd
&MI
,
4483 BuildFnTy
&MatchInfo
) {
4484 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4485 Register Src1Reg
= MI
.getOperand(1).getReg();
4486 if (RHS
->getOpcode() != TargetOpcode::G_ADD
)
4488 auto C2
= getIConstantVRegVal(RHS
->getOperand(2).getReg(), MRI
);
4492 MatchInfo
= [=, &MI
](MachineIRBuilder
&B
) {
4493 LLT PtrTy
= MRI
.getType(MI
.getOperand(0).getReg());
4496 Builder
.buildPtrAdd(PtrTy
, Src1Reg
, RHS
->getOperand(1).getReg());
4497 Observer
.changingInstr(MI
);
4498 MI
.getOperand(1).setReg(NewBase
.getReg(0));
4499 MI
.getOperand(2).setReg(RHS
->getOperand(2).getReg());
4500 Observer
.changedInstr(MI
);
4502 return !reassociationCanBreakAddressingModePattern(MI
);
4505 bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd
&MI
,
4508 BuildFnTy
&MatchInfo
) {
4509 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
4510 // if and only if (G_PTR_ADD X, C) has one use.
4512 Optional
<ValueAndVReg
> LHSCstOff
;
4513 if (!mi_match(MI
.getBaseReg(), MRI
,
4514 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase
), m_GCst(LHSCstOff
)))))
4517 auto *LHSPtrAdd
= cast
<GPtrAdd
>(LHS
);
4518 MatchInfo
= [=, &MI
](MachineIRBuilder
&B
) {
4519 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4520 // before its def. Sink the instruction so the outer PTR_ADD to ensure this
4522 LHSPtrAdd
->moveBefore(&MI
);
4523 Register RHSReg
= MI
.getOffsetReg();
4524 Observer
.changingInstr(MI
);
4525 MI
.getOperand(2).setReg(LHSCstOff
->VReg
);
4526 Observer
.changedInstr(MI
);
4527 Observer
.changingInstr(*LHSPtrAdd
);
4528 LHSPtrAdd
->getOperand(2).setReg(RHSReg
);
4529 Observer
.changedInstr(*LHSPtrAdd
);
4531 return !reassociationCanBreakAddressingModePattern(MI
);
4534 bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd
&MI
,
4537 BuildFnTy
&MatchInfo
) {
4538 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4539 auto *LHSPtrAdd
= dyn_cast
<GPtrAdd
>(LHS
);
4543 Register Src2Reg
= MI
.getOperand(2).getReg();
4544 Register LHSSrc1
= LHSPtrAdd
->getBaseReg();
4545 Register LHSSrc2
= LHSPtrAdd
->getOffsetReg();
4546 auto C1
= getIConstantVRegVal(LHSSrc2
, MRI
);
4549 auto C2
= getIConstantVRegVal(Src2Reg
, MRI
);
4553 MatchInfo
= [=, &MI
](MachineIRBuilder
&B
) {
4554 auto NewCst
= B
.buildConstant(MRI
.getType(Src2Reg
), *C1
+ *C2
);
4555 Observer
.changingInstr(MI
);
4556 MI
.getOperand(1).setReg(LHSSrc1
);
4557 MI
.getOperand(2).setReg(NewCst
.getReg(0));
4558 Observer
.changedInstr(MI
);
4560 return !reassociationCanBreakAddressingModePattern(MI
);
4563 bool CombinerHelper::matchReassocPtrAdd(MachineInstr
&MI
,
4564 BuildFnTy
&MatchInfo
) {
4565 auto &PtrAdd
= cast
<GPtrAdd
>(MI
);
4566 // We're trying to match a few pointer computation patterns here for
4567 // re-association opportunities.
4568 // 1) Isolating a constant operand to be on the RHS, e.g.:
4569 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4571 // 2) Folding two constants in each sub-tree as long as such folding
4572 // doesn't break a legal addressing mode.
4573 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4575 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4576 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
4577 // iif (G_PTR_ADD X, C) has one use.
4578 MachineInstr
*LHS
= MRI
.getVRegDef(PtrAdd
.getBaseReg());
4579 MachineInstr
*RHS
= MRI
.getVRegDef(PtrAdd
.getOffsetReg());
4581 // Try to match example 2.
4582 if (matchReassocFoldConstantsInSubTree(PtrAdd
, LHS
, RHS
, MatchInfo
))
4585 // Try to match example 3.
4586 if (matchReassocConstantInnerLHS(PtrAdd
, LHS
, RHS
, MatchInfo
))
4589 // Try to match example 1.
4590 if (matchReassocConstantInnerRHS(PtrAdd
, RHS
, MatchInfo
))
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
  if (!MaybeCst)
    return false;
  MatchInfo = *MaybeCst;
  return true;
}
4606 bool CombinerHelper::matchNarrowBinopFeedingAnd(
4607 MachineInstr
&MI
, std::function
<void(MachineIRBuilder
&)> &MatchInfo
) {
4608 // Look for a binop feeding into an AND with a mask:
4610 // %add = G_ADD %lhs, %rhs
4611 // %and = G_AND %add, 000...11111111
4613 // Check if it's possible to perform the binop at a narrower width and zext
4614 // back to the original width like so:
4616 // %narrow_lhs = G_TRUNC %lhs
4617 // %narrow_rhs = G_TRUNC %rhs
4618 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4619 // %new_add = G_ZEXT %narrow_add
4620 // %and = G_AND %new_add, 000...11111111
4622 // This can allow later combines to eliminate the G_AND if it turns out
4623 // that the mask is irrelevant.
4624 assert(MI
.getOpcode() == TargetOpcode::G_AND
);
4625 Register Dst
= MI
.getOperand(0).getReg();
4626 Register AndLHS
= MI
.getOperand(1).getReg();
4627 Register AndRHS
= MI
.getOperand(2).getReg();
4628 LLT WideTy
= MRI
.getType(Dst
);
4630 // If the potential binop has more than one use, then it's possible that one
4631 // of those uses will need its full width.
4632 if (!WideTy
.isScalar() || !MRI
.hasOneNonDBGUse(AndLHS
))
4635 // Check if the LHS feeding the AND is impacted by the high bits that we're
4638 // e.g. for 64-bit x, y:
4640 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
4641 MachineInstr
*LHSInst
= getDefIgnoringCopies(AndLHS
, MRI
);
4644 unsigned LHSOpc
= LHSInst
->getOpcode();
4648 case TargetOpcode::G_ADD
:
4649 case TargetOpcode::G_SUB
:
4650 case TargetOpcode::G_MUL
:
4651 case TargetOpcode::G_AND
:
4652 case TargetOpcode::G_OR
:
4653 case TargetOpcode::G_XOR
:
4657 // Find the mask on the RHS.
4658 auto Cst
= getIConstantVRegValWithLookThrough(AndRHS
, MRI
);
4661 auto Mask
= Cst
->Value
;
4665 // No point in combining if there's nothing to truncate.
4666 unsigned NarrowWidth
= Mask
.countTrailingOnes();
4667 if (NarrowWidth
== WideTy
.getSizeInBits())
4669 LLT NarrowTy
= LLT::scalar(NarrowWidth
);
4671 // Check if adding the zext + truncates could be harmful.
4672 auto &MF
= *MI
.getMF();
4673 const auto &TLI
= getTargetLowering();
4674 LLVMContext
&Ctx
= MF
.getFunction().getContext();
4675 auto &DL
= MF
.getDataLayout();
4676 if (!TLI
.isTruncateFree(WideTy
, NarrowTy
, DL
, Ctx
) ||
4677 !TLI
.isZExtFree(NarrowTy
, WideTy
, DL
, Ctx
))
4679 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC
, {NarrowTy
, WideTy
}}) ||
4680 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT
, {WideTy
, NarrowTy
}}))
4682 Register BinOpLHS
= LHSInst
->getOperand(1).getReg();
4683 Register BinOpRHS
= LHSInst
->getOperand(2).getReg();
4684 MatchInfo
= [=, &MI
](MachineIRBuilder
&B
) {
4685 auto NarrowLHS
= Builder
.buildTrunc(NarrowTy
, BinOpLHS
);
4686 auto NarrowRHS
= Builder
.buildTrunc(NarrowTy
, BinOpRHS
);
4688 Builder
.buildInstr(LHSOpc
, {NarrowTy
}, {NarrowLHS
, NarrowRHS
});
4689 auto Ext
= Builder
.buildZExt(WideTy
, NarrowBinOp
);
4690 Observer
.changingInstr(MI
);
4691 MI
.getOperand(1).setReg(Ext
.getReg(0));
4692 Observer
.changedInstr(MI
);
bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);

  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    Observer.changingInstr(MI);
    unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
                                                   : TargetOpcode::G_SADDO;
    MI.setDesc(Builder.getTII().get(NewOpc));
    MI.getOperand(3).setReg(MI.getOperand(2).getReg());
    Observer.changedInstr(MI);
  };
  return true;
}
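// Illustrative example: G_UMULO %x, 2 produces the same result and overflow
// bit as G_UADDO %x, %x, so the multiply is re-targeted to the add form with
// the constant operand replaced by %x.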
4715 bool CombinerHelper::matchMulOBy0(MachineInstr
&MI
, BuildFnTy
&MatchInfo
) {
4716 // (G_*MULO x, 0) -> 0 + no carry out
4717 assert(MI
.getOpcode() == TargetOpcode::G_UMULO
||
4718 MI
.getOpcode() == TargetOpcode::G_SMULO
);
4719 if (!mi_match(MI
.getOperand(3).getReg(), MRI
, m_SpecificICstOrSplat(0)))
4721 Register Dst
= MI
.getOperand(0).getReg();
4722 Register Carry
= MI
.getOperand(1).getReg();
4723 if (!isConstantLegalOrBeforeLegalizer(MRI
.getType(Dst
)) ||
4724 !isConstantLegalOrBeforeLegalizer(MRI
.getType(Carry
)))
4726 MatchInfo
= [=](MachineIRBuilder
&B
) {
4727 B
.buildConstant(Dst
, 0);
4728 B
.buildConstant(Carry
, 0);
4733 bool CombinerHelper::matchAddOBy0(MachineInstr
&MI
, BuildFnTy
&MatchInfo
) {
4734 // (G_*ADDO x, 0) -> x + no carry out
4735 assert(MI
.getOpcode() == TargetOpcode::G_UADDO
||
4736 MI
.getOpcode() == TargetOpcode::G_SADDO
);
4737 if (!mi_match(MI
.getOperand(3).getReg(), MRI
, m_SpecificICstOrSplat(0)))
4739 Register Carry
= MI
.getOperand(1).getReg();
4740 if (!isConstantLegalOrBeforeLegalizer(MRI
.getType(Carry
)))
4742 Register Dst
= MI
.getOperand(0).getReg();
4743 Register LHS
= MI
.getOperand(2).getReg();
4744 MatchInfo
= [=](MachineIRBuilder
&B
) {
4745 B
.buildCopy(Dst
, LHS
);
4746 B
.buildConstant(Carry
, 0);
4751 MachineInstr
*CombinerHelper::buildUDivUsingMul(MachineInstr
&MI
) {
4752 assert(MI
.getOpcode() == TargetOpcode::G_UDIV
);
4753 auto &UDiv
= cast
<GenericMachineInstr
>(MI
);
4754 Register Dst
= UDiv
.getReg(0);
4755 Register LHS
= UDiv
.getReg(1);
4756 Register RHS
= UDiv
.getReg(2);
4757 LLT Ty
= MRI
.getType(Dst
);
4758 LLT ScalarTy
= Ty
.getScalarType();
4759 const unsigned EltBits
= ScalarTy
.getScalarSizeInBits();
4760 LLT ShiftAmtTy
= getTargetLowering().getPreferredShiftAmountTy(Ty
);
4761 LLT ScalarShiftAmtTy
= ShiftAmtTy
.getScalarType();
4762 auto &MIB
= Builder
;
4763 MIB
.setInstrAndDebugLoc(MI
);
4765 bool UseNPQ
= false;
4766 SmallVector
<Register
, 16> PreShifts
, PostShifts
, MagicFactors
, NPQFactors
;
4768 auto BuildUDIVPattern
= [&](const Constant
*C
) {
4769 auto *CI
= cast
<ConstantInt
>(C
);
4770 const APInt
&Divisor
= CI
->getValue();
4771 UnsignedDivisonByConstantInfo magics
=
4772 UnsignedDivisonByConstantInfo::get(Divisor
);
4773 unsigned PreShift
= 0, PostShift
= 0;
4775 // If the divisor is even, we can avoid using the expensive fixup by
4776 // shifting the divided value upfront.
4777 if (magics
.IsAdd
!= 0 && !Divisor
[0]) {
4778 PreShift
= Divisor
.countTrailingZeros();
4779 // Get magic number for the shifted divisor.
4781 UnsignedDivisonByConstantInfo::get(Divisor
.lshr(PreShift
), PreShift
);
4782 assert(magics
.IsAdd
== 0 && "Should use cheap fixup now");
4785 APInt Magic
= magics
.Magic
;
4788 if (magics
.IsAdd
== 0 || Divisor
.isOneValue()) {
4789 assert(magics
.ShiftAmount
< Divisor
.getBitWidth() &&
4790 "We shouldn't generate an undefined shift!");
4791 PostShift
= magics
.ShiftAmount
;
4794 PostShift
= magics
.ShiftAmount
- 1;
4798 PreShifts
.push_back(
4799 MIB
.buildConstant(ScalarShiftAmtTy
, PreShift
).getReg(0));
4800 MagicFactors
.push_back(MIB
.buildConstant(ScalarTy
, Magic
).getReg(0));
4801 NPQFactors
.push_back(
4802 MIB
.buildConstant(ScalarTy
,
4803 SelNPQ
? APInt::getOneBitSet(EltBits
, EltBits
- 1)
4804 : APInt::getZero(EltBits
))
4806 PostShifts
.push_back(
4807 MIB
.buildConstant(ScalarShiftAmtTy
, PostShift
).getReg(0));
4812 // Collect the shifts/magic values from each element.
4813 bool Matched
= matchUnaryPredicate(MRI
, RHS
, BuildUDIVPattern
);
4815 assert(Matched
&& "Expected unary predicate match to succeed");
4817 Register PreShift
, PostShift
, MagicFactor
, NPQFactor
;
4818 auto *RHSDef
= getOpcodeDef
<GBuildVector
>(RHS
, MRI
);
4820 PreShift
= MIB
.buildBuildVector(ShiftAmtTy
, PreShifts
).getReg(0);
4821 MagicFactor
= MIB
.buildBuildVector(Ty
, MagicFactors
).getReg(0);
4822 NPQFactor
= MIB
.buildBuildVector(Ty
, NPQFactors
).getReg(0);
4823 PostShift
= MIB
.buildBuildVector(ShiftAmtTy
, PostShifts
).getReg(0);
4825 assert(MRI
.getType(RHS
).isScalar() &&
4826 "Non-build_vector operation should have been a scalar");
4827 PreShift
= PreShifts
[0];
4828 MagicFactor
= MagicFactors
[0];
4829 PostShift
= PostShifts
[0];
4833 Q
= MIB
.buildLShr(Ty
, Q
, PreShift
).getReg(0);
4835 // Multiply the numerator (operand 0) by the magic value.
4836 Q
= MIB
.buildUMulH(Ty
, Q
, MagicFactor
).getReg(0);
4839 Register NPQ
= MIB
.buildSub(Ty
, LHS
, Q
).getReg(0);
4841 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
4842 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
4844 NPQ
= MIB
.buildUMulH(Ty
, NPQ
, NPQFactor
).getReg(0);
4846 NPQ
= MIB
.buildLShr(Ty
, NPQ
, MIB
.buildConstant(ShiftAmtTy
, 1)).getReg(0);
4848 Q
= MIB
.buildAdd(Ty
, NPQ
, Q
).getReg(0);
4851 Q
= MIB
.buildLShr(Ty
, Q
, PostShift
).getReg(0);
4852 auto One
= MIB
.buildConstant(Ty
, 1);
4853 auto IsOne
= MIB
.buildICmp(
4854 CmpInst::Predicate::ICMP_EQ
,
4855 Ty
.isScalar() ? LLT::scalar(1) : Ty
.changeElementSize(1), RHS
, One
);
4856 return MIB
.buildSelect(Ty
, IsOne
, LHS
, Q
);
4859 bool CombinerHelper::matchUDivByConst(MachineInstr
&MI
) {
4860 assert(MI
.getOpcode() == TargetOpcode::G_UDIV
);
4861 Register Dst
= MI
.getOperand(0).getReg();
4862 Register RHS
= MI
.getOperand(2).getReg();
4863 LLT DstTy
= MRI
.getType(Dst
);
4864 auto *RHSDef
= MRI
.getVRegDef(RHS
);
4865 if (!isConstantOrConstantVector(*RHSDef
, MRI
))
4868 auto &MF
= *MI
.getMF();
4869 AttributeList Attr
= MF
.getFunction().getAttributes();
4870 const auto &TLI
= getTargetLowering();
4871 LLVMContext
&Ctx
= MF
.getFunction().getContext();
4872 auto &DL
= MF
.getDataLayout();
4873 if (TLI
.isIntDivCheap(getApproximateEVTForLLT(DstTy
, DL
, Ctx
), Attr
))
4876 // Don't do this for minsize because the instruction sequence is usually
4878 if (MF
.getFunction().hasMinSize())
4881 // Don't do this if the types are not going to be legal.
4883 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL
, {DstTy
, DstTy
}}))
4885 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH
, {DstTy
}}))
4887 if (!isLegalOrBeforeLegalizer(
4888 {TargetOpcode::G_ICMP
,
4889 {DstTy
.isVector() ? DstTy
.changeElementSize(1) : LLT::scalar(1),
4894 auto CheckEltValue
= [&](const Constant
*C
) {
4895 if (auto *CI
= dyn_cast_or_null
<ConstantInt
>(C
))
4896 return !CI
->isZero();
4899 return matchUnaryPredicate(MRI
, RHS
, CheckEltValue
);
void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
  auto *NewMI = buildUDivUsingMul(MI);
  replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
}
4907 bool CombinerHelper::matchUMulHToLShr(MachineInstr
&MI
) {
4908 assert(MI
.getOpcode() == TargetOpcode::G_UMULH
);
4909 Register RHS
= MI
.getOperand(2).getReg();
4910 Register Dst
= MI
.getOperand(0).getReg();
4911 LLT Ty
= MRI
.getType(Dst
);
4912 LLT ShiftAmtTy
= getTargetLowering().getPreferredShiftAmountTy(Ty
);
4913 auto MatchPow2ExceptOne
= [&](const Constant
*C
) {
4914 if (auto *CI
= dyn_cast
<ConstantInt
>(C
))
4915 return CI
->getValue().isPowerOf2() && !CI
->getValue().isOne();
4918 if (!matchUnaryPredicate(MRI
, RHS
, MatchPow2ExceptOne
, false))
4920 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR
, {Ty
, ShiftAmtTy
}});
4923 void CombinerHelper::applyUMulHToLShr(MachineInstr
&MI
) {
4924 Register LHS
= MI
.getOperand(1).getReg();
4925 Register RHS
= MI
.getOperand(2).getReg();
4926 Register Dst
= MI
.getOperand(0).getReg();
4927 LLT Ty
= MRI
.getType(Dst
);
4928 LLT ShiftAmtTy
= getTargetLowering().getPreferredShiftAmountTy(Ty
);
4929 unsigned NumEltBits
= Ty
.getScalarSizeInBits();
4931 Builder
.setInstrAndDebugLoc(MI
);
4932 auto LogBase2
= buildLogBase2(RHS
, Builder
);
4934 Builder
.buildSub(Ty
, Builder
.buildConstant(Ty
, NumEltBits
), LogBase2
);
4935 auto Trunc
= Builder
.buildZExtOrTrunc(ShiftAmtTy
, ShiftAmt
);
4936 Builder
.buildLShr(Dst
, LHS
, Trunc
);
4937 MI
.eraseFromParent();
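// Illustrative example: for a 32-bit G_UMULH %x, 8, LogBase2 is 3, so the
// high half equals %x >> (32 - 3) and the combine emits G_LSHR %x, 29.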
4940 bool CombinerHelper::matchRedundantNegOperands(MachineInstr
&MI
,
4941 BuildFnTy
&MatchInfo
) {
4942 unsigned Opc
= MI
.getOpcode();
4943 assert(Opc
== TargetOpcode::G_FADD
|| Opc
== TargetOpcode::G_FSUB
||
4944 Opc
== TargetOpcode::G_FMUL
|| Opc
== TargetOpcode::G_FDIV
||
4945 Opc
== TargetOpcode::G_FMAD
|| Opc
== TargetOpcode::G_FMA
);
4947 Register Dst
= MI
.getOperand(0).getReg();
4948 Register X
= MI
.getOperand(1).getReg();
4949 Register Y
= MI
.getOperand(2).getReg();
4950 LLT Type
= MRI
.getType(Dst
);
4952 // fold (fadd x, fneg(y)) -> (fsub x, y)
4953 // fold (fadd fneg(y), x) -> (fsub x, y)
4954 // G_ADD is commutative so both cases are checked by m_GFAdd
4955 if (mi_match(Dst
, MRI
, m_GFAdd(m_Reg(X
), m_GFNeg(m_Reg(Y
)))) &&
4956 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB
, {Type
}})) {
4957 Opc
= TargetOpcode::G_FSUB
;
4959 /// fold (fsub x, fneg(y)) -> (fadd x, y)
4960 else if (mi_match(Dst
, MRI
, m_GFSub(m_Reg(X
), m_GFNeg(m_Reg(Y
)))) &&
4961 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD
, {Type
}})) {
4962 Opc
= TargetOpcode::G_FADD
;
4964 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
4965 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
4966 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
4967 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
4968 else if ((Opc
== TargetOpcode::G_FMUL
|| Opc
== TargetOpcode::G_FDIV
||
4969 Opc
== TargetOpcode::G_FMAD
|| Opc
== TargetOpcode::G_FMA
) &&
4970 mi_match(X
, MRI
, m_GFNeg(m_Reg(X
))) &&
4971 mi_match(Y
, MRI
, m_GFNeg(m_Reg(Y
)))) {
4976 MatchInfo
= [=, &MI
](MachineIRBuilder
&B
) {
4977 Observer
.changingInstr(MI
);
4978 MI
.setDesc(B
.getTII().get(Opc
));
4979 MI
.getOperand(1).setReg(X
);
4980 MI
.getOperand(2).setReg(Y
);
4981 Observer
.changedInstr(MI
);
/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
/// due to global flags or MachineInstr flags.
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
  if (MI.getOpcode() != TargetOpcode::G_FMUL)
    return false;
  return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
}

static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
                        const MachineRegisterInfo &MRI) {
  return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
                       MRI.use_instr_nodbg_end()) >
         std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
                       MRI.use_instr_nodbg_end());
}
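// Illustrative note: when both operands of a G_FADD are contractable G_FMULs,
// e.g. (fadd (fmul u, v), (fmul x, y)), the matchers below use hasMoreUses to
// prefer fusing the multiply with fewer other users, since that one is more
// likely to disappear entirely after the fold.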
5002 bool CombinerHelper::canCombineFMadOrFMA(MachineInstr
&MI
,
5003 bool &AllowFusionGlobally
,
5004 bool &HasFMAD
, bool &Aggressive
,
5005 bool CanReassociate
) {
5007 auto *MF
= MI
.getMF();
5008 const auto &TLI
= *MF
->getSubtarget().getTargetLowering();
5009 const TargetOptions
&Options
= MF
->getTarget().Options
;
5010 LLT DstType
= MRI
.getType(MI
.getOperand(0).getReg());
5012 if (CanReassociate
&&
5013 !(Options
.UnsafeFPMath
|| MI
.getFlag(MachineInstr::MIFlag::FmReassoc
)))
5016 // Floating-point multiply-add with intermediate rounding.
5017 HasFMAD
= (LI
&& TLI
.isFMADLegal(MI
, DstType
));
5018 // Floating-point multiply-add without intermediate rounding.
5019 bool HasFMA
= TLI
.isFMAFasterThanFMulAndFAdd(*MF
, DstType
) &&
5020 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA
, {DstType
}});
5021 // No valid opcode, do not combine.
5022 if (!HasFMAD
&& !HasFMA
)
5025 AllowFusionGlobally
= Options
.AllowFPOpFusion
== FPOpFusion::Fast
||
5026 Options
.UnsafeFPMath
|| HasFMAD
;
5027 // If the addition is not contractable, do not combine.
5028 if (!AllowFusionGlobally
&& !MI
.getFlag(MachineInstr::MIFlag::FmContract
))
5031 Aggressive
= TLI
.enableAggressiveFMAFusion(DstType
);
5035 bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
5036 MachineInstr
&MI
, std::function
<void(MachineIRBuilder
&)> &MatchInfo
) {
5037 assert(MI
.getOpcode() == TargetOpcode::G_FADD
);
5039 bool AllowFusionGlobally
, HasFMAD
, Aggressive
;
5040 if (!canCombineFMadOrFMA(MI
, AllowFusionGlobally
, HasFMAD
, Aggressive
))
5043 Register Op1
= MI
.getOperand(1).getReg();
5044 Register Op2
= MI
.getOperand(2).getReg();
5045 DefinitionAndSourceRegister LHS
= {MRI
.getVRegDef(Op1
), Op1
};
5046 DefinitionAndSourceRegister RHS
= {MRI
.getVRegDef(Op2
), Op2
};
5047 unsigned PreferredFusedOpcode
=
5048 HasFMAD
? TargetOpcode::G_FMAD
: TargetOpcode::G_FMA
;
5050 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5051 // prefer to fold the multiply with fewer uses.
5052 if (Aggressive
&& isContractableFMul(*LHS
.MI
, AllowFusionGlobally
) &&
5053 isContractableFMul(*RHS
.MI
, AllowFusionGlobally
)) {
5054 if (hasMoreUses(*LHS
.MI
, *RHS
.MI
, MRI
))
5055 std::swap(LHS
, RHS
);
5058 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
5059 if (isContractableFMul(*LHS
.MI
, AllowFusionGlobally
) &&
5060 (Aggressive
|| MRI
.hasOneNonDBGUse(LHS
.Reg
))) {
5061 MatchInfo
= [=, &MI
](MachineIRBuilder
&B
) {
5062 B
.buildInstr(PreferredFusedOpcode
, {MI
.getOperand(0).getReg()},
5063 {LHS
.MI
->getOperand(1).getReg(),
5064 LHS
.MI
->getOperand(2).getReg(), RHS
.Reg
});
5069 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
5070 if (isContractableFMul(*RHS
.MI
, AllowFusionGlobally
) &&
5071 (Aggressive
|| MRI
.hasOneNonDBGUse(RHS
.Reg
))) {
5072 MatchInfo
= [=, &MI
](MachineIRBuilder
&B
) {
5073 B
.buildInstr(PreferredFusedOpcode
, {MI
.getOperand(0).getReg()},
5074 {RHS
.MI
->getOperand(1).getReg(),
5075 RHS
.MI
->getOperand(2).getReg(), LHS
.Reg
});
bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FADD);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
      std::swap(LHS, RHS);
  }

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  MachineInstr *FpExtSrc;
  if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
      isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
      auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
    };
    return true;
  }

  // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
  // Note: Commutes FADD operands.
  if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
      isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
      auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
    };
    return true;
  }

  return false;
}
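
/// Match an fadd whose operand is already a fused multiply-add ending in an
/// fmul, i.e. (fadd (fma x, y, (fmul u, v)), z) or its commuted form, and
/// re-associate it into a chain of two fused operations:
/// (fma x, y, (fma u, v, z)). Requires reassociation to be allowed.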
bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FADD);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
    return false;

  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
      std::swap(LHS, RHS);
  }

  MachineInstr *FMA = nullptr;
  Register Z;
  // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
  if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
      (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
       TargetOpcode::G_FMUL) &&
      MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
      MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
    FMA = LHS.MI;
    Z = RHS.Reg;
  }
  // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
  else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
           (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
            TargetOpcode::G_FMUL) &&
           MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
           MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
    FMA = RHS.MI;
    Z = LHS.Reg;
  }

  if (FMA) {
    MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
    Register X = FMA->getOperand(1).getReg();
    Register Y = FMA->getOperand(2).getReg();
    Register U = FMulMI->getOperand(1).getReg();
    Register V = FMulMI->getOperand(2).getReg();

    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
      B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {X, Y, InnerFMA});
    };
    return true;
  }

  return false;
}
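
/// Aggressive variant that additionally looks through G_FPEXT inside an
/// existing fused operation, e.g. (fadd (fma x, y, (fpext (fmul u, v))), z),
/// rebuilding it as (fma x, y, (fma (fpext u), (fpext v), z)); the commuted
/// and fpext-of-fma forms handled are documented inline below.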
bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FADD);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  if (!Aggressive)
    return false;

  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
      std::swap(LHS, RHS);
  }

  // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
  auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
                                 Register Y, MachineIRBuilder &B) {
    Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
    Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
    Register InnerFMA =
        B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
            .getReg(0);
    B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                 {X, Y, InnerFMA});
  };

  MachineInstr *FMulMI, *FMAMI;
  // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
  //   -> (fma x, y, (fma (fpext u), (fpext v), z))
  if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
      mi_match(LHS.MI->getOperand(3).getReg(), MRI,
               m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=](MachineIRBuilder &B) {
      buildMatchInfo(FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), RHS.Reg,
                     LHS.MI->getOperand(1).getReg(),
                     LHS.MI->getOperand(2).getReg(), B);
    };
    return true;
  }

  // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
  //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
  // FIXME: This turns two single-precision and one double-precision
  // operation into two double-precision operations, which might not be
  // interesting for all targets, especially GPUs.
  if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
      FMAMI->getOpcode() == PreferredFusedOpcode) {
    MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
    if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
        TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                            MRI.getType(FMAMI->getOperand(0).getReg()))) {
      MatchInfo = [=](MachineIRBuilder &B) {
        Register X = FMAMI->getOperand(1).getReg();
        Register Y = FMAMI->getOperand(2).getReg();
        X = B.buildFPExt(DstType, X).getReg(0);
        Y = B.buildFPExt(DstType, Y).getReg(0);
        buildMatchInfo(FMulMI->getOperand(1).getReg(),
                       FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
      };
      return true;
    }
  }

  // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
  //   -> (fma x, y, (fma (fpext u), (fpext v), z))
  if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
      mi_match(RHS.MI->getOperand(3).getReg(), MRI,
               m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=](MachineIRBuilder &B) {
      buildMatchInfo(FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), LHS.Reg,
                     RHS.MI->getOperand(1).getReg(),
                     RHS.MI->getOperand(2).getReg(), B);
    };
    return true;
  }

  // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
  //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
  // FIXME: This turns two single-precision and one double-precision
  // operation into two double-precision operations, which might not be
  // interesting for all targets, especially GPUs.
  if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
      FMAMI->getOpcode() == PreferredFusedOpcode) {
    MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
    if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
        TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                            MRI.getType(FMAMI->getOperand(0).getReg()))) {
      MatchInfo = [=](MachineIRBuilder &B) {
        Register X = FMAMI->getOperand(1).getReg();
        Register Y = FMAMI->getOperand(2).getReg();
        X = B.buildFPExt(DstType, X).getReg(0);
        Y = B.buildFPExt(DstType, Y).getReg(0);
        buildMatchInfo(FMulMI->getOperand(1).getReg(),
                       FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
      };
      return true;
    }
  }

  return false;
}
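
/// Match (fsub (fmul x, y), z) -> (fma x, y, -z) and
/// (fsub x, (fmul y, z)) -> (fma -y, z, x), inserting the required G_FNEG.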
bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  bool FirstMulHasFewerUses = true;
  if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
      hasMoreUses(*LHS.MI, *RHS.MI, MRI))
    FirstMulHasFewerUses = false;

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
  if (FirstMulHasFewerUses &&
      (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
       (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {LHS.MI->getOperand(1).getReg(),
                    LHS.MI->getOperand(2).getReg(), NegZ});
    };
    return true;
  }
  // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
  else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
            (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegY =
          B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
    };
    return true;
  }

  return false;
}
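
/// Match fsub patterns where one operand is a negated multiply:
/// (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) and
/// (fsub x, (fneg (fmul y, z))) -> (fma y, z, x).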
bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  MachineInstr *FMulMI;
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
      (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
                      MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally)) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegX =
          B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegX, FMulMI->getOperand(2).getReg(), NegZ});
    };
    return true;
  }

  // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
  if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
      (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
                      MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally)) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FMulMI->getOperand(1).getReg(),
                    FMulMI->getOperand(2).getReg(), LHSReg});
    };
    return true;
  }

  return false;
}
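
/// Match fsub patterns where one operand is an extended multiply:
/// (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) and
/// (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x).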
bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  MachineInstr *FMulMI;
  // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
  if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FpExtX =
          B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register FpExtY =
          B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
      Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FpExtX, FpExtY, NegZ});
    };
    return true;
  }

  // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
  if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FpExtY =
          B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
      Register FpExtZ =
          B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegY, FpExtZ, LHSReg});
    };
    return true;
  }

  return false;
}
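
/// Match fsub patterns where one operand is both negated and extended (in
/// either order), folding the negation into the fused result as documented in
/// the inline comments below.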
bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
                            MachineIRBuilder &B) {
    Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
    Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
    B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
  };

  MachineInstr *FMulMI;
  // fold (fsub (fpext (fneg (fmul x, y))), z) ->
  //      (fneg (fma (fpext x), (fpext y), z))
  // fold (fsub (fneg (fpext (fmul x, y))), z) ->
  //      (fneg (fma (fpext x), (fpext y), z))
  if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
       mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
      buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), RHSReg, B);
      B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
    };
    return true;
  }

  // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
  // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
  if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
       mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), LHSReg, B);
    };
    return true;
  }

  return false;
}
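
/// Turn a G_SELECT with an i1 condition and i1 operands into plain logic ops.
///
/// A rough generic-MIR sketch of the "select Cond, 1, F --> or Cond, F" case;
/// the register names are only illustrative:
///   %one:_(s1) = G_CONSTANT i1 true
///   %d:_(s1) = G_SELECT %cond, %one, %f
/// is rewritten as:
///   %d:_(s1) = G_OR %cond, %f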
bool CombinerHelper::matchSelectToLogical(MachineInstr &MI,
                                          BuildFnTy &MatchInfo) {
  GSelect &Sel = cast<GSelect>(MI);
  Register DstReg = Sel.getReg(0);
  Register Cond = Sel.getCondReg();
  Register TrueReg = Sel.getTrueReg();
  Register FalseReg = Sel.getFalseReg();

  auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI);
  auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI);

  const LLT CondTy = MRI.getType(Cond);
  const LLT OpTy = MRI.getType(TrueReg);
  if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1)
    return false;

  // We have a boolean select.

  // select Cond, Cond, F --> or Cond, F
  // select Cond, 1, F    --> or Cond, F
  auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI);
  if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) {
    MatchInfo = [=](MachineIRBuilder &MIB) {
      MIB.buildOr(DstReg, Cond, FalseReg);
    };
    return true;
  }

  // select Cond, T, Cond --> and Cond, T
  // select Cond, T, 0    --> and Cond, T
  auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI);
  if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) {
    MatchInfo = [=](MachineIRBuilder &MIB) {
      MIB.buildAnd(DstReg, Cond, TrueReg);
    };
    return true;
  }

  // select Cond, T, 1 --> or (not Cond), T
  if (MaybeCstFalse && MaybeCstFalse->isOne()) {
    MatchInfo = [=](MachineIRBuilder &MIB) {
      MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg);
    };
    return true;
  }

  // select Cond, 0, F --> and (not Cond), F
  if (MaybeCstTrue && MaybeCstTrue->isZero()) {
    MatchInfo = [=](MachineIRBuilder &MIB) {
      MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg);
    };
    return true;
  }

  return false;
}
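
/// Top-level driver: try a small set of generic combines (copies, extending
/// loads, indexed load/store formation) on \p MI and return true as soon as
/// one of them applies.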
bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;
  if (tryCombineIndexedLoadStore(MI))