//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;
using namespace MIPatternMatch;
/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
/// with any leftover piece as type \p LeftoverTy.
///
/// Returns -1 in the first element of the pair if the breakdown is not
/// satisfiable.
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  assert(Size > NarrowSize);

  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (NarrowTy.isVector()) {
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), OrigTy.getElementType());
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}
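// Illustrative example (not part of the original source): for OrigTy = s88 and
// NarrowTy = s32, the computation above gives NumParts = 2 and
// LeftoverSize = 24, so LeftoverTy becomes s24 and the result is {2, 1}.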
static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {

  if (!Ty.isScalar())
    return nullptr;

  switch (Ty.getSizeInBits()) {
  case 16:
    return Type::getHalfTy(Ctx);
  case 32:
    return Type::getFloatTy(Ctx);
  case 64:
    return Type::getDoubleTy(Ctx);
  case 80:
    return Type::getX86_FP80Ty(Ctx);
  case 128:
    return Type::getFP128Ty(Ctx);
  default:
    return nullptr;
  }
}
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B, GISelKnownBits *KB)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  if (isa<GIntrinsic>(MI))
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case AlreadyLegal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
                                                     : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  // Merge sub-vectors with different number of elements and insert into DstReg.
  if (ResultTy.isVector()) {
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    SmallVector<Register, 8> AllRegs(PartRegs.begin(), PartRegs.end());
    AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
    return mergeMixedSubvectors(DstReg, AllRegs);
  }

  SmallVector<Register> GCDRegs;
  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
                                       Register Reg) {
  LLT Ty = MRI.getType(Reg);
  SmallVector<Register, 8> RegElts;
  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
               MIRBuilder, MRI);
  Elts.append(RegElts);
}
/// Merge \p PartRegs with different types into \p DstReg.
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
                                           ArrayRef<Register> PartRegs) {
  SmallVector<Register, 8> AllElts;
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  Register Leftover = PartRegs[PartRegs.size() - 1];
  if (!MRI.getType(Leftover).isVector())
    AllElts.push_back(Leftover);
  else
    appendVectorElts(AllElts, Leftover);

  MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
}
/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}
void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                     LLT GCDTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy == GCDTy) {
    // If the source already evenly divides the result type, we don't need to
    // do anything.
    Parts.push_back(SrcReg);
  } else {
    // Need to split into common type sized pieces.
    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    getUnmergeResults(Parts, *Unmerge);
  }
}
LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                                    LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
  extractGCDType(Parts, GCDTy, SrcReg);
  return GCDTy;
}
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can
  // reuse the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}
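// Illustrative trace (not part of the original source): with DstTy = s64,
// NarrowTy = s32, GCDTy = s16 and two s16 sources, LCMTy is s64, so
// NumParts = 2 and NumSubParts = 2. Part 0 merges the two sources into an s32;
// part 1 is entirely padding, so with PadStrategy == G_ZEXT it becomes a
// single s32 zero constant rather than a merge of two s16 zeros.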
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.

  if (DstTy == LCMTy) {
    MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    MIRBuilder.buildUnmerge(UnmergeDefs,
                            MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
    return;
  }

  llvm_unreachable("unhandled case");
}
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_MUL:
    RTLIBCASE_INT(MUL_I);
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FEXP10:
    RTLIBCASE(EXP10_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FPOWI:
    RTLIBCASE(POWI_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FTAN:
    RTLIBCASE(TAN_F);
  case TargetOpcode::G_FASIN:
    RTLIBCASE(ASIN_F);
  case TargetOpcode::G_FACOS:
    RTLIBCASE(ACOS_F);
  case TargetOpcode::G_FATAN:
    RTLIBCASE(ATAN_F);
  case TargetOpcode::G_FATAN2:
    RTLIBCASE(ATAN2_F);
  case TargetOpcode::G_FSINH:
    RTLIBCASE(SINH_F);
  case TargetOpcode::G_FCOSH:
    RTLIBCASE(COSH_F);
  case TargetOpcode::G_FTANH:
    RTLIBCASE(TANH_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FLDEXP:
    RTLIBCASE(LDEXP_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_TRUNC:
    RTLIBCASE(TRUNC_F);
  case TargetOpcode::G_INTRINSIC_ROUND:
    RTLIBCASE(ROUND_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  case TargetOpcode::G_INTRINSIC_LRINT:
    RTLIBCASE(LRINT_F);
  case TargetOpcode::G_INTRINSIC_LLRINT:
    RTLIBCASE(LLRINT_F);
  }
  llvm_unreachable("Unknown libcall function");
}
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
                                    MachineInstr &MI,
                                    const TargetInstrInfo &TII,
                                    MachineRegisterInfo &MRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return or if we
  // have a `thisreturn` callee, and a sequence like:
  //
  //   G_MEMCPY %0, %1, %2
  //   $x0 = COPY %0
  //   RET_ReallyLR implicit $x0
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next != MBB.instr_end() && Next->isCopy()) {
    if (MI.getOpcode() == TargetOpcode::G_BZERO)
      return false;

    // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
    // memcpy/etc routines return the same parameter. For others it will be the
    // returned value.
    Register VReg = MI.getOperand(0).getReg();
    if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
      return false;

    Register PReg = Next->getOperand(0).getReg();
    if (!PReg.isPhysical())
      return false;

    auto Ret = next_nodbg(Next, MBB.instr_end());
    if (Ret == MBB.instr_end() || !Ret->isReturn())
      return false;

    if (Ret->getNumImplicitOperands() != 1)
      return false;

    if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
      return false;

    // Skip over the COPY that we just validated.
    Next = Ret;
  }

  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
                    MachineInstr *MI) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  if (MI)
    Info.IsTailCall =
        (Result.Ty->isVoidTy() ||
         Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
        isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
                                *MIRBuilder.getMRI());

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (MI && Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI->getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI->getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  return LegalizerHelper::Legalized;
}
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    LostDebugLocObserver &LocObserver, MachineInstr *MI) {
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);
  if (!Name)
    return LegalizerHelper::UnableToLegalize;
  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
  return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
}
// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType, LostDebugLocObserver &LocObserver) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // FIXME: What does the original arg index mean here?
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
    Args.push_back({MO.getReg(), OpType, 0});
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), OpType, 0}, Args,
                       LocObserver, &MI);
}
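// Illustrative note (not part of the original source): for a 64-bit G_FREM,
// getRTLibDesc returns RTLIB::REM_F64, so the instruction becomes a call to
// fmod() with both operands and the result lowered as double.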
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  Info.IsTailCall =
      MI.getOperand(MI.getNumOperands() - 1).getImm() &&
      isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  return LegalizerHelper::Legalized;
}
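// Illustrative note (not part of the original source): a G_MEMCPY whose
// trailing 'tail' immediate is set becomes a (possibly tail) call to memcpy,
// with the destination argument marked 'returned' to match the libc contract.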
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  auto &AtomicMI = cast<GMemOperation>(MI);
  auto &MMO = AtomicMI.getMMO();
  auto Ordering = MMO.getMergedOrdering();
  LLT MemType = MMO.getMemoryType();
  uint64_t MemSize = MemType.getSizeInBytes();
  if (MemType.isVector())
    return RTLIB::UNKNOWN_LIBCALL;

#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
#define LCALL5(A)                                                              \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XCHG: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_AND: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_OR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XOR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  default:
    return RTLIB::UNKNOWN_LIBCALL;
  }
#undef LCALLS
#undef LCALL5
}
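// Illustrative note (assumption, not from the original source): on AArch64 a
// 4-byte acquire G_ATOMICRMW_OR would select OUTLINE_ATOMIC_LDSET with the
// ACQ ordering column, i.e. a call such as __aarch64_ldset4_acq.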
static LegalizerHelper::LegalizeResult
createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  Type *RetTy;
  SmallVector<Register> RetRegs;
  SmallVector<CallLowering::ArgInfo, 3> Args;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    Register Success;
    LLT SuccessLLT;
    auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
        MI.getFirst4RegLLTs();
    RetRegs.push_back(Ret);
    RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
    if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
      std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
               NewLLT) = MI.getFirst5RegLLTs();
      RetRegs.push_back(Success);
      RetTy = StructType::get(
          Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
    }
    Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
    Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
    Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
    break;
  }
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR: {
    auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
    RetRegs.push_back(Ret);
    RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
    if (Opc == TargetOpcode::G_ATOMICRMW_AND)
      Val =
          MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
              .getReg(0);
    else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
      Val =
          MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
              .getReg(0);
    Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
    Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
    break;
  }
  default:
    llvm_unreachable("unsupported opcode");
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType, LostDebugLocObserver &LocObserver,
                  const TargetLowering &TLI, bool IsSigned = false) {
  CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
  if (FromType->isIntegerTy()) {
    if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
      Arg.Flags[0].setSExt();
    else
      Arg.Flags[0].setZExt();
  }

  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
                       &MI);
}
static RTLIB::Libcall
getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
  RTLIB::Libcall RTLibcall;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_GET_FPENV:
    RTLibcall = RTLIB::FEGETENV;
    break;
  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_RESET_FPENV:
    RTLibcall = RTLIB::FESETENV;
    break;
  case TargetOpcode::G_GET_FPMODE:
    RTLibcall = RTLIB::FEGETMODE;
    break;
  case TargetOpcode::G_SET_FPMODE:
  case TargetOpcode::G_RESET_FPMODE:
    RTLibcall = RTLIB::FESETMODE;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }
  return RTLibcall;
}
// Some library functions that read FP state (fegetmode, fegetenv) write the
// state into a region in memory. IR intrinsics that do the same operations
// (get_fpmode, get_fpenv) return the state as an integer value. To implement
// these intrinsics via the library functions, we need to use a temporary
// variable, for example:
//
//     %0:_(s32) = G_GET_FPMODE
//
// is transformed to:
//
//     %1:_(p0) = G_FRAME_INDEX %stack.0
//     BL &fegetmode
//     %0:_(s32) = G_LOAD %1
//
LegalizerHelper::LegalizeResult
LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
                                       MachineInstr &MI,
                                       LostDebugLocObserver &LocObserver) {
  const DataLayout &DL = MIRBuilder.getDataLayout();
  auto &MF = MIRBuilder.getMF();
  auto &MRI = *MIRBuilder.getMRI();
  auto &Ctx = MF.getFunction().getContext();

  // Create temporary, where the library function will put the read state.
  Register Dst = MI.getOperand(0).getReg();
  LLT StateTy = MRI.getType(Dst);
  TypeSize StateSize = StateTy.getSizeInBytes();
  Align TempAlign = getStackTemporaryAlignment(StateTy);
  MachinePointerInfo TempPtrInfo;
  auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);

  // Create a call to the library function, with the temporary as an argument.
  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
  Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
  RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
  auto Res =
      createLibcall(MIRBuilder, RTLibcall,
                    CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
                    CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
                    LocObserver, nullptr);
  if (Res != LegalizerHelper::Legalized)
    return Res;

  // Create a load from the temporary.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
  MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);

  return LegalizerHelper::Legalized;
}
// Similar to `createGetStateLibcall`, this function calls a library function
// using transient space on the stack. In this case the library function reads
// the content of the memory region.
LegalizerHelper::LegalizeResult
LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
                                       MachineInstr &MI,
                                       LostDebugLocObserver &LocObserver) {
  const DataLayout &DL = MIRBuilder.getDataLayout();
  auto &MF = MIRBuilder.getMF();
  auto &MRI = *MIRBuilder.getMRI();
  auto &Ctx = MF.getFunction().getContext();

  // Create temporary, where the library function will get the new state.
  Register Src = MI.getOperand(0).getReg();
  LLT StateTy = MRI.getType(Src);
  TypeSize StateSize = StateTy.getSizeInBytes();
  Align TempAlign = getStackTemporaryAlignment(StateTy);
  MachinePointerInfo TempPtrInfo;
  auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);

  // Put the new state into the temporary.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
  MIRBuilder.buildStore(Src, Temp, *MMO);

  // Create a call to the library function, with the temporary as an argument.
  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
  Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
  RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
  return createLibcall(MIRBuilder, RTLibcall,
                       CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
                       CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
                       LocObserver, nullptr);
}
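// Illustrative example (not part of the original source), mirroring the
// comment above createGetStateLibcall:
//
//     G_SET_FPMODE %0:_(s32)
//
// becomes a G_FRAME_INDEX for a stack temporary, a G_STORE of %0 into it, and
// a call to &fesetmode taking the temporary's address.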
/// Returns the corresponding libcall for the given Pred and
/// the ICMP predicate that should be generated to compare with #0
/// after the libcall.
static std::pair<RTLIB::Libcall, CmpInst::Predicate>
getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)                                 \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return {RTLIB::LibcallPrefix##32, ICmpPred};                             \
    case 64:                                                                   \
      return {RTLIB::LibcallPrefix##64, ICmpPred};                             \
    case 128:                                                                  \
      return {RTLIB::LibcallPrefix##128, ICmpPred};                            \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Pred) {
  case CmpInst::FCMP_OEQ:
    RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
  case CmpInst::FCMP_UNE:
    RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
  case CmpInst::FCMP_OGE:
    RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
  case CmpInst::FCMP_OLT:
    RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
  case CmpInst::FCMP_OLE:
    RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
  case CmpInst::FCMP_OGT:
    RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
  case CmpInst::FCMP_UNO:
    RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
  default:
    return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
  }
}
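// Illustrative note (not part of the original source): FCMP_OEQ on 64-bit
// operands maps to RTLIB::OEQ_F64 (commonly the __eqdf2 soft-float routine),
// whose i32 result is then compared against 0 with ICMP_EQ.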
LegalizerHelper::LegalizeResult
LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
                                   MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  auto &MF = MIRBuilder.getMF();
  auto &Ctx = MF.getFunction().getContext();
  const GFCmp *Cmp = cast<GFCmp>(&MI);

  LLT OpLLT = MRI.getType(Cmp->getLHSReg());
  unsigned Size = OpLLT.getSizeInBits();
  if ((Size != 32 && Size != 64 && Size != 128) ||
      OpLLT != MRI.getType(Cmp->getRHSReg()))
    return UnableToLegalize;

  Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);

  // DstReg type is s32
  const Register DstReg = Cmp->getReg(0);
  LLT DstTy = MRI.getType(DstReg);
  const auto Cond = Cmp->getCond();

  // Reference:
  // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
  // Generates a libcall followed by ICMP.
  const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
                                const CmpInst::Predicate ICmpPred,
                                const DstOp &Res) -> Register {
    // FCMP libcall always returns an i32, and needs an ICMP with #0.
    constexpr LLT TempLLT = LLT::scalar(32);
    Register Temp = MRI.createGenericVirtualRegister(TempLLT);
    // Generate libcall, holding result in Temp
    const auto Status = createLibcall(
        MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
        {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
        LocObserver, &MI);
    if (Status != Legalized)
      return {};

    // Compare temp with #0 to get the final result.
    return MIRBuilder
        .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
        .getReg(0);
  };

  // Simple case if we have a direct mapping from predicate to libcall
  if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
      Libcall != RTLIB::UNKNOWN_LIBCALL &&
      ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
    if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
      return Legalized;
    }
    return UnableToLegalize;
  }

  // No direct mapping found, should be generated as combination of libcalls.

  switch (Cond) {
  case CmpInst::FCMP_UEQ: {
    // FCMP_UEQ: unordered or equal
    // Convert into (FCMP_OEQ || FCMP_UNO).

    const auto [OeqLibcall, OeqPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
    const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);

    const auto [UnoLibcall, UnoPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
    const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
    if (Oeq && Uno)
      MIRBuilder.buildOr(DstReg, Oeq, Uno);
    else
      return UnableToLegalize;

    break;
  }
  case CmpInst::FCMP_ONE: {
    // FCMP_ONE: ordered and operands are unequal
    // Convert into (!FCMP_OEQ && !FCMP_UNO).

    // We inverse the predicate instead of generating a NOT
    // to save one instruction.
    // On AArch64 isel can even select two cmp into a single ccmp.
    const auto [OeqLibcall, OeqPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
    const auto NotOeq =
        BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);

    const auto [UnoLibcall, UnoPred] =
        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
    const auto NotUno =
        BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);

    if (NotOeq && NotUno)
      MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
    else
      return UnableToLegalize;

    break;
  }
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_ULE:
  case CmpInst::FCMP_ORD: {
    // Convert into: !(inverse(Pred))
    // E.g. FCMP_ULT becomes !FCMP_OGE
    // This is equivalent to the following, but saves some instructions.
    //   MIRBuilder.buildNot(
    //       PredTy,
    //       MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
    //                            Op1, Op2));
    const auto [InversedLibcall, InversedPred] =
        getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
    if (!BuildLibcall(InversedLibcall,
                      CmpInst::getInversePredicate(InversedPred), DstReg))
      return UnableToLegalize;
    break;
  }
  default:
    return UnableToLegalize;
  }

  return Legalized;
}
// This function is used to legalize operations that set the default
// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)` is
// used for that. On most targets supported in glibc FE_DFL_MODE is defined as
// `((const femode_t *) -1)`. That assumption is used here. If for some target
// it is not true, the target must provide custom lowering.
LegalizerHelper::LegalizeResult
LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
                                         MachineInstr &MI,
                                         LostDebugLocObserver &LocObserver) {
  const DataLayout &DL = MIRBuilder.getDataLayout();
  auto &MF = MIRBuilder.getMF();
  auto &Ctx = MF.getFunction().getContext();

  // Create an argument for the library function.
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
  unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
  LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
  auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
  DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
  MIRBuilder.buildIntToPtr(Dest, DefValue);

  RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
  return createLibcall(MIRBuilder, RTLibcall,
                       CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
                       CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
                       LocObserver, nullptr);
}
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FATAN2:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT: {
    LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    Type *ITy = IntegerType::get(
        Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
    LegalizeResult Status =
        createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
                      {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
    if (Status != Legalized)
      return Status;
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FLDEXP: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = LLTy.getSizeInBits();
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    Type *ITy = IntegerType::get(
        Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
    SmallVector<CallLowering::ArgInfo, 2> Args = {
        {MI.getOperand(1).getReg(), HLTy, 0},
        {MI.getOperand(2).getReg(), ITy, 1}};
    Args[1].Flags[0].setSExt();
    LegalizeResult Status =
        createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
                      Args, LocObserver, &MI);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status =
        conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FCMP: {
    LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
    if (Status != Legalized)
      return Status;
    MI.eraseFromParent();
    return Status;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    Type *FromTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    Type *ToTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
      return UnableToLegalize;
    bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
    LegalizeResult Status =
        conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
                          LocObserver, TLI, IsSigned);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    auto Status = createAtomicLibcall(MIRBuilder, MI);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    LegalizeResult Result =
        createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
    if (Result != Legalized)
      return Result;
    MI.eraseFromParent();
    return Result;
  }
  case TargetOpcode::G_GET_FPENV:
  case TargetOpcode::G_GET_FPMODE: {
    LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
    if (Result != Legalized)
      return Result;
    break;
  }
  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_SET_FPMODE: {
    LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
    if (Result != Legalized)
      return Result;
    break;
  }
  case TargetOpcode::G_RESET_FPENV:
  case TargetOpcode::G_RESET_FPMODE: {
    LegalizeResult Result =
        createResetStateLibcall(MIRBuilder, MI, LocObserver);
    if (Result != Legalized)
      return Result;
    break;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}
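// Illustrative note (not part of the original source): a G_FSIN on an s64
// value takes the simpleLibcall path above and becomes a call to sin()
// operating on double, after which the original instruction is erased.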
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // If SizeOp0 is not an exact multiple of NarrowSize, emit
    // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
    // FIXME: Although this would also be legal for the general case, it causes
    //  a lot of regressions in the emitted code (superfluous COPYs, artifact
    //  combines not being hit). This seems to be a problem related to the
    //  artifact combiner.
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      if (DstTy.isVector())
        ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);

      Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
      MIRBuilder.buildAnyExt(DstReg, ImplicitReg);

      MI.eraseFromParent();
      return Legalized;
    }

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));

    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
          LeftoverTy,
          Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
  case TargetOpcode::G_FREEZE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    // Should widen scalar first
    if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
      return UnableToLegalize;

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
    SmallVector<Register, 8> Parts;
    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
      Parts.push_back(
          MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
              .getReg(0));
    }

    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    auto &LoadMI = cast<GLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      LoadMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    auto &LoadMI = cast<GExtLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = LoadMI.getMMO();
    unsigned MemSize = MMO.getSizeInBits().getValue();

    if (MemSize == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else if (MemSize < NarrowSize) {
      MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
    } else if (MemSize > NarrowSize) {
      // FIXME: Need to split the load.
      return UnableToLegalize;
    }

    if (isa<GZExtLoad>(LoadMI))
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    auto &StoreMI = cast<GStore>(MI);

    Register SrcReg = StoreMI.getValueReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
      StoreMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs(NumParts);
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2], MIRBuilder, MRI);
    }
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    Register LHS = MI.getOperand(2).getReg();
    LLT SrcTy = MRI.getType(LHS);
    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

    LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
    SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
                      LHSLeftoverRegs, MIRBuilder, MRI))
      return UnableToLegalize;

    LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
    SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
      return UnableToLegalize;

    // We now have the LHS and RHS of the compare split into narrow-type
    // registers, plus potentially some leftover type.
    Register Dst = MI.getOperand(0).getReg();
    LLT ResTy = MRI.getType(Dst);
    if (ICmpInst::isEquality(Pred)) {
      // For each part on the LHS and RHS, keep track of the result of XOR-ing
      // them together. For each equal part, the result should be all 0s. For
      // each non-equal part, we'll get at least one 1.
      auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
      SmallVector<Register, 4> Xors;
      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
        Xors.push_back(Xor);
      }

      // Build a G_XOR for each leftover register. Each G_XOR must be widened
      // to the desired narrow type so that we can OR them together later.
      SmallVector<Register, 4> WidenedXors;
      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                            /* PadStrategy = */ TargetOpcode::G_ZEXT);
        Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
      }

      // Now, for each part we broke up, we know if they are equal/not equal
      // based off the G_XOR. We can OR these all together and compare against
      // 0 to get the result.
      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
      auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
      for (unsigned I = 2, E = Xors.size(); I < E; ++I)
        Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
      MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
    } else {
      Register CmpIn;
      for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
        Register CmpOut;
        CmpInst::Predicate PartPred;

        if (I == E - 1 && LHSLeftoverRegs.empty()) {
          PartPred = Pred;
          CmpOut = Dst;
        } else {
          PartPred = ICmpInst::getUnsignedPredicate(Pred);
          CmpOut = MRI.createGenericVirtualRegister(ResTy);
        }

        if (!CmpIn)
          MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
                               RHSPartRegs[I]);
        else {
          auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
                                          RHSPartRegs[I]);
          auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
                                            LHSPartRegs[I], RHSPartRegs[I]);
          MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
        }

        CmpIn = CmpOut;
      }

      for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
        Register CmpOut;
        CmpInst::Predicate PartPred;

        if (I == E - 1 && LHSLeftoverRegs.empty()) {
          PartPred = Pred;
          CmpOut = Dst;
        } else {
          PartPred = ICmpInst::getUnsignedPredicate(Pred);
          CmpOut = MRI.createGenericVirtualRegister(ResTy);
);
1814 MIRBuilder
.buildICmp(PartPred
, CmpOut
, LHSLeftoverRegs
[I
],
1815 RHSLeftoverRegs
[I
]);
1817 auto Cmp
= MIRBuilder
.buildICmp(PartPred
, ResTy
, LHSLeftoverRegs
[I
],
1818 RHSLeftoverRegs
[I
]);
1820 MIRBuilder
.buildICmp(CmpInst::Predicate::ICMP_EQ
, ResTy
,
1821 LHSLeftoverRegs
[I
], RHSLeftoverRegs
[I
]);
1822 MIRBuilder
.buildSelect(CmpOut
, CmpEq
, CmpIn
, Cmp
);
1828 MI
.eraseFromParent();
1831 case TargetOpcode::G_FCMP
:
1833 return UnableToLegalize
;
1835 Observer
.changingInstr(MI
);
1836 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_ZEXT
);
1837 Observer
.changedInstr(MI
);
1840 case TargetOpcode::G_SEXT_INREG
: {
1842 return UnableToLegalize
;
1844 int64_t SizeInBits
= MI
.getOperand(2).getImm();
1846 // So long as the new type has more bits than the bits we're extending we
1847 // don't need to break it apart.
1848 if (NarrowTy
.getScalarSizeInBits() > SizeInBits
) {
1849 Observer
.changingInstr(MI
);
1850 // We don't lose any non-extension bits by truncating the src and
1851 // sign-extending the dst.
1852 MachineOperand
&MO1
= MI
.getOperand(1);
1853 auto TruncMIB
= MIRBuilder
.buildTrunc(NarrowTy
, MO1
);
1854 MO1
.setReg(TruncMIB
.getReg(0));
1856 MachineOperand
&MO2
= MI
.getOperand(0);
1857 Register DstExt
= MRI
.createGenericVirtualRegister(NarrowTy
);
1858 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1859 MIRBuilder
.buildSExt(MO2
, DstExt
);
1861 Observer
.changedInstr(MI
);
1865 // Break it apart. Components below the extension point are unmodified. The
1866 // component containing the extension point becomes a narrower SEXT_INREG.
1867 // Components above it are ashr'd from the component containing the
1869 if (SizeOp0
% NarrowSize
!= 0)
1870 return UnableToLegalize
;
1871 int NumParts
= SizeOp0
/ NarrowSize
;
1873 // List the registers where the destination will be scattered.
1874 SmallVector
<Register
, 2> DstRegs
;
1875 // List the registers where the source will be split.
1876 SmallVector
<Register
, 2> SrcRegs
;
1878 // Create all the temporary registers.
1879 for (int i
= 0; i
< NumParts
; ++i
) {
1880 Register SrcReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
1882 SrcRegs
.push_back(SrcReg
);
1885 // Explode the big arguments into smaller chunks.
1886 MIRBuilder
.buildUnmerge(SrcRegs
, MI
.getOperand(1));
1888 Register AshrCstReg
=
1889 MIRBuilder
.buildConstant(NarrowTy
, NarrowTy
.getScalarSizeInBits() - 1)
1891 Register FullExtensionReg
;
1892 Register PartialExtensionReg
;
1894 // Do the operation on each small part.
1895 for (int i
= 0; i
< NumParts
; ++i
) {
1896 if ((i
+ 1) * NarrowTy
.getScalarSizeInBits() <= SizeInBits
) {
1897 DstRegs
.push_back(SrcRegs
[i
]);
1898 PartialExtensionReg
= DstRegs
.back();
1899 } else if (i
* NarrowTy
.getScalarSizeInBits() >= SizeInBits
) {
1900 assert(PartialExtensionReg
&&
1901 "Expected to visit partial extension before full");
1902 if (FullExtensionReg
) {
1903 DstRegs
.push_back(FullExtensionReg
);
1907 MIRBuilder
.buildAShr(NarrowTy
, PartialExtensionReg
, AshrCstReg
)
1909 FullExtensionReg
= DstRegs
.back();
1914 TargetOpcode::G_SEXT_INREG
, {NarrowTy
},
1915 {SrcRegs
[i
], SizeInBits
% NarrowTy
.getScalarSizeInBits()})
1917 PartialExtensionReg
= DstRegs
.back();
1921 // Gather the destination registers into the final destination.
1922 Register DstReg
= MI
.getOperand(0).getReg();
1923 MIRBuilder
.buildMergeLikeInstr(DstReg
, DstRegs
);
1924 MI
.eraseFromParent();
1927 case TargetOpcode::G_BSWAP
:
1928 case TargetOpcode::G_BITREVERSE
: {
1929 if (SizeOp0
% NarrowSize
!= 0)
1930 return UnableToLegalize
;
1932 Observer
.changingInstr(MI
);
1933 SmallVector
<Register
, 2> SrcRegs
, DstRegs
;
1934 unsigned NumParts
= SizeOp0
/ NarrowSize
;
1935 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
,
1938 for (unsigned i
= 0; i
< NumParts
; ++i
) {
1939 auto DstPart
= MIRBuilder
.buildInstr(MI
.getOpcode(), {NarrowTy
},
1940 {SrcRegs
[NumParts
- 1 - i
]});
1941 DstRegs
.push_back(DstPart
.getReg(0));
1944 MIRBuilder
.buildMergeLikeInstr(MI
.getOperand(0), DstRegs
);
1946 Observer
.changedInstr(MI
);
1947 MI
.eraseFromParent();
1950 case TargetOpcode::G_PTR_ADD
:
1951 case TargetOpcode::G_PTRMASK
: {
1953 return UnableToLegalize
;
1954 Observer
.changingInstr(MI
);
1955 narrowScalarSrc(MI
, NarrowTy
, 2);
1956 Observer
.changedInstr(MI
);
1959 case TargetOpcode::G_FPTOUI
:
1960 case TargetOpcode::G_FPTOSI
:
1961 case TargetOpcode::G_FPTOUI_SAT
:
1962 case TargetOpcode::G_FPTOSI_SAT
:
1963 return narrowScalarFPTOI(MI
, TypeIdx
, NarrowTy
);
1964 case TargetOpcode::G_FPEXT
:
1966 return UnableToLegalize
;
1967 Observer
.changingInstr(MI
);
1968 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_FPEXT
);
1969 Observer
.changedInstr(MI
);
1971 case TargetOpcode::G_FLDEXP
:
1972 case TargetOpcode::G_STRICT_FLDEXP
:
1973 return narrowScalarFLDEXP(MI
, TypeIdx
, NarrowTy
);
1974 case TargetOpcode::G_VSCALE
: {
1975 Register Dst
= MI
.getOperand(0).getReg();
1976 LLT Ty
= MRI
.getType(Dst
);
1978 // Assume VSCALE(1) fits into a legal integer
1979 const APInt
One(NarrowTy
.getSizeInBits(), 1);
1980 auto VScaleBase
= MIRBuilder
.buildVScale(NarrowTy
, One
);
1981 auto ZExt
= MIRBuilder
.buildZExt(Ty
, VScaleBase
);
1982 auto C
= MIRBuilder
.buildConstant(Ty
, *MI
.getOperand(1).getCImm());
1983 MIRBuilder
.buildMul(Dst
, ZExt
, C
);
1985 MI
.eraseFromParent();
1991 Register
LegalizerHelper::coerceToScalar(Register Val
) {
1992 LLT Ty
= MRI
.getType(Val
);
1996 const DataLayout
&DL
= MIRBuilder
.getDataLayout();
1997 LLT NewTy
= LLT::scalar(Ty
.getSizeInBits());
1998 if (Ty
.isPointer()) {
1999 if (DL
.isNonIntegralAddressSpace(Ty
.getAddressSpace()))
2001 return MIRBuilder
.buildPtrToInt(NewTy
, Val
).getReg(0);
2004 Register NewVal
= Val
;
2006 assert(Ty
.isVector());
2007 if (Ty
.isPointerVector())
2008 NewVal
= MIRBuilder
.buildPtrToInt(NewTy
, NewVal
).getReg(0);
2009 return MIRBuilder
.buildBitcast(NewTy
, NewVal
).getReg(0);
2012 void LegalizerHelper::widenScalarSrc(MachineInstr
&MI
, LLT WideTy
,
2013 unsigned OpIdx
, unsigned ExtOpcode
) {
2014 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
2015 auto ExtB
= MIRBuilder
.buildInstr(ExtOpcode
, {WideTy
}, {MO
});
2016 MO
.setReg(ExtB
.getReg(0));
2019 void LegalizerHelper::narrowScalarSrc(MachineInstr
&MI
, LLT NarrowTy
,
2021 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
2022 auto ExtB
= MIRBuilder
.buildTrunc(NarrowTy
, MO
);
2023 MO
.setReg(ExtB
.getReg(0));
2026 void LegalizerHelper::widenScalarDst(MachineInstr
&MI
, LLT WideTy
,
2027 unsigned OpIdx
, unsigned TruncOpcode
) {
2028 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
2029 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
2030 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
2031 MIRBuilder
.buildInstr(TruncOpcode
, {MO
}, {DstExt
});
2035 void LegalizerHelper::narrowScalarDst(MachineInstr
&MI
, LLT NarrowTy
,
2036 unsigned OpIdx
, unsigned ExtOpcode
) {
2037 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
2038 Register DstTrunc
= MRI
.createGenericVirtualRegister(NarrowTy
);
2039 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
2040 MIRBuilder
.buildInstr(ExtOpcode
, {MO
}, {DstTrunc
});
2041 MO
.setReg(DstTrunc
);
2044 void LegalizerHelper::moreElementsVectorDst(MachineInstr
&MI
, LLT WideTy
,
2046 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
2047 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
2048 Register Dst
= MO
.getReg();
2049 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
2051 MIRBuilder
.buildDeleteTrailingVectorElements(Dst
, DstExt
);
2054 void LegalizerHelper::moreElementsVectorSrc(MachineInstr
&MI
, LLT MoreTy
,
2056 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
2057 SmallVector
<Register
, 8> Regs
;
2058 MO
.setReg(MIRBuilder
.buildPadVectorWithUndefElements(MoreTy
, MO
).getReg(0));
2061 void LegalizerHelper::bitcastSrc(MachineInstr
&MI
, LLT CastTy
, unsigned OpIdx
) {
2062 MachineOperand
&Op
= MI
.getOperand(OpIdx
);
2063 Op
.setReg(MIRBuilder
.buildBitcast(CastTy
, Op
).getReg(0));
2066 void LegalizerHelper::bitcastDst(MachineInstr
&MI
, LLT CastTy
, unsigned OpIdx
) {
2067 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
2068 Register CastDst
= MRI
.createGenericVirtualRegister(CastTy
);
2069 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
2070 MIRBuilder
.buildBitcast(MO
, CastDst
);
2074 LegalizerHelper::LegalizeResult
2075 LegalizerHelper::widenScalarMergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
2078 return UnableToLegalize
;
2080 auto [DstReg
, DstTy
, Src1Reg
, Src1Ty
] = MI
.getFirst2RegLLTs();
2081 if (DstTy
.isVector())
2082 return UnableToLegalize
;
2084 LLT SrcTy
= MRI
.getType(Src1Reg
);
2085 const int DstSize
= DstTy
.getSizeInBits();
2086 const int SrcSize
= SrcTy
.getSizeInBits();
2087 const int WideSize
= WideTy
.getSizeInBits();
2088 const int NumMerge
= (DstSize
+ WideSize
- 1) / WideSize
;
2090 unsigned NumOps
= MI
.getNumOperands();
2091 unsigned NumSrc
= MI
.getNumOperands() - 1;
2092 unsigned PartSize
= DstTy
.getSizeInBits() / NumSrc
;
2094 if (WideSize
>= DstSize
) {
2095 // Directly pack the bits in the target type.
2096 Register ResultReg
= MIRBuilder
.buildZExt(WideTy
, Src1Reg
).getReg(0);
2098 for (unsigned I
= 2; I
!= NumOps
; ++I
) {
2099 const unsigned Offset
= (I
- 1) * PartSize
;
2101 Register SrcReg
= MI
.getOperand(I
).getReg();
2102 assert(MRI
.getType(SrcReg
) == LLT::scalar(PartSize
));
2104 auto ZextInput
= MIRBuilder
.buildZExt(WideTy
, SrcReg
);
2106 Register NextResult
= I
+ 1 == NumOps
&& WideTy
== DstTy
? DstReg
:
2107 MRI
.createGenericVirtualRegister(WideTy
);
2109 auto ShiftAmt
= MIRBuilder
.buildConstant(WideTy
, Offset
);
2110 auto Shl
= MIRBuilder
.buildShl(WideTy
, ZextInput
, ShiftAmt
);
2111 MIRBuilder
.buildOr(NextResult
, ResultReg
, Shl
);
2112 ResultReg
= NextResult
;
2115 if (WideSize
> DstSize
)
2116 MIRBuilder
.buildTrunc(DstReg
, ResultReg
);
2117 else if (DstTy
.isPointer())
2118 MIRBuilder
.buildIntToPtr(DstReg
, ResultReg
);
2120 MI
.eraseFromParent();
2124 // Unmerge the original values to the GCD type, and recombine to the next
2125 // multiple greater than the original type.
2127 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2128 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2129 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2130 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2131 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2132 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2133 // %12:_(s12) = G_MERGE_VALUES %10, %11
2135 // Padding with undef if necessary:
2137 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2138 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2139 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2140 // %7:_(s2) = G_IMPLICIT_DEF
2141 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2142 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2143 // %10:_(s12) = G_MERGE_VALUES %8, %9
2145 const int GCD
= std::gcd(SrcSize
, WideSize
);
2146 LLT GCDTy
= LLT::scalar(GCD
);
2148 SmallVector
<Register
, 8> Parts
;
2149 SmallVector
<Register
, 8> NewMergeRegs
;
2150 SmallVector
<Register
, 8> Unmerges
;
2151 LLT WideDstTy
= LLT::scalar(NumMerge
* WideSize
);
2153 // Decompose the original operands if they don't evenly divide.
2154 for (const MachineOperand
&MO
: llvm::drop_begin(MI
.operands())) {
2155 Register SrcReg
= MO
.getReg();
2156 if (GCD
== SrcSize
) {
2157 Unmerges
.push_back(SrcReg
);
2159 auto Unmerge
= MIRBuilder
.buildUnmerge(GCDTy
, SrcReg
);
2160 for (int J
= 0, JE
= Unmerge
->getNumOperands() - 1; J
!= JE
; ++J
)
2161 Unmerges
.push_back(Unmerge
.getReg(J
));
2165 // Pad with undef to the next size that is a multiple of the requested size.
2166 if (static_cast<int>(Unmerges
.size()) != NumMerge
* WideSize
) {
2167 Register UndefReg
= MIRBuilder
.buildUndef(GCDTy
).getReg(0);
2168 for (int I
= Unmerges
.size(); I
!= NumMerge
* WideSize
; ++I
)
2169 Unmerges
.push_back(UndefReg
);
2172 const int PartsPerGCD
= WideSize
/ GCD
;
2174 // Build merges of each piece.
2175 ArrayRef
<Register
> Slicer(Unmerges
);
2176 for (int I
= 0; I
!= NumMerge
; ++I
, Slicer
= Slicer
.drop_front(PartsPerGCD
)) {
2178 MIRBuilder
.buildMergeLikeInstr(WideTy
, Slicer
.take_front(PartsPerGCD
));
2179 NewMergeRegs
.push_back(Merge
.getReg(0));
2182 // A truncate may be necessary if the requested type doesn't evenly divide the
2183 // original result type.
2184 if (DstTy
.getSizeInBits() == WideDstTy
.getSizeInBits()) {
2185 MIRBuilder
.buildMergeLikeInstr(DstReg
, NewMergeRegs
);
2187 auto FinalMerge
= MIRBuilder
.buildMergeLikeInstr(WideDstTy
, NewMergeRegs
);
2188 MIRBuilder
.buildTrunc(DstReg
, FinalMerge
.getReg(0));
2191 MI
.eraseFromParent();
2195 LegalizerHelper::LegalizeResult
2196 LegalizerHelper::widenScalarUnmergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
2199 return UnableToLegalize
;
2201 int NumDst
= MI
.getNumOperands() - 1;
2202 Register SrcReg
= MI
.getOperand(NumDst
).getReg();
2203 LLT SrcTy
= MRI
.getType(SrcReg
);
2204 if (SrcTy
.isVector())
2205 return UnableToLegalize
;
2207 Register Dst0Reg
= MI
.getOperand(0).getReg();
2208 LLT DstTy
= MRI
.getType(Dst0Reg
);
2209 if (!DstTy
.isScalar())
2210 return UnableToLegalize
;
2212 if (WideTy
.getSizeInBits() >= SrcTy
.getSizeInBits()) {
2213 if (SrcTy
.isPointer()) {
2214 const DataLayout
&DL
= MIRBuilder
.getDataLayout();
2215 if (DL
.isNonIntegralAddressSpace(SrcTy
.getAddressSpace())) {
2217 dbgs() << "Not casting non-integral address space integer\n");
2218 return UnableToLegalize
;
2221 SrcTy
= LLT::scalar(SrcTy
.getSizeInBits());
2222 SrcReg
= MIRBuilder
.buildPtrToInt(SrcTy
, SrcReg
).getReg(0);
2225 // Widen SrcTy to WideTy. This does not affect the result, but since the
2226 // user requested this size, it is probably better handled than SrcTy and
2227 // should reduce the total number of legalization artifacts.
2228 if (WideTy
.getSizeInBits() > SrcTy
.getSizeInBits()) {
2230 SrcReg
= MIRBuilder
.buildAnyExt(WideTy
, SrcReg
).getReg(0);
2233 // Theres no unmerge type to target. Directly extract the bits from the
2235 unsigned DstSize
= DstTy
.getSizeInBits();
2237 MIRBuilder
.buildTrunc(Dst0Reg
, SrcReg
);
2238 for (int I
= 1; I
!= NumDst
; ++I
) {
2239 auto ShiftAmt
= MIRBuilder
.buildConstant(SrcTy
, DstSize
* I
);
2240 auto Shr
= MIRBuilder
.buildLShr(SrcTy
, SrcReg
, ShiftAmt
);
2241 MIRBuilder
.buildTrunc(MI
.getOperand(I
), Shr
);
2244 MI
.eraseFromParent();
2248 // Extend the source to a wider type.
2249 LLT LCMTy
= getLCMType(SrcTy
, WideTy
);
2251 Register WideSrc
= SrcReg
;
2252 if (LCMTy
.getSizeInBits() != SrcTy
.getSizeInBits()) {
2253 // TODO: If this is an integral address space, cast to integer and anyext.
2254 if (SrcTy
.isPointer()) {
2255 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2256 return UnableToLegalize
;
2259 WideSrc
= MIRBuilder
.buildAnyExt(LCMTy
, WideSrc
).getReg(0);
2262 auto Unmerge
= MIRBuilder
.buildUnmerge(WideTy
, WideSrc
);
2264 // Create a sequence of unmerges and merges to the original results. Since we
2265 // may have widened the source, we will need to pad the results with dead defs
2266 // to cover the source register.
2267 // e.g. widen s48 to s64:
2268 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2271 // %4:_(s192) = G_ANYEXT %0:_(s96)
2272 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2273 // ; unpack to GCD type, with extra dead defs
2274 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2275 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2276 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2277 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2278 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2279 const LLT GCDTy
= getGCDType(WideTy
, DstTy
);
2280 const int NumUnmerge
= Unmerge
->getNumOperands() - 1;
2281 const int PartsPerRemerge
= DstTy
.getSizeInBits() / GCDTy
.getSizeInBits();
2283 // Directly unmerge to the destination without going through a GCD type
2285 if (PartsPerRemerge
== 1) {
2286 const int PartsPerUnmerge
= WideTy
.getSizeInBits() / DstTy
.getSizeInBits();
2288 for (int I
= 0; I
!= NumUnmerge
; ++I
) {
2289 auto MIB
= MIRBuilder
.buildInstr(TargetOpcode::G_UNMERGE_VALUES
);
2291 for (int J
= 0; J
!= PartsPerUnmerge
; ++J
) {
2292 int Idx
= I
* PartsPerUnmerge
+ J
;
2294 MIB
.addDef(MI
.getOperand(Idx
).getReg());
2296 // Create dead def for excess components.
2297 MIB
.addDef(MRI
.createGenericVirtualRegister(DstTy
));
2301 MIB
.addUse(Unmerge
.getReg(I
));
2304 SmallVector
<Register
, 16> Parts
;
2305 for (int J
= 0; J
!= NumUnmerge
; ++J
)
2306 extractGCDType(Parts
, GCDTy
, Unmerge
.getReg(J
));
2308 SmallVector
<Register
, 8> RemergeParts
;
2309 for (int I
= 0; I
!= NumDst
; ++I
) {
2310 for (int J
= 0; J
< PartsPerRemerge
; ++J
) {
2311 const int Idx
= I
* PartsPerRemerge
+ J
;
2312 RemergeParts
.emplace_back(Parts
[Idx
]);
2315 MIRBuilder
.buildMergeLikeInstr(MI
.getOperand(I
).getReg(), RemergeParts
);
2316 RemergeParts
.clear();
2320 MI
.eraseFromParent();
2324 LegalizerHelper::LegalizeResult
2325 LegalizerHelper::widenScalarExtract(MachineInstr
&MI
, unsigned TypeIdx
,
2327 auto [DstReg
, DstTy
, SrcReg
, SrcTy
] = MI
.getFirst2RegLLTs();
2328 unsigned Offset
= MI
.getOperand(2).getImm();
2331 if (SrcTy
.isVector() || DstTy
.isVector())
2332 return UnableToLegalize
;
2335 if (SrcTy
.isPointer()) {
2336 // Extracts from pointers can be handled only if they are really just
2338 const DataLayout
&DL
= MIRBuilder
.getDataLayout();
2339 if (DL
.isNonIntegralAddressSpace(SrcTy
.getAddressSpace()))
2340 return UnableToLegalize
;
2342 LLT SrcAsIntTy
= LLT::scalar(SrcTy
.getSizeInBits());
2343 Src
= MIRBuilder
.buildPtrToInt(SrcAsIntTy
, Src
);
2347 if (DstTy
.isPointer())
2348 return UnableToLegalize
;
2351 // Avoid a shift in the degenerate case.
2352 MIRBuilder
.buildTrunc(DstReg
,
2353 MIRBuilder
.buildAnyExtOrTrunc(WideTy
, Src
));
2354 MI
.eraseFromParent();
2358 // Do a shift in the source type.
2359 LLT ShiftTy
= SrcTy
;
2360 if (WideTy
.getSizeInBits() > SrcTy
.getSizeInBits()) {
2361 Src
= MIRBuilder
.buildAnyExt(WideTy
, Src
);
2365 auto LShr
= MIRBuilder
.buildLShr(
2366 ShiftTy
, Src
, MIRBuilder
.buildConstant(ShiftTy
, Offset
));
2367 MIRBuilder
.buildTrunc(DstReg
, LShr
);
2368 MI
.eraseFromParent();
2372 if (SrcTy
.isScalar()) {
2373 Observer
.changingInstr(MI
);
2374 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
2375 Observer
.changedInstr(MI
);
2379 if (!SrcTy
.isVector())
2380 return UnableToLegalize
;
2382 if (DstTy
!= SrcTy
.getElementType())
2383 return UnableToLegalize
;
2385 if (Offset
% SrcTy
.getScalarSizeInBits() != 0)
2386 return UnableToLegalize
;
2388 Observer
.changingInstr(MI
);
2389 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
2391 MI
.getOperand(2).setImm((WideTy
.getSizeInBits() / SrcTy
.getSizeInBits()) *
2393 widenScalarDst(MI
, WideTy
.getScalarType(), 0);
2394 Observer
.changedInstr(MI
);
2398 LegalizerHelper::LegalizeResult
2399 LegalizerHelper::widenScalarInsert(MachineInstr
&MI
, unsigned TypeIdx
,
2401 if (TypeIdx
!= 0 || WideTy
.isVector())
2402 return UnableToLegalize
;
2403 Observer
.changingInstr(MI
);
2404 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
2405 widenScalarDst(MI
, WideTy
);
2406 Observer
.changedInstr(MI
);
2410 LegalizerHelper::LegalizeResult
2411 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr
&MI
, unsigned TypeIdx
,
2415 std::optional
<Register
> CarryIn
;
2416 switch (MI
.getOpcode()) {
2418 llvm_unreachable("Unexpected opcode!");
2419 case TargetOpcode::G_SADDO
:
2420 Opcode
= TargetOpcode::G_ADD
;
2421 ExtOpcode
= TargetOpcode::G_SEXT
;
2423 case TargetOpcode::G_SSUBO
:
2424 Opcode
= TargetOpcode::G_SUB
;
2425 ExtOpcode
= TargetOpcode::G_SEXT
;
2427 case TargetOpcode::G_UADDO
:
2428 Opcode
= TargetOpcode::G_ADD
;
2429 ExtOpcode
= TargetOpcode::G_ZEXT
;
2431 case TargetOpcode::G_USUBO
:
2432 Opcode
= TargetOpcode::G_SUB
;
2433 ExtOpcode
= TargetOpcode::G_ZEXT
;
2435 case TargetOpcode::G_SADDE
:
2436 Opcode
= TargetOpcode::G_UADDE
;
2437 ExtOpcode
= TargetOpcode::G_SEXT
;
2438 CarryIn
= MI
.getOperand(4).getReg();
2440 case TargetOpcode::G_SSUBE
:
2441 Opcode
= TargetOpcode::G_USUBE
;
2442 ExtOpcode
= TargetOpcode::G_SEXT
;
2443 CarryIn
= MI
.getOperand(4).getReg();
2445 case TargetOpcode::G_UADDE
:
2446 Opcode
= TargetOpcode::G_UADDE
;
2447 ExtOpcode
= TargetOpcode::G_ZEXT
;
2448 CarryIn
= MI
.getOperand(4).getReg();
2450 case TargetOpcode::G_USUBE
:
2451 Opcode
= TargetOpcode::G_USUBE
;
2452 ExtOpcode
= TargetOpcode::G_ZEXT
;
2453 CarryIn
= MI
.getOperand(4).getReg();
2458 unsigned BoolExtOp
= MIRBuilder
.getBoolExtOp(WideTy
.isVector(), false);
2460 Observer
.changingInstr(MI
);
2462 widenScalarSrc(MI
, WideTy
, 4, BoolExtOp
);
2463 widenScalarDst(MI
, WideTy
, 1);
2465 Observer
.changedInstr(MI
);
2469 auto LHSExt
= MIRBuilder
.buildInstr(ExtOpcode
, {WideTy
}, {MI
.getOperand(2)});
2470 auto RHSExt
= MIRBuilder
.buildInstr(ExtOpcode
, {WideTy
}, {MI
.getOperand(3)});
2471 // Do the arithmetic in the larger type.
2474 LLT CarryOutTy
= MRI
.getType(MI
.getOperand(1).getReg());
2476 .buildInstr(Opcode
, {WideTy
, CarryOutTy
},
2477 {LHSExt
, RHSExt
, *CarryIn
})
2480 NewOp
= MIRBuilder
.buildInstr(Opcode
, {WideTy
}, {LHSExt
, RHSExt
}).getReg(0);
2482 LLT OrigTy
= MRI
.getType(MI
.getOperand(0).getReg());
2483 auto TruncOp
= MIRBuilder
.buildTrunc(OrigTy
, NewOp
);
2484 auto ExtOp
= MIRBuilder
.buildInstr(ExtOpcode
, {WideTy
}, {TruncOp
});
2485 // There is no overflow if the ExtOp is the same as NewOp.
2486 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, MI
.getOperand(1), NewOp
, ExtOp
);
2487 // Now trunc the NewOp to the original result.
2488 MIRBuilder
.buildTrunc(MI
.getOperand(0), NewOp
);
2489 MI
.eraseFromParent();
2493 LegalizerHelper::LegalizeResult
2494 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr
&MI
, unsigned TypeIdx
,
2496 bool IsSigned
= MI
.getOpcode() == TargetOpcode::G_SADDSAT
||
2497 MI
.getOpcode() == TargetOpcode::G_SSUBSAT
||
2498 MI
.getOpcode() == TargetOpcode::G_SSHLSAT
;
2499 bool IsShift
= MI
.getOpcode() == TargetOpcode::G_SSHLSAT
||
2500 MI
.getOpcode() == TargetOpcode::G_USHLSAT
;
2501 // We can convert this to:
2502 // 1. Any extend iN to iM
2504 // 3. [US][ADD|SUB|SHL]SAT
2507 // It may be more efficient to lower this to a min and a max operation in
2508 // the higher precision arithmetic if the promoted operation isn't legal,
2509 // but this decision is up to the target's lowering request.
2510 Register DstReg
= MI
.getOperand(0).getReg();
2512 unsigned NewBits
= WideTy
.getScalarSizeInBits();
2513 unsigned SHLAmount
= NewBits
- MRI
.getType(DstReg
).getScalarSizeInBits();
2515 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2516 // must not left shift the RHS to preserve the shift amount.
2517 auto LHS
= MIRBuilder
.buildAnyExt(WideTy
, MI
.getOperand(1));
2518 auto RHS
= IsShift
? MIRBuilder
.buildZExt(WideTy
, MI
.getOperand(2))
2519 : MIRBuilder
.buildAnyExt(WideTy
, MI
.getOperand(2));
2520 auto ShiftK
= MIRBuilder
.buildConstant(WideTy
, SHLAmount
);
2521 auto ShiftL
= MIRBuilder
.buildShl(WideTy
, LHS
, ShiftK
);
2522 auto ShiftR
= IsShift
? RHS
: MIRBuilder
.buildShl(WideTy
, RHS
, ShiftK
);
2524 auto WideInst
= MIRBuilder
.buildInstr(MI
.getOpcode(), {WideTy
},
2525 {ShiftL
, ShiftR
}, MI
.getFlags());
2527 // Use a shift that will preserve the number of sign bits when the trunc is
2529 auto Result
= IsSigned
? MIRBuilder
.buildAShr(WideTy
, WideInst
, ShiftK
)
2530 : MIRBuilder
.buildLShr(WideTy
, WideInst
, ShiftK
);
2532 MIRBuilder
.buildTrunc(DstReg
, Result
);
2533 MI
.eraseFromParent();
2537 LegalizerHelper::LegalizeResult
2538 LegalizerHelper::widenScalarMulo(MachineInstr
&MI
, unsigned TypeIdx
,
2541 Observer
.changingInstr(MI
);
2542 widenScalarDst(MI
, WideTy
, 1);
2543 Observer
.changedInstr(MI
);
2547 bool IsSigned
= MI
.getOpcode() == TargetOpcode::G_SMULO
;
2548 auto [Result
, OriginalOverflow
, LHS
, RHS
] = MI
.getFirst4Regs();
2549 LLT SrcTy
= MRI
.getType(LHS
);
2550 LLT OverflowTy
= MRI
.getType(OriginalOverflow
);
2551 unsigned SrcBitWidth
= SrcTy
.getScalarSizeInBits();
2553 // To determine if the result overflowed in the larger type, we extend the
2554 // input to the larger type, do the multiply (checking if it overflows),
2555 // then also check the high bits of the result to see if overflow happened
2557 unsigned ExtOp
= IsSigned
? TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT
;
2558 auto LeftOperand
= MIRBuilder
.buildInstr(ExtOp
, {WideTy
}, {LHS
});
2559 auto RightOperand
= MIRBuilder
.buildInstr(ExtOp
, {WideTy
}, {RHS
});
2561 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2562 // so we don't need to check the overflow result of larger type Mulo.
2563 bool WideMulCanOverflow
= WideTy
.getScalarSizeInBits() < 2 * SrcBitWidth
;
2566 WideMulCanOverflow
? MI
.getOpcode() : (unsigned)TargetOpcode::G_MUL
;
2568 MachineInstrBuilder Mulo
;
2569 if (WideMulCanOverflow
)
2570 Mulo
= MIRBuilder
.buildInstr(MulOpc
, {WideTy
, OverflowTy
},
2571 {LeftOperand
, RightOperand
});
2573 Mulo
= MIRBuilder
.buildInstr(MulOpc
, {WideTy
}, {LeftOperand
, RightOperand
});
2575 auto Mul
= Mulo
->getOperand(0);
2576 MIRBuilder
.buildTrunc(Result
, Mul
);
2578 MachineInstrBuilder ExtResult
;
2579 // Overflow occurred if it occurred in the larger type, or if the high part
2580 // of the result does not zero/sign-extend the low part. Check this second
2581 // possibility first.
2583 // For signed, overflow occurred when the high part does not sign-extend
2585 ExtResult
= MIRBuilder
.buildSExtInReg(WideTy
, Mul
, SrcBitWidth
);
2587 // Unsigned overflow occurred when the high part does not zero-extend the
2589 ExtResult
= MIRBuilder
.buildZExtInReg(WideTy
, Mul
, SrcBitWidth
);
2592 if (WideMulCanOverflow
) {
2594 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, OverflowTy
, Mul
, ExtResult
);
2595 // Finally check if the multiplication in the larger type itself overflowed.
2596 MIRBuilder
.buildOr(OriginalOverflow
, Mulo
->getOperand(1), Overflow
);
2598 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, OriginalOverflow
, Mul
, ExtResult
);
2600 MI
.eraseFromParent();
2604 LegalizerHelper::LegalizeResult
2605 LegalizerHelper::widenScalar(MachineInstr
&MI
, unsigned TypeIdx
, LLT WideTy
) {
2606 unsigned Opcode
= MI
.getOpcode();
2609 return UnableToLegalize
;
2610 case TargetOpcode::G_ATOMICRMW_XCHG
:
2611 case TargetOpcode::G_ATOMICRMW_ADD
:
2612 case TargetOpcode::G_ATOMICRMW_SUB
:
2613 case TargetOpcode::G_ATOMICRMW_AND
:
2614 case TargetOpcode::G_ATOMICRMW_OR
:
2615 case TargetOpcode::G_ATOMICRMW_XOR
:
2616 case TargetOpcode::G_ATOMICRMW_MIN
:
2617 case TargetOpcode::G_ATOMICRMW_MAX
:
2618 case TargetOpcode::G_ATOMICRMW_UMIN
:
2619 case TargetOpcode::G_ATOMICRMW_UMAX
:
2620 assert(TypeIdx
== 0 && "atomicrmw with second scalar type");
2621 Observer
.changingInstr(MI
);
2622 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
2623 widenScalarDst(MI
, WideTy
, 0);
2624 Observer
.changedInstr(MI
);
2626 case TargetOpcode::G_ATOMIC_CMPXCHG
:
2627 assert(TypeIdx
== 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2628 Observer
.changingInstr(MI
);
2629 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
2630 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_ANYEXT
);
2631 widenScalarDst(MI
, WideTy
, 0);
2632 Observer
.changedInstr(MI
);
2634 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS
:
2636 Observer
.changingInstr(MI
);
2637 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_ANYEXT
);
2638 widenScalarSrc(MI
, WideTy
, 4, TargetOpcode::G_ANYEXT
);
2639 widenScalarDst(MI
, WideTy
, 0);
2640 Observer
.changedInstr(MI
);
2643 assert(TypeIdx
== 1 &&
2644 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2645 Observer
.changingInstr(MI
);
2646 widenScalarDst(MI
, WideTy
, 1);
2647 Observer
.changedInstr(MI
);
2649 case TargetOpcode::G_EXTRACT
:
2650 return widenScalarExtract(MI
, TypeIdx
, WideTy
);
2651 case TargetOpcode::G_INSERT
:
2652 return widenScalarInsert(MI
, TypeIdx
, WideTy
);
2653 case TargetOpcode::G_MERGE_VALUES
:
2654 return widenScalarMergeValues(MI
, TypeIdx
, WideTy
);
2655 case TargetOpcode::G_UNMERGE_VALUES
:
2656 return widenScalarUnmergeValues(MI
, TypeIdx
, WideTy
);
2657 case TargetOpcode::G_SADDO
:
2658 case TargetOpcode::G_SSUBO
:
2659 case TargetOpcode::G_UADDO
:
2660 case TargetOpcode::G_USUBO
:
2661 case TargetOpcode::G_SADDE
:
2662 case TargetOpcode::G_SSUBE
:
2663 case TargetOpcode::G_UADDE
:
2664 case TargetOpcode::G_USUBE
:
2665 return widenScalarAddSubOverflow(MI
, TypeIdx
, WideTy
);
2666 case TargetOpcode::G_UMULO
:
2667 case TargetOpcode::G_SMULO
:
2668 return widenScalarMulo(MI
, TypeIdx
, WideTy
);
2669 case TargetOpcode::G_SADDSAT
:
2670 case TargetOpcode::G_SSUBSAT
:
2671 case TargetOpcode::G_SSHLSAT
:
2672 case TargetOpcode::G_UADDSAT
:
2673 case TargetOpcode::G_USUBSAT
:
2674 case TargetOpcode::G_USHLSAT
:
2675 return widenScalarAddSubShlSat(MI
, TypeIdx
, WideTy
);
2676 case TargetOpcode::G_CTTZ
:
2677 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
2678 case TargetOpcode::G_CTLZ
:
2679 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
2680 case TargetOpcode::G_CTPOP
: {
2682 Observer
.changingInstr(MI
);
2683 widenScalarDst(MI
, WideTy
, 0);
2684 Observer
.changedInstr(MI
);
2688 Register SrcReg
= MI
.getOperand(1).getReg();
2690 // First extend the input.
2691 unsigned ExtOpc
= Opcode
== TargetOpcode::G_CTTZ
||
2692 Opcode
== TargetOpcode::G_CTTZ_ZERO_UNDEF
2693 ? TargetOpcode::G_ANYEXT
2694 : TargetOpcode::G_ZEXT
;
2695 auto MIBSrc
= MIRBuilder
.buildInstr(ExtOpc
, {WideTy
}, {SrcReg
});
2696 LLT CurTy
= MRI
.getType(SrcReg
);
2697 unsigned NewOpc
= Opcode
;
2698 if (NewOpc
== TargetOpcode::G_CTTZ
) {
2699 // The count is the same in the larger type except if the original
2700 // value was zero. This can be handled by setting the bit just off
2701 // the top of the original type.
2703 APInt::getOneBitSet(WideTy
.getSizeInBits(), CurTy
.getSizeInBits());
2704 MIBSrc
= MIRBuilder
.buildOr(
2705 WideTy
, MIBSrc
, MIRBuilder
.buildConstant(WideTy
, TopBit
));
2706 // Now we know the operand is non-zero, use the more relaxed opcode.
2707 NewOpc
= TargetOpcode::G_CTTZ_ZERO_UNDEF
;
2710 unsigned SizeDiff
= WideTy
.getSizeInBits() - CurTy
.getSizeInBits();
2712 if (Opcode
== TargetOpcode::G_CTLZ_ZERO_UNDEF
) {
2713 // An optimization where the result is the CTLZ after the left shift by
2714 // (Difference in widety and current ty), that is,
2715 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2716 // Result = ctlz MIBSrc
2717 MIBSrc
= MIRBuilder
.buildShl(WideTy
, MIBSrc
,
2718 MIRBuilder
.buildConstant(WideTy
, SizeDiff
));
2721 // Perform the operation at the larger size.
2722 auto MIBNewOp
= MIRBuilder
.buildInstr(NewOpc
, {WideTy
}, {MIBSrc
});
2723 // This is already the correct result for CTPOP and CTTZs
2724 if (Opcode
== TargetOpcode::G_CTLZ
) {
2725 // The correct result is NewOp - (Difference in widety and current ty).
2726 MIBNewOp
= MIRBuilder
.buildSub(
2727 WideTy
, MIBNewOp
, MIRBuilder
.buildConstant(WideTy
, SizeDiff
));
2730 MIRBuilder
.buildZExtOrTrunc(MI
.getOperand(0), MIBNewOp
);
2731 MI
.eraseFromParent();
2734 case TargetOpcode::G_BSWAP
: {
2735 Observer
.changingInstr(MI
);
2736 Register DstReg
= MI
.getOperand(0).getReg();
2738 Register ShrReg
= MRI
.createGenericVirtualRegister(WideTy
);
2739 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
2740 Register ShiftAmtReg
= MRI
.createGenericVirtualRegister(WideTy
);
2741 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
2743 MI
.getOperand(0).setReg(DstExt
);
2745 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
2747 LLT Ty
= MRI
.getType(DstReg
);
2748 unsigned DiffBits
= WideTy
.getScalarSizeInBits() - Ty
.getScalarSizeInBits();
2749 MIRBuilder
.buildConstant(ShiftAmtReg
, DiffBits
);
2750 MIRBuilder
.buildLShr(ShrReg
, DstExt
, ShiftAmtReg
);
2752 MIRBuilder
.buildTrunc(DstReg
, ShrReg
);
2753 Observer
.changedInstr(MI
);
2756 case TargetOpcode::G_BITREVERSE
: {
2757 Observer
.changingInstr(MI
);
2759 Register DstReg
= MI
.getOperand(0).getReg();
2760 LLT Ty
= MRI
.getType(DstReg
);
2761 unsigned DiffBits
= WideTy
.getScalarSizeInBits() - Ty
.getScalarSizeInBits();
2763 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
2764 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
2765 MI
.getOperand(0).setReg(DstExt
);
2766 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
2768 auto ShiftAmt
= MIRBuilder
.buildConstant(WideTy
, DiffBits
);
2769 auto Shift
= MIRBuilder
.buildLShr(WideTy
, DstExt
, ShiftAmt
);
2770 MIRBuilder
.buildTrunc(DstReg
, Shift
);
2771 Observer
.changedInstr(MI
);
2774 case TargetOpcode::G_FREEZE
:
2775 case TargetOpcode::G_CONSTANT_FOLD_BARRIER
:
2776 Observer
.changingInstr(MI
);
2777 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
2778 widenScalarDst(MI
, WideTy
);
2779 Observer
.changedInstr(MI
);
2782 case TargetOpcode::G_ABS
:
2783 Observer
.changingInstr(MI
);
2784 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
2785 widenScalarDst(MI
, WideTy
);
2786 Observer
.changedInstr(MI
);
2789 case TargetOpcode::G_ADD
:
2790 case TargetOpcode::G_AND
:
2791 case TargetOpcode::G_MUL
:
2792 case TargetOpcode::G_OR
:
2793 case TargetOpcode::G_XOR
:
2794 case TargetOpcode::G_SUB
:
2795 case TargetOpcode::G_SHUFFLE_VECTOR
:
2796 // Perform operation at larger width (any extension is fines here, high bits
2797 // don't affect the result) and then truncate the result back to the
2799 Observer
.changingInstr(MI
);
2800 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
2801 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
2802 widenScalarDst(MI
, WideTy
);
2803 Observer
.changedInstr(MI
);
2806 case TargetOpcode::G_SBFX
:
2807 case TargetOpcode::G_UBFX
:
2808 Observer
.changingInstr(MI
);
2811 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
2812 widenScalarDst(MI
, WideTy
);
2814 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
2815 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_ZEXT
);
2818 Observer
.changedInstr(MI
);
2821 case TargetOpcode::G_SHL
:
2822 Observer
.changingInstr(MI
);
2825 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
2826 widenScalarDst(MI
, WideTy
);
2828 assert(TypeIdx
== 1);
2829 // The "number of bits to shift" operand must preserve its value as an
2830 // unsigned integer:
2831 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
2834 Observer
.changedInstr(MI
);
2837 case TargetOpcode::G_ROTR
:
2838 case TargetOpcode::G_ROTL
:
2840 return UnableToLegalize
;
2842 Observer
.changingInstr(MI
);
2843 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
2844 Observer
.changedInstr(MI
);
2847 case TargetOpcode::G_SDIV
:
2848 case TargetOpcode::G_SREM
:
2849 case TargetOpcode::G_SMIN
:
2850 case TargetOpcode::G_SMAX
:
2851 Observer
.changingInstr(MI
);
2852 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
2853 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
2854 widenScalarDst(MI
, WideTy
);
2855 Observer
.changedInstr(MI
);
2858 case TargetOpcode::G_SDIVREM
:
2859 Observer
.changingInstr(MI
);
2860 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
2861 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_SEXT
);
2862 widenScalarDst(MI
, WideTy
);
2863 widenScalarDst(MI
, WideTy
, 1);
2864 Observer
.changedInstr(MI
);
2867 case TargetOpcode::G_ASHR
:
2868 case TargetOpcode::G_LSHR
:
2869 Observer
.changingInstr(MI
);
2872 unsigned CvtOp
= Opcode
== TargetOpcode::G_ASHR
? TargetOpcode::G_SEXT
2873 : TargetOpcode::G_ZEXT
;
2875 widenScalarSrc(MI
, WideTy
, 1, CvtOp
);
2876 widenScalarDst(MI
, WideTy
);
2878 assert(TypeIdx
== 1);
2879 // The "number of bits to shift" operand must preserve its value as an
2880 // unsigned integer:
2881 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
2884 Observer
.changedInstr(MI
);
2886 case TargetOpcode::G_UDIV
:
2887 case TargetOpcode::G_UREM
:
2888 Observer
.changingInstr(MI
);
2889 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
2890 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
2891 widenScalarDst(MI
, WideTy
);
2892 Observer
.changedInstr(MI
);
2894 case TargetOpcode::G_UDIVREM
:
2895 Observer
.changingInstr(MI
);
2896 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
2897 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_ZEXT
);
2898 widenScalarDst(MI
, WideTy
);
2899 widenScalarDst(MI
, WideTy
, 1);
2900 Observer
.changedInstr(MI
);
2902 case TargetOpcode::G_UMIN
:
2903 case TargetOpcode::G_UMAX
: {
2904 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
2906 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
2908 TLI
.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty
, Ctx
),
2909 getApproximateEVTForLLT(WideTy
, Ctx
))
2910 ? TargetOpcode::G_SEXT
2911 : TargetOpcode::G_ZEXT
;
2913 Observer
.changingInstr(MI
);
2914 widenScalarSrc(MI
, WideTy
, 1, ExtOpc
);
2915 widenScalarSrc(MI
, WideTy
, 2, ExtOpc
);
2916 widenScalarDst(MI
, WideTy
);
2917 Observer
.changedInstr(MI
);
2921 case TargetOpcode::G_SELECT
:
2922 Observer
.changingInstr(MI
);
2924 // Perform operation at larger width (any extension is fine here, high
2925 // bits don't affect the result) and then truncate the result back to the
2927 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
2928 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_ANYEXT
);
2929 widenScalarDst(MI
, WideTy
);
2931 bool IsVec
= MRI
.getType(MI
.getOperand(1).getReg()).isVector();
2932 // Explicit extension is required here since high bits affect the result.
2933 widenScalarSrc(MI
, WideTy
, 1, MIRBuilder
.getBoolExtOp(IsVec
, false));
2935 Observer
.changedInstr(MI
);
2938 case TargetOpcode::G_FPTOSI
:
2939 case TargetOpcode::G_FPTOUI
:
2940 case TargetOpcode::G_INTRINSIC_LRINT
:
2941 case TargetOpcode::G_INTRINSIC_LLRINT
:
2942 case TargetOpcode::G_IS_FPCLASS
:
2943 Observer
.changingInstr(MI
);
2946 widenScalarDst(MI
, WideTy
);
2948 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_FPEXT
);
2950 Observer
.changedInstr(MI
);
2952 case TargetOpcode::G_SITOFP
:
2953 Observer
.changingInstr(MI
);
2956 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
2958 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
2960 Observer
.changedInstr(MI
);
2962 case TargetOpcode::G_UITOFP
:
2963 Observer
.changingInstr(MI
);
2966 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
2968 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
2970 Observer
.changedInstr(MI
);
2972 case TargetOpcode::G_FPTOSI_SAT
:
2973 case TargetOpcode::G_FPTOUI_SAT
:
2974 Observer
.changingInstr(MI
);
2977 Register OldDst
= MI
.getOperand(0).getReg();
2978 LLT Ty
= MRI
.getType(OldDst
);
2979 Register ExtReg
= MRI
.createGenericVirtualRegister(WideTy
);
2981 MI
.getOperand(0).setReg(ExtReg
);
2982 uint64_t ShortBits
= Ty
.getScalarSizeInBits();
2983 uint64_t WideBits
= WideTy
.getScalarSizeInBits();
2984 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
2985 if (Opcode
== TargetOpcode::G_FPTOSI_SAT
) {
2986 // z = i16 fptosi_sat(a)
2988 // x = i32 fptosi_sat(a)
2989 // y = smin(x, 32767)
2990 // z = smax(y, -32768)
2991 auto MaxVal
= MIRBuilder
.buildConstant(
2992 WideTy
, APInt::getSignedMaxValue(ShortBits
).sext(WideBits
));
2993 auto MinVal
= MIRBuilder
.buildConstant(
2994 WideTy
, APInt::getSignedMinValue(ShortBits
).sext(WideBits
));
2996 MIRBuilder
.buildSMin(WideTy
, ExtReg
, MaxVal
).getReg(0);
2997 NewDst
= MIRBuilder
.buildSMax(WideTy
, MidReg
, MinVal
).getReg(0);
2999 // z = i16 fptoui_sat(a)
3001 // x = i32 fptoui_sat(a)
3002 // y = smin(x, 65535)
3003 auto MaxVal
= MIRBuilder
.buildConstant(
3004 WideTy
, APInt::getAllOnes(ShortBits
).zext(WideBits
));
3005 NewDst
= MIRBuilder
.buildUMin(WideTy
, ExtReg
, MaxVal
).getReg(0);
3007 MIRBuilder
.buildTrunc(OldDst
, NewDst
);
3009 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_FPEXT
);
3011 Observer
.changedInstr(MI
);
3013 case TargetOpcode::G_LOAD
:
3014 case TargetOpcode::G_SEXTLOAD
:
3015 case TargetOpcode::G_ZEXTLOAD
:
3016 Observer
.changingInstr(MI
);
3017 widenScalarDst(MI
, WideTy
);
3018 Observer
.changedInstr(MI
);
3021 case TargetOpcode::G_STORE
: {
3023 return UnableToLegalize
;
3025 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
3026 assert(!Ty
.isPointerOrPointerVector() && "Can't widen type");
3027 if (!Ty
.isScalar()) {
3028 // We need to widen the vector element type.
3029 Observer
.changingInstr(MI
);
3030 widenScalarSrc(MI
, WideTy
, 0, TargetOpcode::G_ANYEXT
);
3031 // We also need to adjust the MMO to turn this into a truncating store.
3032 MachineMemOperand
&MMO
= **MI
.memoperands_begin();
3033 MachineFunction
&MF
= MIRBuilder
.getMF();
3034 auto *NewMMO
= MF
.getMachineMemOperand(&MMO
, MMO
.getPointerInfo(), Ty
);
3035 MI
.setMemRefs(MF
, {NewMMO
});
3036 Observer
.changedInstr(MI
);
3040 Observer
.changingInstr(MI
);
3042 unsigned ExtType
= Ty
.getScalarSizeInBits() == 1 ?
3043 TargetOpcode::G_ZEXT
: TargetOpcode::G_ANYEXT
;
3044 widenScalarSrc(MI
, WideTy
, 0, ExtType
);
3046 Observer
.changedInstr(MI
);
3049 case TargetOpcode::G_CONSTANT
: {
3050 MachineOperand
&SrcMO
= MI
.getOperand(1);
3051 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
3052 unsigned ExtOpc
= LI
.getExtOpcodeForWideningConstant(
3053 MRI
.getType(MI
.getOperand(0).getReg()));
3054 assert((ExtOpc
== TargetOpcode::G_ZEXT
|| ExtOpc
== TargetOpcode::G_SEXT
||
3055 ExtOpc
== TargetOpcode::G_ANYEXT
) &&
3057 const APInt
&SrcVal
= SrcMO
.getCImm()->getValue();
3058 const APInt
&Val
= (ExtOpc
== TargetOpcode::G_SEXT
)
3059 ? SrcVal
.sext(WideTy
.getSizeInBits())
3060 : SrcVal
.zext(WideTy
.getSizeInBits());
3061 Observer
.changingInstr(MI
);
3062 SrcMO
.setCImm(ConstantInt::get(Ctx
, Val
));
3064 widenScalarDst(MI
, WideTy
);
3065 Observer
.changedInstr(MI
);
3068 case TargetOpcode::G_FCONSTANT
: {
3069 // To avoid changing the bits of the constant due to extension to a larger
3070 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3071 MachineOperand
&SrcMO
= MI
.getOperand(1);
3072 APInt Val
= SrcMO
.getFPImm()->getValueAPF().bitcastToAPInt();
3073 MIRBuilder
.setInstrAndDebugLoc(MI
);
3074 auto IntCst
= MIRBuilder
.buildConstant(MI
.getOperand(0).getReg(), Val
);
3075 widenScalarDst(*IntCst
, WideTy
, 0, TargetOpcode::G_TRUNC
);
3076 MI
.eraseFromParent();
3079 case TargetOpcode::G_IMPLICIT_DEF
: {
3080 Observer
.changingInstr(MI
);
3081 widenScalarDst(MI
, WideTy
);
3082 Observer
.changedInstr(MI
);
3085 case TargetOpcode::G_BRCOND
:
3086 Observer
.changingInstr(MI
);
3087 widenScalarSrc(MI
, WideTy
, 0, MIRBuilder
.getBoolExtOp(false, false));
3088 Observer
.changedInstr(MI
);
3091 case TargetOpcode::G_FCMP
:
3092 Observer
.changingInstr(MI
);
3094 widenScalarDst(MI
, WideTy
);
3096 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_FPEXT
);
3097 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_FPEXT
);
3099 Observer
.changedInstr(MI
);
3102 case TargetOpcode::G_ICMP
:
3103 Observer
.changingInstr(MI
);
3105 widenScalarDst(MI
, WideTy
);
3107 LLT SrcTy
= MRI
.getType(MI
.getOperand(2).getReg());
3108 CmpInst::Predicate Pred
=
3109 static_cast<CmpInst::Predicate
>(MI
.getOperand(1).getPredicate());
3111 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
3112 unsigned ExtOpcode
=
3113 (CmpInst::isSigned(Pred
) ||
3114 TLI
.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy
, Ctx
),
3115 getApproximateEVTForLLT(WideTy
, Ctx
)))
3116 ? TargetOpcode::G_SEXT
3117 : TargetOpcode::G_ZEXT
;
3118 widenScalarSrc(MI
, WideTy
, 2, ExtOpcode
);
3119 widenScalarSrc(MI
, WideTy
, 3, ExtOpcode
);
3121 Observer
.changedInstr(MI
);
3124 case TargetOpcode::G_PTR_ADD
:
3125 assert(TypeIdx
== 1 && "unable to legalize pointer of G_PTR_ADD");
3126 Observer
.changingInstr(MI
);
3127 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
3128 Observer
.changedInstr(MI
);
3131 case TargetOpcode::G_PHI
: {
3132 assert(TypeIdx
== 0 && "Expecting only Idx 0");
3134 Observer
.changingInstr(MI
);
3135 for (unsigned I
= 1; I
< MI
.getNumOperands(); I
+= 2) {
3136 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
3137 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminatorForward());
3138 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_ANYEXT
);
3141 MachineBasicBlock
&MBB
= *MI
.getParent();
3142 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
3143 widenScalarDst(MI
, WideTy
);
3144 Observer
.changedInstr(MI
);
3147 case TargetOpcode::G_EXTRACT_VECTOR_ELT
: {
3149 Register VecReg
= MI
.getOperand(1).getReg();
3150 LLT VecTy
= MRI
.getType(VecReg
);
3151 Observer
.changingInstr(MI
);
3154 MI
, LLT::vector(VecTy
.getElementCount(), WideTy
.getSizeInBits()), 1,
3155 TargetOpcode::G_ANYEXT
);
3157 widenScalarDst(MI
, WideTy
, 0);
3158 Observer
.changedInstr(MI
);
3163 return UnableToLegalize
;
3164 Observer
.changingInstr(MI
);
3165 // TODO: Probably should be zext
3166 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
3167 Observer
.changedInstr(MI
);
3170 case TargetOpcode::G_INSERT_VECTOR_ELT
: {
3172 Observer
.changingInstr(MI
);
3173 const LLT WideEltTy
= WideTy
.getElementType();
3175 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
3176 widenScalarSrc(MI
, WideEltTy
, 2, TargetOpcode::G_ANYEXT
);
3177 widenScalarDst(MI
, WideTy
, 0);
3178 Observer
.changedInstr(MI
);
3183 Observer
.changingInstr(MI
);
3185 Register VecReg
= MI
.getOperand(1).getReg();
3186 LLT VecTy
= MRI
.getType(VecReg
);
3187 LLT WideVecTy
= LLT::vector(VecTy
.getElementCount(), WideTy
);
3189 widenScalarSrc(MI
, WideVecTy
, 1, TargetOpcode::G_ANYEXT
);
3190 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
3191 widenScalarDst(MI
, WideVecTy
, 0);
3192 Observer
.changedInstr(MI
);
3197 Observer
.changingInstr(MI
);
3198 // TODO: Probably should be zext
3199 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_SEXT
);
3200 Observer
.changedInstr(MI
);
3204 return UnableToLegalize
;
3206 case TargetOpcode::G_FADD
:
3207 case TargetOpcode::G_FMUL
:
3208 case TargetOpcode::G_FSUB
:
3209 case TargetOpcode::G_FMA
:
3210 case TargetOpcode::G_FMAD
:
3211 case TargetOpcode::G_FNEG
:
3212 case TargetOpcode::G_FABS
:
3213 case TargetOpcode::G_FCANONICALIZE
:
3214 case TargetOpcode::G_FMINNUM
:
3215 case TargetOpcode::G_FMAXNUM
:
3216 case TargetOpcode::G_FMINNUM_IEEE
:
3217 case TargetOpcode::G_FMAXNUM_IEEE
:
3218 case TargetOpcode::G_FMINIMUM
:
3219 case TargetOpcode::G_FMAXIMUM
:
3220 case TargetOpcode::G_FDIV
:
3221 case TargetOpcode::G_FREM
:
3222 case TargetOpcode::G_FCEIL
:
3223 case TargetOpcode::G_FFLOOR
:
3224 case TargetOpcode::G_FCOS
:
3225 case TargetOpcode::G_FSIN
:
3226 case TargetOpcode::G_FTAN
:
3227 case TargetOpcode::G_FACOS
:
3228 case TargetOpcode::G_FASIN
:
3229 case TargetOpcode::G_FATAN
:
3230 case TargetOpcode::G_FATAN2
:
3231 case TargetOpcode::G_FCOSH
:
3232 case TargetOpcode::G_FSINH
:
3233 case TargetOpcode::G_FTANH
:
3234 case TargetOpcode::G_FLOG10
:
3235 case TargetOpcode::G_FLOG
:
3236 case TargetOpcode::G_FLOG2
:
3237 case TargetOpcode::G_FRINT
:
3238 case TargetOpcode::G_FNEARBYINT
:
3239 case TargetOpcode::G_FSQRT
:
3240 case TargetOpcode::G_FEXP
:
3241 case TargetOpcode::G_FEXP2
:
3242 case TargetOpcode::G_FEXP10
:
3243 case TargetOpcode::G_FPOW
:
3244 case TargetOpcode::G_INTRINSIC_TRUNC
:
3245 case TargetOpcode::G_INTRINSIC_ROUND
:
3246 case TargetOpcode::G_INTRINSIC_ROUNDEVEN
:
3247 assert(TypeIdx
== 0);
3248 Observer
.changingInstr(MI
);
3250 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
)
3251 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_FPEXT
);
3253 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
3254 Observer
.changedInstr(MI
);
3256 case TargetOpcode::G_FPOWI
:
3257 case TargetOpcode::G_FLDEXP
:
3258 case TargetOpcode::G_STRICT_FLDEXP
: {
3260 if (Opcode
== TargetOpcode::G_STRICT_FLDEXP
)
3261 return UnableToLegalize
;
3263 Observer
.changingInstr(MI
);
3264 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_FPEXT
);
3265 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
3266 Observer
.changedInstr(MI
);
3271 // For some reason SelectionDAG tries to promote to a libcall without
3272 // actually changing the integer type for promotion.
3273 Observer
.changingInstr(MI
);
3274 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
3275 Observer
.changedInstr(MI
);
3279 return UnableToLegalize
;
3281 case TargetOpcode::G_FFREXP
: {
3282 Observer
.changingInstr(MI
);
3285 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_FPEXT
);
3286 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
3288 widenScalarDst(MI
, WideTy
, 1);
3291 Observer
.changedInstr(MI
);
3294 case TargetOpcode::G_INTTOPTR
:
3296 return UnableToLegalize
;
3298 Observer
.changingInstr(MI
);
3299 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
3300 Observer
.changedInstr(MI
);
3302 case TargetOpcode::G_PTRTOINT
:
3304 return UnableToLegalize
;
3306 Observer
.changingInstr(MI
);
3307 widenScalarDst(MI
, WideTy
, 0);
3308 Observer
.changedInstr(MI
);
3310 case TargetOpcode::G_BUILD_VECTOR
: {
3311 Observer
.changingInstr(MI
);
3313 const LLT WideEltTy
= TypeIdx
== 1 ? WideTy
: WideTy
.getElementType();
3314 for (int I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
)
3315 widenScalarSrc(MI
, WideEltTy
, I
, TargetOpcode::G_ANYEXT
);
3317 // Avoid changing the result vector type if the source element type was
3320 MI
.setDesc(MIRBuilder
.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC
));
3322 widenScalarDst(MI
, WideTy
, 0);
3325 Observer
.changedInstr(MI
);
3328 case TargetOpcode::G_SEXT_INREG
:
3330 return UnableToLegalize
;
3332 Observer
.changingInstr(MI
);
3333 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
3334 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_TRUNC
);
3335 Observer
.changedInstr(MI
);
3337 case TargetOpcode::G_PTRMASK
: {
3339 return UnableToLegalize
;
3340 Observer
.changingInstr(MI
);
3341 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
3342 Observer
.changedInstr(MI
);
3345 case TargetOpcode::G_VECREDUCE_ADD
: {
3347 return UnableToLegalize
;
3348 Observer
.changingInstr(MI
);
3349 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
3350 widenScalarDst(MI
, WideTy
.getScalarType(), 0, TargetOpcode::G_TRUNC
);
3351 Observer
.changedInstr(MI
);
3354 case TargetOpcode::G_VECREDUCE_FADD
:
3355 case TargetOpcode::G_VECREDUCE_FMUL
:
3356 case TargetOpcode::G_VECREDUCE_FMIN
:
3357 case TargetOpcode::G_VECREDUCE_FMAX
:
3358 case TargetOpcode::G_VECREDUCE_FMINIMUM
:
3359 case TargetOpcode::G_VECREDUCE_FMAXIMUM
: {
3361 return UnableToLegalize
;
3362 Observer
.changingInstr(MI
);
3363 Register VecReg
= MI
.getOperand(1).getReg();
3364 LLT VecTy
= MRI
.getType(VecReg
);
3365 LLT WideVecTy
= VecTy
.isVector()
3366 ? LLT::vector(VecTy
.getElementCount(), WideTy
)
3368 widenScalarSrc(MI
, WideVecTy
, 1, TargetOpcode::G_FPEXT
);
3369 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
3370 Observer
.changedInstr(MI
);
3373 case TargetOpcode::G_VSCALE
: {
3374 MachineOperand
&SrcMO
= MI
.getOperand(1);
3375 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
3376 const APInt
&SrcVal
= SrcMO
.getCImm()->getValue();
3377 // The CImm is always a signed value
3378 const APInt Val
= SrcVal
.sext(WideTy
.getSizeInBits());
3379 Observer
.changingInstr(MI
);
3380 SrcMO
.setCImm(ConstantInt::get(Ctx
, Val
));
3381 widenScalarDst(MI
, WideTy
);
3382 Observer
.changedInstr(MI
);
3385 case TargetOpcode::G_SPLAT_VECTOR
: {
3387 return UnableToLegalize
;
3389 Observer
.changingInstr(MI
);
3390 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
3391 Observer
.changedInstr(MI
);
3394 case TargetOpcode::G_INSERT_SUBVECTOR
: {
3396 return UnableToLegalize
;
3398 GInsertSubvector
&IS
= cast
<GInsertSubvector
>(MI
);
3399 Register BigVec
= IS
.getBigVec();
3400 Register SubVec
= IS
.getSubVec();
3402 LLT SubVecTy
= MRI
.getType(SubVec
);
3403 LLT SubVecWideTy
= SubVecTy
.changeElementType(WideTy
.getElementType());
3405 // Widen the G_INSERT_SUBVECTOR
3406 auto BigZExt
= MIRBuilder
.buildZExt(WideTy
, BigVec
);
3407 auto SubZExt
= MIRBuilder
.buildZExt(SubVecWideTy
, SubVec
);
3408 auto WideInsert
= MIRBuilder
.buildInsertSubvector(WideTy
, BigZExt
, SubZExt
,
3411 // Truncate back down
3412 auto SplatZero
= MIRBuilder
.buildSplatVector(
3413 WideTy
, MIRBuilder
.buildConstant(WideTy
.getElementType(), 0));
3414 MIRBuilder
.buildICmp(CmpInst::Predicate::ICMP_NE
, IS
.getReg(0), WideInsert
,
3417 MI
.eraseFromParent();
3424 static void getUnmergePieces(SmallVectorImpl
<Register
> &Pieces
,
3425 MachineIRBuilder
&B
, Register Src
, LLT Ty
) {
3426 auto Unmerge
= B
.buildUnmerge(Ty
, Src
);
3427 for (int I
= 0, E
= Unmerge
->getNumOperands() - 1; I
!= E
; ++I
)
3428 Pieces
.push_back(Unmerge
.getReg(I
));
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
                                     MachineIRBuilder &MIRBuilder) {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  MachineFunction &MF = MIRBuilder.getMF();
  const DataLayout &DL = MIRBuilder.getDataLayout();
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
  LLT DstLLT = MRI.getType(DstReg);

  Align Alignment(DL.getABITypeAlign(ConstVal->getType()));

  auto Addr = MIRBuilder.buildConstantPool(
      AddrPtrTy,
      MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                              MachineMemOperand::MOLoad, DstLLT, Alignment);

  MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
}
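// Illustrative sketch (not from the upstream comments): the helper above turns
// a constant materialization into a constant-pool address plus a load, roughly
//
//   %addr:_(p0) = G_CONSTANT_POOL %const.0
//   %dst:_(s64) = G_LOAD %addr :: (load (s64) from constant-pool)
//
// where the pointer type comes from the default globals address space and the
// MMO alignment from the ABI alignment of the constant's IR type.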
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerConstant(MachineInstr &MI) {
  const MachineOperand &ConstOperand = MI.getOperand(1);
  const Constant *ConstantVal = ConstOperand.getCImm();

  emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
  MI.eraseFromParent();

  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFConstant(MachineInstr &MI) {
  const MachineOperand &ConstOperand = MI.getOperand(1);
  const Constant *ConstantVal = ConstOperand.getFPImm();

  emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
  MI.eraseFromParent();

  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    SmallVector<Register, 8> SrcRegs;

    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type.

      // If there's an element size mismatch, insert intermediate casts to match
      // the result element type.
      if (NumSrcElt < NumDstElt) { // Source element type is larger.
        // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
        //
        //  =>
        //
        // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
        // %4:_(<2 x s8>) = G_BITCAST %2
        // %5:_(<2 x s8>) = G_BITCAST %3
        // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
        DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
        // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
        //
        //  =>
        //
        // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
        // %4:_(s16) = G_BITCAST %2
        // %5:_(s16) = G_BITCAST %3
        // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
        SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
        DstCastTy = DstEltTy;
      }

      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
      for (Register &SrcReg : SrcRegs)
        SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
    } else
      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);

    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector()) {
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
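// Illustrative sketch (not from the upstream comments): the scalar-to-vector
// path above unmerges the scalar into destination-sized elements and rebuilds
// the vector:
//
//   %1:_(<4 x s8>) = G_BITCAST %0:_(s32)
//    =>
//   %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0
//   %1:_(<4 x s8>) = G_BUILD_VECTOR %2, %3, %4, %5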
/// Figure out the bit offset into a register when coercing a vector index for
/// the wide element type. This is only for the case when promoting vector to
/// one with larger elements.
///
/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
                                                   Register Idx,
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  // Now figure out the amount we need to shift to get the target bits.
  auto OffsetMask = B.buildConstant(
      IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
}
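// Worked example (illustrative only): with NewEltSize = 32 and OldEltSize = 8,
// Log2EltRatio is 2, so for %idx = 5:
//   %offset_idx  = 5 & 0b11 = 1         (byte position within the wide lane)
//   %offset_bits = 1 << Log2(8) = 8     (bit offset of the narrow element)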
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
/// is casting to a vector with a smaller element size, perform multiple element
/// extracts and merge the results. If this is coercing to a vector with larger
/// elements, index the bitcasted vector and extract the target element with bit
/// operations. This is intended to force the indexing in the native register
/// size for architectures that can dynamically index the register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();

  LLT SrcEltTy = SrcVecTy.getElementType();
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //   v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
    //
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy =
        LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //  =>
    //
    // %cast = G_BITCAST %vec
    // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    // %elt_bits = G_LSHR %wide_elt, %offset_bits
    // %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
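// Worked example (illustrative only): extracting the s8 element at %idx = 5
// from <16 x s8> through a <4 x s32> cast follows the larger-element path:
//   %scaled_idx  = 5 >> 2 = 1                ; s32 lane holding the byte
//   %wide_elt    = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
//   %offset_bits = (5 & 3) << 3 = 8          ; bit position inside that lane
//   %elt         = G_TRUNC (G_LSHR %wide_elt, %offset_bits)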
/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
/// TargetReg, while preserving other bits in \p TargetReg.
///
/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
static Register buildBitFieldInsert(MachineIRBuilder &B,
                                    Register TargetReg, Register InsertReg,
                                    Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert
  auto EltMask = B.buildConstant(
      TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                     InsertTy.getSizeInBits()));
  // Shift it into position
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  // The value to insert has all zeros already, so stick it into the masked
  // wide element.
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}
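// Worked example (illustrative only): inserting an s8 value at bit offset 8 of
// an s32 target register:
//   ShiftedInsertVal = zext(InsertReg) << 8
//   ShiftedMask      = 0x000000FF << 8 = 0x0000FF00
//   MaskedOldElt     = TargetReg & ~0x0000FF00
//   result           = MaskedOldElt | ShiftedInsertVal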
/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file and
/// want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
      MI.getFirst4RegLLTs();
  LLT VecTy = DstTy;

  LLT VecEltTy = VecTy.getElementType();
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      InsertedElt = MIRBuilder.buildInsertVectorElement(
          CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
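// Illustrative sketch (not from the upstream comments; value names are
// hypothetical): inserting a new s8 element at %idx = 5 of <16 x s8> through a
// <4 x s32> cast reuses the same index math as the extract case:
//   %scaled_idx = 5 >> 2 = 1
//   %lane       = G_EXTRACT_VECTOR_ELT %cast_vec, %scaled_idx
//   %patched    = buildBitFieldInsert(%lane, %val, offset_bits = 8)
//   %new_vec    = G_INSERT_VECTOR_ELT %cast_vec, %patched, %scaled_idx
//   %dst        = G_BITCAST %new_vec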
// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
// those that have smaller than legal operands.
//
// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
//
// ===>
//
// s32 = G_BITCAST <4 x s8>
// s32 = G_BITCAST <4 x s8>
// s32 = G_BITCAST <4 x s8>
// s32 = G_BITCAST <4 x s8>
// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
// <16 x s8> = G_BITCAST <4 x s32>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT CastTy) {
  // Convert it to CONCAT instruction
  auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
  if (!ConcatMI) {
    return UnableToLegalize;
  }

  // Check if bitcast is Legal
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());

  // Check if the build vector is Legal
  if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
    return UnableToLegalize;
  }

  // Bitcast the sources
  SmallVector<Register> BitcastRegs;
  for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
    BitcastRegs.push_back(
        MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
            .getReg(0));
  }

  // Build the scalar values into a vector
  Register BuildReg =
      MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
  MIRBuilder.buildBitcast(DstReg, BuildReg);

  MI.eraseFromParent();
  return Legalized;
}
// This bitcasts a shuffle vector to a different type currently of the same
// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
// will be used instead.
//
// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
//
// ===>
//
// <4 x s64> = G_PTRTOINT <4 x p0>
// <4 x s64> = G_PTRTOINT <4 x p0>
// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
// <16 x p0> = G_INTTOPTR <16 x s64>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
                                      LLT CastTy) {
  auto ShuffleMI = cast<GShuffleVector>(&MI);
  LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
  LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));

  // We currently only handle vectors of the same size.
  if (TypeIdx != 0 ||
      CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
      CastTy.getElementCount() != DstTy.getElementCount())
    return UnableToLegalize;

  LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());

  auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
  auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
  auto Shuf =
      MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
  MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);

  MI.eraseFromParent();
  return Legalized;
}
/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
///
///  <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
///
/// ===>
///
///  <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
///  <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
///  <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  auto ES = cast<GExtractSubvector>(&MI);

  if (!CastTy.isVector())
    return UnableToLegalize;

  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = ES->getReg(0);
  Register Src = ES->getSrcVec();
  uint64_t Idx = ES->getIndexImm();

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  ElementCount DstTyEC = DstTy.getElementCount();
  ElementCount SrcTyEC = SrcTy.getElementCount();
  auto DstTyMinElts = DstTyEC.getKnownMinValue();
  auto SrcTyMinElts = SrcTyEC.getKnownMinValue();

  if (DstTy == CastTy)
    return Legalized;

  if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
    return UnableToLegalize;

  unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
  unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
  if (CastEltSize < DstEltSize)
    return UnableToLegalize;

  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      SrcTyMinElts % AdjustAmt != 0)
    return UnableToLegalize;

  Idx /= AdjustAmt;
  SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
  auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
  MIRBuilder.buildBitcast(Dst, PromotedES);

  ES->eraseFromParent();
  return Legalized;
}
/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
///
///  <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
///                                          <vscale x 8 x i1>,
///                                          N
///
/// ===>
///
///  <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
///  <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
///  <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
///                                         <vscale x 1 x i8>, N / 8
///  <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  auto ES = cast<GInsertSubvector>(&MI);

  if (!CastTy.isVector())
    return UnableToLegalize;

  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = ES->getReg(0);
  Register BigVec = ES->getBigVec();
  Register SubVec = ES->getSubVec();
  uint64_t Idx = ES->getIndexImm();

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  LLT DstTy = MRI.getType(Dst);
  LLT BigVecTy = MRI.getType(BigVec);
  LLT SubVecTy = MRI.getType(SubVec);

  if (DstTy == CastTy)
    return Legalized;

  if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
    return UnableToLegalize;

  ElementCount DstTyEC = DstTy.getElementCount();
  ElementCount BigVecTyEC = BigVecTy.getElementCount();
  ElementCount SubVecTyEC = SubVecTy.getElementCount();
  auto DstTyMinElts = DstTyEC.getKnownMinValue();
  auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
  auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();

  unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
  unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
  if (CastEltSize < DstEltSize)
    return UnableToLegalize;

  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
    return UnableToLegalize;

  Idx /= AdjustAmt;
  BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
  auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
  auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
  auto PromotedIS =
      MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
  MIRBuilder.buildBitcast(Dst, PromotedIS);

  ES->eraseFromParent();
  return Legalized;
}
3976 LegalizerHelper::LegalizeResult
LegalizerHelper::lowerLoad(GAnyLoad
&LoadMI
) {
3977 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3978 Register DstReg
= LoadMI
.getDstReg();
3979 Register PtrReg
= LoadMI
.getPointerReg();
3980 LLT DstTy
= MRI
.getType(DstReg
);
3981 MachineMemOperand
&MMO
= LoadMI
.getMMO();
3982 LLT MemTy
= MMO
.getMemoryType();
3983 MachineFunction
&MF
= MIRBuilder
.getMF();
3985 unsigned MemSizeInBits
= MemTy
.getSizeInBits();
3986 unsigned MemStoreSizeInBits
= 8 * MemTy
.getSizeInBytes();
3988 if (MemSizeInBits
!= MemStoreSizeInBits
) {
3989 if (MemTy
.isVector())
3990 return UnableToLegalize
;
3992 // Promote to a byte-sized load if not loading an integral number of
3993 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3994 LLT WideMemTy
= LLT::scalar(MemStoreSizeInBits
);
3995 MachineMemOperand
*NewMMO
=
3996 MF
.getMachineMemOperand(&MMO
, MMO
.getPointerInfo(), WideMemTy
);
3998 Register LoadReg
= DstReg
;
4001 // If this wasn't already an extending load, we need to widen the result
4002 // register to avoid creating a load with a narrower result than the source.
4003 if (MemStoreSizeInBits
> DstTy
.getSizeInBits()) {
4005 LoadReg
= MRI
.createGenericVirtualRegister(WideMemTy
);
4008 if (isa
<GSExtLoad
>(LoadMI
)) {
4009 auto NewLoad
= MIRBuilder
.buildLoad(LoadTy
, PtrReg
, *NewMMO
);
4010 MIRBuilder
.buildSExtInReg(LoadReg
, NewLoad
, MemSizeInBits
);
4011 } else if (isa
<GZExtLoad
>(LoadMI
) || WideMemTy
== LoadTy
) {
4012 auto NewLoad
= MIRBuilder
.buildLoad(LoadTy
, PtrReg
, *NewMMO
);
4013 // The extra bits are guaranteed to be zero, since we stored them that
4014 // way. A zext load from Wide thus automatically gives zext from MemVT.
4015 MIRBuilder
.buildAssertZExt(LoadReg
, NewLoad
, MemSizeInBits
);
4017 MIRBuilder
.buildLoad(LoadReg
, PtrReg
, *NewMMO
);
4020 if (DstTy
!= LoadTy
)
4021 MIRBuilder
.buildTrunc(DstReg
, LoadReg
);
4023 LoadMI
.eraseFromParent();
4027 // Big endian lowering not implemented.
4028 if (MIRBuilder
.getDataLayout().isBigEndian())
4029 return UnableToLegalize
;
4031 // This load needs splitting into power of 2 sized loads.
4033 // Our strategy here is to generate anyextending loads for the smaller
4034 // types up to next power-2 result type, and then combine the two larger
4035 // result values together, before truncating back down to the non-pow-2
4037 // E.g. v1 = i24 load =>
4038 // v2 = i32 zextload (2 byte)
4039 // v3 = i32 load (1 byte)
4040 // v4 = i32 shl v3, 16
4041 // v5 = i32 or v4, v2
4042 // v1 = i24 trunc v5
4043 // By doing this we generate the correct truncate which should get
4044 // combined away as an artifact with a matching extend.
4046 uint64_t LargeSplitSize
, SmallSplitSize
;
4048 if (!isPowerOf2_32(MemSizeInBits
)) {
4049 // This load needs splitting into power of 2 sized loads.
4050 LargeSplitSize
= llvm::bit_floor(MemSizeInBits
);
4051 SmallSplitSize
= MemSizeInBits
- LargeSplitSize
;
4053 // This is already a power of 2, but we still need to split this in half.
4055 // Assume we're being asked to decompose an unaligned load.
4056 // TODO: If this requires multiple splits, handle them all at once.
4057 auto &Ctx
= MF
.getFunction().getContext();
4058 if (TLI
.allowsMemoryAccess(Ctx
, MIRBuilder
.getDataLayout(), MemTy
, MMO
))
4059 return UnableToLegalize
;
4061 SmallSplitSize
= LargeSplitSize
= MemSizeInBits
/ 2;
4064 if (MemTy
.isVector()) {
4065 // TODO: Handle vector extloads
4067 return UnableToLegalize
;
4069 // TODO: We can do better than scalarizing the vector and at least split it
4071 return reduceLoadStoreWidth(LoadMI
, 0, DstTy
.getElementType());
4074 MachineMemOperand
*LargeMMO
=
4075 MF
.getMachineMemOperand(&MMO
, 0, LargeSplitSize
/ 8);
4076 MachineMemOperand
*SmallMMO
=
4077 MF
.getMachineMemOperand(&MMO
, LargeSplitSize
/ 8, SmallSplitSize
/ 8);
4079 LLT PtrTy
= MRI
.getType(PtrReg
);
4080 unsigned AnyExtSize
= PowerOf2Ceil(DstTy
.getSizeInBits());
4081 LLT AnyExtTy
= LLT::scalar(AnyExtSize
);
4082 auto LargeLoad
= MIRBuilder
.buildLoadInstr(TargetOpcode::G_ZEXTLOAD
, AnyExtTy
,
4085 auto OffsetCst
= MIRBuilder
.buildConstant(LLT::scalar(PtrTy
.getSizeInBits()),
4086 LargeSplitSize
/ 8);
4087 Register PtrAddReg
= MRI
.createGenericVirtualRegister(PtrTy
);
4088 auto SmallPtr
= MIRBuilder
.buildPtrAdd(PtrAddReg
, PtrReg
, OffsetCst
);
4089 auto SmallLoad
= MIRBuilder
.buildLoadInstr(LoadMI
.getOpcode(), AnyExtTy
,
4090 SmallPtr
, *SmallMMO
);
4092 auto ShiftAmt
= MIRBuilder
.buildConstant(AnyExtTy
, LargeSplitSize
);
4093 auto Shift
= MIRBuilder
.buildShl(AnyExtTy
, SmallLoad
, ShiftAmt
);
4095 if (AnyExtTy
== DstTy
)
4096 MIRBuilder
.buildOr(DstReg
, Shift
, LargeLoad
);
4097 else if (AnyExtTy
.getSizeInBits() != DstTy
.getSizeInBits()) {
4098 auto Or
= MIRBuilder
.buildOr(AnyExtTy
, Shift
, LargeLoad
);
4099 MIRBuilder
.buildTrunc(DstReg
, {Or
});
4101 assert(DstTy
.isPointer() && "expected pointer");
4102 auto Or
= MIRBuilder
.buildOr(AnyExtTy
, Shift
, LargeLoad
);
4104 // FIXME: We currently consider this to be illegal for non-integral address
4105 // spaces, but we need still need a way to reinterpret the bits.
4106 MIRBuilder
.buildIntToPtr(DstReg
, Or
);
4109 LoadMI
.eraseFromParent();
4113 LegalizerHelper::LegalizeResult
LegalizerHelper::lowerStore(GStore
&StoreMI
) {
4114 // Lower a non-power of 2 store into multiple pow-2 stores.
4115 // E.g. split an i24 store into an i16 store + i8 store.
4116 // We do this by first extending the stored value to the next largest power
4117 // of 2 type, and then using truncating stores to store the components.
4118 // By doing this, likewise with G_LOAD, generate an extend that can be
4119 // artifact-combined away instead of leaving behind extracts.
4120 Register SrcReg
= StoreMI
.getValueReg();
4121 Register PtrReg
= StoreMI
.getPointerReg();
4122 LLT SrcTy
= MRI
.getType(SrcReg
);
4123 MachineFunction
&MF
= MIRBuilder
.getMF();
4124 MachineMemOperand
&MMO
= **StoreMI
.memoperands_begin();
4125 LLT MemTy
= MMO
.getMemoryType();
4127 unsigned StoreWidth
= MemTy
.getSizeInBits();
4128 unsigned StoreSizeInBits
= 8 * MemTy
.getSizeInBytes();
4130 if (StoreWidth
!= StoreSizeInBits
&& !SrcTy
.isVector()) {
4131 // Promote to a byte-sized store with upper bits zero if not
4132 // storing an integral number of bytes. For example, promote
4133 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4134 LLT WideTy
= LLT::scalar(StoreSizeInBits
);
4136 if (StoreSizeInBits
> SrcTy
.getSizeInBits()) {
4137 // Avoid creating a store with a narrower source than result.
4138 SrcReg
= MIRBuilder
.buildAnyExt(WideTy
, SrcReg
).getReg(0);
4142 auto ZextInReg
= MIRBuilder
.buildZExtInReg(SrcTy
, SrcReg
, StoreWidth
);
4144 MachineMemOperand
*NewMMO
=
4145 MF
.getMachineMemOperand(&MMO
, MMO
.getPointerInfo(), WideTy
);
4146 MIRBuilder
.buildStore(ZextInReg
, PtrReg
, *NewMMO
);
4147 StoreMI
.eraseFromParent();
4151 if (MemTy
.isVector()) {
4153 return scalarizeVectorBooleanStore(StoreMI
);
4155 // TODO: We can do better than scalarizing the vector and at least split it
4157 return reduceLoadStoreWidth(StoreMI
, 0, SrcTy
.getElementType());
4160 unsigned MemSizeInBits
= MemTy
.getSizeInBits();
4161 uint64_t LargeSplitSize
, SmallSplitSize
;
4163 if (!isPowerOf2_32(MemSizeInBits
)) {
4164 LargeSplitSize
= llvm::bit_floor
<uint64_t>(MemTy
.getSizeInBits());
4165 SmallSplitSize
= MemTy
.getSizeInBits() - LargeSplitSize
;
4167 auto &Ctx
= MF
.getFunction().getContext();
4168 if (TLI
.allowsMemoryAccess(Ctx
, MIRBuilder
.getDataLayout(), MemTy
, MMO
))
4169 return UnableToLegalize
; // Don't know what we're being asked to do.
4171 SmallSplitSize
= LargeSplitSize
= MemSizeInBits
/ 2;
4174 // Extend to the next pow-2. If this store was itself the result of lowering,
4175 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4176 // that's wider than the stored size.
4177 unsigned AnyExtSize
= PowerOf2Ceil(MemTy
.getSizeInBits());
4178 const LLT NewSrcTy
= LLT::scalar(AnyExtSize
);
4180 if (SrcTy
.isPointer()) {
4181 const LLT IntPtrTy
= LLT::scalar(SrcTy
.getSizeInBits());
4182 SrcReg
= MIRBuilder
.buildPtrToInt(IntPtrTy
, SrcReg
).getReg(0);
4185 auto ExtVal
= MIRBuilder
.buildAnyExtOrTrunc(NewSrcTy
, SrcReg
);
4187 // Obtain the smaller value by shifting away the larger value.
4188 auto ShiftAmt
= MIRBuilder
.buildConstant(NewSrcTy
, LargeSplitSize
);
4189 auto SmallVal
= MIRBuilder
.buildLShr(NewSrcTy
, ExtVal
, ShiftAmt
);
4191 // Generate the PtrAdd and truncating stores.
4192 LLT PtrTy
= MRI
.getType(PtrReg
);
4193 auto OffsetCst
= MIRBuilder
.buildConstant(
4194 LLT::scalar(PtrTy
.getSizeInBits()), LargeSplitSize
/ 8);
4196 MIRBuilder
.buildPtrAdd(PtrTy
, PtrReg
, OffsetCst
);
4198 MachineMemOperand
*LargeMMO
=
4199 MF
.getMachineMemOperand(&MMO
, 0, LargeSplitSize
/ 8);
4200 MachineMemOperand
*SmallMMO
=
4201 MF
.getMachineMemOperand(&MMO
, LargeSplitSize
/ 8, SmallSplitSize
/ 8);
4202 MIRBuilder
.buildStore(ExtVal
, PtrReg
, *LargeMMO
);
4203 MIRBuilder
.buildStore(SmallVal
, SmallPtr
, *SmallMMO
);
4204 StoreMI
.eraseFromParent();
4208 LegalizerHelper::LegalizeResult
4209 LegalizerHelper::scalarizeVectorBooleanStore(GStore
&StoreMI
) {
4210 Register SrcReg
= StoreMI
.getValueReg();
4211 Register PtrReg
= StoreMI
.getPointerReg();
4212 LLT SrcTy
= MRI
.getType(SrcReg
);
4213 MachineMemOperand
&MMO
= **StoreMI
.memoperands_begin();
4214 LLT MemTy
= MMO
.getMemoryType();
4215 LLT MemScalarTy
= MemTy
.getElementType();
4216 MachineFunction
&MF
= MIRBuilder
.getMF();
4218 assert(SrcTy
.isVector() && "Expect a vector store type");
4220 if (!MemScalarTy
.isByteSized()) {
4221 // We need to build an integer scalar of the vector bit pattern.
4222 // It's not legal for us to add padding when storing a vector.
4223 unsigned NumBits
= MemTy
.getSizeInBits();
4224 LLT IntTy
= LLT::scalar(NumBits
);
4225 auto CurrVal
= MIRBuilder
.buildConstant(IntTy
, 0);
4226 LLT IdxTy
= getLLTForMVT(TLI
.getVectorIdxTy(MF
.getDataLayout()));
4228 for (unsigned I
= 0, E
= MemTy
.getNumElements(); I
< E
; ++I
) {
4229 auto Elt
= MIRBuilder
.buildExtractVectorElement(
4230 SrcTy
.getElementType(), SrcReg
, MIRBuilder
.buildConstant(IdxTy
, I
));
4231 auto Trunc
= MIRBuilder
.buildTrunc(MemScalarTy
, Elt
);
4232 auto ZExt
= MIRBuilder
.buildZExt(IntTy
, Trunc
);
4233 unsigned ShiftIntoIdx
= MF
.getDataLayout().isBigEndian()
4234 ? (MemTy
.getNumElements() - 1) - I
4236 auto ShiftAmt
= MIRBuilder
.buildConstant(
4237 IntTy
, ShiftIntoIdx
* MemScalarTy
.getSizeInBits());
4238 auto Shifted
= MIRBuilder
.buildShl(IntTy
, ZExt
, ShiftAmt
);
4239 CurrVal
= MIRBuilder
.buildOr(IntTy
, CurrVal
, Shifted
);
4241 auto PtrInfo
= MMO
.getPointerInfo();
4242 auto *NewMMO
= MF
.getMachineMemOperand(&MMO
, PtrInfo
, IntTy
);
4243 MIRBuilder
.buildStore(CurrVal
, PtrReg
, *NewMMO
);
4244 StoreMI
.eraseFromParent();
4248 // TODO: implement simple scalarization.
4249 return UnableToLegalize
;
4252 LegalizerHelper::LegalizeResult
4253 LegalizerHelper::bitcast(MachineInstr
&MI
, unsigned TypeIdx
, LLT CastTy
) {
4254 switch (MI
.getOpcode()) {
4255 case TargetOpcode::G_LOAD
: {
4257 return UnableToLegalize
;
4258 MachineMemOperand
&MMO
= **MI
.memoperands_begin();
4260 // Not sure how to interpret a bitcast of an extending load.
4261 if (MMO
.getMemoryType().getSizeInBits() != CastTy
.getSizeInBits())
4262 return UnableToLegalize
;
4264 Observer
.changingInstr(MI
);
4265 bitcastDst(MI
, CastTy
, 0);
4266 MMO
.setType(CastTy
);
4267 // The range metadata is no longer valid when reinterpreted as a different
4270 Observer
.changedInstr(MI
);
4273 case TargetOpcode::G_STORE
: {
4275 return UnableToLegalize
;
4277 MachineMemOperand
&MMO
= **MI
.memoperands_begin();
4279 // Not sure how to interpret a bitcast of a truncating store.
4280 if (MMO
.getMemoryType().getSizeInBits() != CastTy
.getSizeInBits())
4281 return UnableToLegalize
;
4283 Observer
.changingInstr(MI
);
4284 bitcastSrc(MI
, CastTy
, 0);
4285 MMO
.setType(CastTy
);
4286 Observer
.changedInstr(MI
);
4289 case TargetOpcode::G_SELECT
: {
4291 return UnableToLegalize
;
4293 if (MRI
.getType(MI
.getOperand(1).getReg()).isVector()) {
4295 dbgs() << "bitcast action not implemented for vector select\n");
4296 return UnableToLegalize
;
4299 Observer
.changingInstr(MI
);
4300 bitcastSrc(MI
, CastTy
, 2);
4301 bitcastSrc(MI
, CastTy
, 3);
4302 bitcastDst(MI
, CastTy
, 0);
4303 Observer
.changedInstr(MI
);
4306 case TargetOpcode::G_AND
:
4307 case TargetOpcode::G_OR
:
4308 case TargetOpcode::G_XOR
: {
4309 Observer
.changingInstr(MI
);
4310 bitcastSrc(MI
, CastTy
, 1);
4311 bitcastSrc(MI
, CastTy
, 2);
4312 bitcastDst(MI
, CastTy
, 0);
4313 Observer
.changedInstr(MI
);
4316 case TargetOpcode::G_EXTRACT_VECTOR_ELT
:
4317 return bitcastExtractVectorElt(MI
, TypeIdx
, CastTy
);
4318 case TargetOpcode::G_INSERT_VECTOR_ELT
:
4319 return bitcastInsertVectorElt(MI
, TypeIdx
, CastTy
);
4320 case TargetOpcode::G_CONCAT_VECTORS
:
4321 return bitcastConcatVector(MI
, TypeIdx
, CastTy
);
4322 case TargetOpcode::G_SHUFFLE_VECTOR
:
4323 return bitcastShuffleVector(MI
, TypeIdx
, CastTy
);
4324 case TargetOpcode::G_EXTRACT_SUBVECTOR
:
4325 return bitcastExtractSubvector(MI
, TypeIdx
, CastTy
);
4326 case TargetOpcode::G_INSERT_SUBVECTOR
:
4327 return bitcastInsertSubvector(MI
, TypeIdx
, CastTy
);
4329 return UnableToLegalize
;
4333 // Legalize an instruction by changing the opcode in place.
4334 void LegalizerHelper::changeOpcode(MachineInstr
&MI
, unsigned NewOpcode
) {
4335 Observer
.changingInstr(MI
);
4336 MI
.setDesc(MIRBuilder
.getTII().get(NewOpcode
));
4337 Observer
.changedInstr(MI
);
4340 LegalizerHelper::LegalizeResult
4341 LegalizerHelper::lower(MachineInstr
&MI
, unsigned TypeIdx
, LLT LowerHintTy
) {
4342 using namespace TargetOpcode
;
4344 switch(MI
.getOpcode()) {
4346 return UnableToLegalize
;
4347 case TargetOpcode::G_FCONSTANT
:
4348 return lowerFConstant(MI
);
4349 case TargetOpcode::G_BITCAST
:
4350 return lowerBitcast(MI
);
4351 case TargetOpcode::G_SREM
:
4352 case TargetOpcode::G_UREM
: {
4353 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
4355 MIRBuilder
.buildInstr(MI
.getOpcode() == G_SREM
? G_SDIV
: G_UDIV
, {Ty
},
4356 {MI
.getOperand(1), MI
.getOperand(2)});
4358 auto Prod
= MIRBuilder
.buildMul(Ty
, Quot
, MI
.getOperand(2));
4359 MIRBuilder
.buildSub(MI
.getOperand(0), MI
.getOperand(1), Prod
);
4360 MI
.eraseFromParent();
4363 case TargetOpcode::G_SADDO
:
4364 case TargetOpcode::G_SSUBO
:
4365 return lowerSADDO_SSUBO(MI
);
4366 case TargetOpcode::G_UMULH
:
4367 case TargetOpcode::G_SMULH
:
4368 return lowerSMULH_UMULH(MI
);
4369 case TargetOpcode::G_SMULO
:
4370 case TargetOpcode::G_UMULO
: {
4371 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4373 auto [Res
, Overflow
, LHS
, RHS
] = MI
.getFirst4Regs();
4374 LLT Ty
= MRI
.getType(Res
);
4376 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_SMULO
4377 ? TargetOpcode::G_SMULH
4378 : TargetOpcode::G_UMULH
;
4380 Observer
.changingInstr(MI
);
4381 const auto &TII
= MIRBuilder
.getTII();
4382 MI
.setDesc(TII
.get(TargetOpcode::G_MUL
));
4383 MI
.removeOperand(1);
4384 Observer
.changedInstr(MI
);
4386 auto HiPart
= MIRBuilder
.buildInstr(Opcode
, {Ty
}, {LHS
, RHS
});
4387 auto Zero
= MIRBuilder
.buildConstant(Ty
, 0);
4389 // Move insert point forward so we can use the Res register if needed.
4390 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
4392 // For *signed* multiply, overflow is detected by checking:
4393 // (hi != (lo >> bitwidth-1))
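    // Worked example (illustrative only), for s8: 100 * 2 = 200 does not fit.
    // The low part Res is 0xC8 (-56) and HiPart is 0x00, while
    // Res >> 7 (arithmetic) is -1; HiPart != -1, so overflow is reported.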
4394 if (Opcode
== TargetOpcode::G_SMULH
) {
4395 auto ShiftAmt
= MIRBuilder
.buildConstant(Ty
, Ty
.getSizeInBits() - 1);
4396 auto Shifted
= MIRBuilder
.buildAShr(Ty
, Res
, ShiftAmt
);
4397 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Shifted
);
4399 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Zero
);
4403 case TargetOpcode::G_FNEG
: {
4404 auto [Res
, SubByReg
] = MI
.getFirst2Regs();
4405 LLT Ty
= MRI
.getType(Res
);
4407 auto SignMask
= MIRBuilder
.buildConstant(
4408 Ty
, APInt::getSignMask(Ty
.getScalarSizeInBits()));
4409 MIRBuilder
.buildXor(Res
, SubByReg
, SignMask
);
4410 MI
.eraseFromParent();
4413 case TargetOpcode::G_FSUB
:
4414 case TargetOpcode::G_STRICT_FSUB
: {
4415 auto [Res
, LHS
, RHS
] = MI
.getFirst3Regs();
4416 LLT Ty
= MRI
.getType(Res
);
4418 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4419 auto Neg
= MIRBuilder
.buildFNeg(Ty
, RHS
);
4421 if (MI
.getOpcode() == TargetOpcode::G_STRICT_FSUB
)
4422 MIRBuilder
.buildStrictFAdd(Res
, LHS
, Neg
, MI
.getFlags());
4424 MIRBuilder
.buildFAdd(Res
, LHS
, Neg
, MI
.getFlags());
4426 MI
.eraseFromParent();
4429 case TargetOpcode::G_FMAD
:
4430 return lowerFMad(MI
);
4431 case TargetOpcode::G_FFLOOR
:
4432 return lowerFFloor(MI
);
4433 case TargetOpcode::G_LROUND
:
4434 case TargetOpcode::G_LLROUND
: {
4435 Register DstReg
= MI
.getOperand(0).getReg();
4436 Register SrcReg
= MI
.getOperand(1).getReg();
4437 LLT SrcTy
= MRI
.getType(SrcReg
);
4438 auto Round
= MIRBuilder
.buildInstr(TargetOpcode::G_INTRINSIC_ROUND
, {SrcTy
},
4440 MIRBuilder
.buildFPTOSI(DstReg
, Round
);
4441 MI
.eraseFromParent();
4444 case TargetOpcode::G_INTRINSIC_ROUND
:
4445 return lowerIntrinsicRound(MI
);
4446 case TargetOpcode::G_FRINT
: {
4447 // Since round even is the assumed rounding mode for unconstrained FP
4448 // operations, rint and roundeven are the same operation.
4449 changeOpcode(MI
, TargetOpcode::G_INTRINSIC_ROUNDEVEN
);
4452 case TargetOpcode::G_INTRINSIC_LRINT
:
4453 case TargetOpcode::G_INTRINSIC_LLRINT
: {
4454 Register DstReg
= MI
.getOperand(0).getReg();
4455 Register SrcReg
= MI
.getOperand(1).getReg();
4456 LLT SrcTy
= MRI
.getType(SrcReg
);
4458 MIRBuilder
.buildInstr(TargetOpcode::G_FRINT
, {SrcTy
}, {SrcReg
});
4459 MIRBuilder
.buildFPTOSI(DstReg
, Round
);
4460 MI
.eraseFromParent();
4463 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS
: {
4464 auto [OldValRes
, SuccessRes
, Addr
, CmpVal
, NewVal
] = MI
.getFirst5Regs();
4465 Register NewOldValRes
= MRI
.cloneVirtualRegister(OldValRes
);
4466 MIRBuilder
.buildAtomicCmpXchg(NewOldValRes
, Addr
, CmpVal
, NewVal
,
4467 **MI
.memoperands_begin());
4468 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, SuccessRes
, NewOldValRes
, CmpVal
);
4469 MIRBuilder
.buildCopy(OldValRes
, NewOldValRes
);
4470 MI
.eraseFromParent();
4473 case TargetOpcode::G_LOAD
:
4474 case TargetOpcode::G_SEXTLOAD
:
4475 case TargetOpcode::G_ZEXTLOAD
:
4476 return lowerLoad(cast
<GAnyLoad
>(MI
));
4477 case TargetOpcode::G_STORE
:
4478 return lowerStore(cast
<GStore
>(MI
));
4479 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
4480 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
4481 case TargetOpcode::G_CTLZ
:
4482 case TargetOpcode::G_CTTZ
:
4483 case TargetOpcode::G_CTPOP
:
4484 return lowerBitCount(MI
);
4486 auto [Res
, CarryOut
, LHS
, RHS
] = MI
.getFirst4Regs();
4488 Register NewRes
= MRI
.cloneVirtualRegister(Res
);
4490 MIRBuilder
.buildAdd(NewRes
, LHS
, RHS
);
4491 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, CarryOut
, NewRes
, RHS
);
4493 MIRBuilder
.buildCopy(Res
, NewRes
);
4495 MI
.eraseFromParent();
4499 auto [Res
, CarryOut
, LHS
, RHS
, CarryIn
] = MI
.getFirst5Regs();
4500 const LLT CondTy
= MRI
.getType(CarryOut
);
4501 const LLT Ty
= MRI
.getType(Res
);
4503 Register NewRes
= MRI
.cloneVirtualRegister(Res
);
4505 // Initial add of the two operands.
4506 auto TmpRes
= MIRBuilder
.buildAdd(Ty
, LHS
, RHS
);
4508 // Initial check for carry.
4509 auto Carry
= MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, CondTy
, TmpRes
, LHS
);
4511 // Add the sum and the carry.
4512 auto ZExtCarryIn
= MIRBuilder
.buildZExt(Ty
, CarryIn
);
4513 MIRBuilder
.buildAdd(NewRes
, TmpRes
, ZExtCarryIn
);
4515 // Second check for carry. We can only carry if the initial sum is all 1s
4516 // and the carry is set, resulting in a new sum of 0.
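    // Worked example (illustrative only), for s8 with LHS = 0xFF, RHS = 0x00,
    // CarryIn = 1: TmpRes = 0xFF and Carry = 0, then NewRes = 0x00, so
    // ResEqZero = 1, Carry2 = 1 and CarryOut = 0 | 1 = 1 as expected.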
4517 auto Zero
= MIRBuilder
.buildConstant(Ty
, 0);
4519 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, CondTy
, NewRes
, Zero
);
4520 auto Carry2
= MIRBuilder
.buildAnd(CondTy
, ResEqZero
, CarryIn
);
4521 MIRBuilder
.buildOr(CarryOut
, Carry
, Carry2
);
4523 MIRBuilder
.buildCopy(Res
, NewRes
);
4525 MI
.eraseFromParent();
4529 auto [Res
, BorrowOut
, LHS
, RHS
] = MI
.getFirst4Regs();
4531 MIRBuilder
.buildSub(Res
, LHS
, RHS
);
4532 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, BorrowOut
, LHS
, RHS
);
4534 MI
.eraseFromParent();
4538 auto [Res
, BorrowOut
, LHS
, RHS
, BorrowIn
] = MI
.getFirst5Regs();
4539 const LLT CondTy
= MRI
.getType(BorrowOut
);
4540 const LLT Ty
= MRI
.getType(Res
);
4542 // Initial subtract of the two operands.
4543 auto TmpRes
= MIRBuilder
.buildSub(Ty
, LHS
, RHS
);
4545 // Initial check for borrow.
4546 auto Borrow
= MIRBuilder
.buildICmp(CmpInst::ICMP_UGT
, CondTy
, TmpRes
, LHS
);
4548 // Subtract the borrow from the first subtract.
4549 auto ZExtBorrowIn
= MIRBuilder
.buildZExt(Ty
, BorrowIn
);
4550 MIRBuilder
.buildSub(Res
, TmpRes
, ZExtBorrowIn
);
4552 // Second check for borrow. We can only borrow if the initial difference is
4553 // 0 and the borrow is set, resulting in a new difference of all 1s.
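    // Worked example (illustrative only), for s8 with LHS = 0, RHS = 0,
    // BorrowIn = 1: TmpRes = 0 and Borrow = 0, then Res = 0xFF, so
    // TmpResEqZero = 1, Borrow2 = 1 and BorrowOut = 0 | 1 = 1 as expected.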
4554 auto Zero
= MIRBuilder
.buildConstant(Ty
, 0);
4556 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, CondTy
, TmpRes
, Zero
);
4557 auto Borrow2
= MIRBuilder
.buildAnd(CondTy
, TmpResEqZero
, BorrowIn
);
4558 MIRBuilder
.buildOr(BorrowOut
, Borrow
, Borrow2
);
4560 MI
.eraseFromParent();
4564 return lowerUITOFP(MI
);
4566 return lowerSITOFP(MI
);
4568 return lowerFPTOUI(MI
);
4570 return lowerFPTOSI(MI
);
4573 return lowerFPTOINT_SAT(MI
);
4575 return lowerFPTRUNC(MI
);
4577 return lowerFPOWI(MI
);
4582 return lowerMinMax(MI
);
4585 return lowerThreewayCompare(MI
);
4587 return lowerFCopySign(MI
);
4590 return lowerFMinNumMaxNum(MI
);
4591 case G_MERGE_VALUES
:
4592 return lowerMergeValues(MI
);
4593 case G_UNMERGE_VALUES
:
4594 return lowerUnmergeValues(MI
);
4595 case TargetOpcode::G_SEXT_INREG
: {
4596 assert(MI
.getOperand(2).isImm() && "Expected immediate");
4597 int64_t SizeInBits
= MI
.getOperand(2).getImm();
4599 auto [DstReg
, SrcReg
] = MI
.getFirst2Regs();
4600 LLT DstTy
= MRI
.getType(DstReg
);
4601 Register TmpRes
= MRI
.createGenericVirtualRegister(DstTy
);
4603 auto MIBSz
= MIRBuilder
.buildConstant(DstTy
, DstTy
.getScalarSizeInBits() - SizeInBits
);
4604 MIRBuilder
.buildShl(TmpRes
, SrcReg
, MIBSz
->getOperand(0));
4605 MIRBuilder
.buildAShr(DstReg
, TmpRes
, MIBSz
->getOperand(0));
4606 MI
.eraseFromParent();
4609 case G_EXTRACT_VECTOR_ELT
:
4610 case G_INSERT_VECTOR_ELT
:
4611 return lowerExtractInsertVectorElt(MI
);
4612 case G_SHUFFLE_VECTOR
:
4613 return lowerShuffleVector(MI
);
4614 case G_VECTOR_COMPRESS
:
4615 return lowerVECTOR_COMPRESS(MI
);
4616 case G_DYN_STACKALLOC
:
4617 return lowerDynStackAlloc(MI
);
4619 return lowerStackSave(MI
);
4620 case G_STACKRESTORE
:
4621 return lowerStackRestore(MI
);
4623 return lowerExtract(MI
);
4625 return lowerInsert(MI
);
4627 return lowerBswap(MI
);
4629 return lowerBitreverse(MI
);
4630 case G_READ_REGISTER
:
4631 case G_WRITE_REGISTER
:
4632 return lowerReadWriteRegister(MI
);
4635 // Try to make a reasonable guess about which lowering strategy to use. The
4636 // target can override this with custom lowering and calling the
4637 // implementation functions.
4638 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
4639 if (LI
.isLegalOrCustom({G_UMIN
, Ty
}))
4640 return lowerAddSubSatToMinMax(MI
);
4641 return lowerAddSubSatToAddoSubo(MI
);
4645 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
4647 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4648 // since it's a shorter expansion. However, we would need to figure out the
4649 // preferred boolean type for the carry out for the query.
4650 if (LI
.isLegalOrCustom({G_SMIN
, Ty
}) && LI
.isLegalOrCustom({G_SMAX
, Ty
}))
4651 return lowerAddSubSatToMinMax(MI
);
4652 return lowerAddSubSatToAddoSubo(MI
);
4656 return lowerShlSat(MI
);
4658 return lowerAbsToAddXor(MI
);
4660 return lowerFAbs(MI
);
4662 return lowerSelect(MI
);
4664 return lowerISFPCLASS(MI
);
4667 return lowerDIVREM(MI
);
4670 return lowerFunnelShift(MI
);
4673 return lowerRotate(MI
);
4677 return lowerMemCpyFamily(MI
);
4678 case G_MEMCPY_INLINE
:
4679 return lowerMemcpyInline(MI
);
4683 return lowerEXT(MI
);
4685 return lowerTRUNC(MI
);
4686 GISEL_VECREDUCE_CASES_NONSEQ
4687 return lowerVectorReduction(MI
);
4689 return lowerVAArg(MI
);
4693 Align
LegalizerHelper::getStackTemporaryAlignment(LLT Ty
,
4694 Align MinAlign
) const {
4695 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4696 // datalayout for the preferred alignment. Also there should be a target hook
4697 // for this to allow targets to reduce the alignment and ignore the
4698 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4700 return std::max(Align(PowerOf2Ceil(Ty
.getSizeInBytes())), MinAlign
);
4704 LegalizerHelper::createStackTemporary(TypeSize Bytes
, Align Alignment
,
4705 MachinePointerInfo
&PtrInfo
) {
4706 MachineFunction
&MF
= MIRBuilder
.getMF();
4707 const DataLayout
&DL
= MIRBuilder
.getDataLayout();
4708 int FrameIdx
= MF
.getFrameInfo().CreateStackObject(Bytes
, Alignment
, false);
4710 unsigned AddrSpace
= DL
.getAllocaAddrSpace();
4711 LLT FramePtrTy
= LLT::pointer(AddrSpace
, DL
.getPointerSizeInBits(AddrSpace
));
4713 PtrInfo
= MachinePointerInfo::getFixedStack(MF
, FrameIdx
);
4714 return MIRBuilder
.buildFrameIndex(FramePtrTy
, FrameIdx
);
4717 MachineInstrBuilder
LegalizerHelper::createStackStoreLoad(const DstOp
&Res
,
4719 LLT SrcTy
= Val
.getLLTTy(MRI
);
4720 Align StackTypeAlign
=
4721 std::max(getStackTemporaryAlignment(SrcTy
),
4722 getStackTemporaryAlignment(Res
.getLLTTy(MRI
)));
4723 MachinePointerInfo PtrInfo
;
4725 createStackTemporary(SrcTy
.getSizeInBytes(), StackTypeAlign
, PtrInfo
);
4727 MIRBuilder
.buildStore(Val
, StackTemp
, PtrInfo
, StackTypeAlign
);
4728 return MIRBuilder
.buildLoad(Res
, StackTemp
, PtrInfo
, StackTypeAlign
);
4731 static Register
clampVectorIndex(MachineIRBuilder
&B
, Register IdxReg
,
4733 LLT IdxTy
= B
.getMRI()->getType(IdxReg
);
4734 unsigned NElts
= VecTy
.getNumElements();
4737 if (mi_match(IdxReg
, *B
.getMRI(), m_ICst(IdxVal
))) {
4738 if (IdxVal
< VecTy
.getNumElements())
4740 // If a constant index would be out of bounds, clamp it as well.
4743 if (isPowerOf2_32(NElts
)) {
4744 APInt Imm
= APInt::getLowBitsSet(IdxTy
.getSizeInBits(), Log2_32(NElts
));
4745 return B
.buildAnd(IdxTy
, IdxReg
, B
.buildConstant(IdxTy
, Imm
)).getReg(0);
4748 return B
.buildUMin(IdxTy
, IdxReg
, B
.buildConstant(IdxTy
, NElts
- 1))
4752 Register
LegalizerHelper::getVectorElementPointer(Register VecPtr
, LLT VecTy
,
4754 LLT EltTy
= VecTy
.getElementType();
4756 // Calculate the element offset and add it to the pointer.
4757 unsigned EltSize
= EltTy
.getSizeInBits() / 8; // FIXME: should be ABI size.
4758 assert(EltSize
* 8 == EltTy
.getSizeInBits() &&
4759 "Converting bits to bytes lost precision");
4761 Index
= clampVectorIndex(MIRBuilder
, Index
, VecTy
);
4763 // Convert index to the correct size for the address space.
4764 const DataLayout
&DL
= MIRBuilder
.getDataLayout();
4765 unsigned AS
= MRI
.getType(VecPtr
).getAddressSpace();
4766 unsigned IndexSizeInBits
= DL
.getIndexSize(AS
) * 8;
4767 LLT IdxTy
= MRI
.getType(Index
).changeElementSize(IndexSizeInBits
);
4768 if (IdxTy
!= MRI
.getType(Index
))
4769 Index
= MIRBuilder
.buildSExtOrTrunc(IdxTy
, Index
).getReg(0);
4771 auto Mul
= MIRBuilder
.buildMul(IdxTy
, Index
,
4772 MIRBuilder
.buildConstant(IdxTy
, EltSize
));
4774 LLT PtrTy
= MRI
.getType(VecPtr
);
4775 return MIRBuilder
.buildPtrAdd(PtrTy
, VecPtr
, Mul
).getReg(0);
4779 /// Check that all vector operands have same number of elements. Other operands
4780 /// should be listed in NonVecOp.
4781 static bool hasSameNumEltsOnAllVectorOperands(
4782 GenericMachineInstr
&MI
, MachineRegisterInfo
&MRI
,
4783 std::initializer_list
<unsigned> NonVecOpIndices
) {
4784 if (MI
.getNumMemOperands() != 0)
4787 LLT VecTy
= MRI
.getType(MI
.getReg(0));
4788 if (!VecTy
.isVector())
4790 unsigned NumElts
= VecTy
.getNumElements();
4792 for (unsigned OpIdx
= 1; OpIdx
< MI
.getNumOperands(); ++OpIdx
) {
4793 MachineOperand
&Op
= MI
.getOperand(OpIdx
);
4795 if (!is_contained(NonVecOpIndices
, OpIdx
))
4800 LLT Ty
= MRI
.getType(Op
.getReg());
4801 if (!Ty
.isVector()) {
4802 if (!is_contained(NonVecOpIndices
, OpIdx
))
4807 if (Ty
.getNumElements() != NumElts
)
4815 /// Fill \p DstOps with DstOps that have same number of elements combined as
4816 /// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
4817 /// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
4818 /// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
4819 static void makeDstOps(SmallVectorImpl
<DstOp
> &DstOps
, LLT Ty
,
4822 assert(Ty
.isVector() && "Expected vector type");
4823 LLT EltTy
= Ty
.getElementType();
4824 LLT NarrowTy
= (NumElts
== 1) ? EltTy
: LLT::fixed_vector(NumElts
, EltTy
);
4825 int NumParts
, NumLeftover
;
4826 std::tie(NumParts
, NumLeftover
) =
4827 getNarrowTypeBreakDown(Ty
, NarrowTy
, LeftoverTy
);
4829 assert(NumParts
> 0 && "Error in getNarrowTypeBreakDown");
4830 for (int i
= 0; i
< NumParts
; ++i
) {
4831 DstOps
.push_back(NarrowTy
);
4834 if (LeftoverTy
.isValid()) {
4835 assert(NumLeftover
== 1 && "expected exactly one leftover");
4836 DstOps
.push_back(LeftoverTy
);
4840 /// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4841 /// made from \p Op depending on operand type.
4842 static void broadcastSrcOp(SmallVectorImpl
<SrcOp
> &Ops
, unsigned N
,
4843 MachineOperand
&Op
) {
4844 for (unsigned i
= 0; i
< N
; ++i
) {
4846 Ops
.push_back(Op
.getReg());
4847 else if (Op
.isImm())
4848 Ops
.push_back(Op
.getImm());
4849 else if (Op
.isPredicate())
4850 Ops
.push_back(static_cast<CmpInst::Predicate
>(Op
.getPredicate()));
4852 llvm_unreachable("Unsupported type");
4856 // Handle splitting vector operations which need to have the same number of
4857 // elements in each type index, but each type index may have a different element
4860 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4861 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4862 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4864 // Also handles some irregular breakdown cases, e.g.
4865 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4866 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4867 // s64 = G_SHL s64, s32
4868 LegalizerHelper::LegalizeResult
4869 LegalizerHelper::fewerElementsVectorMultiEltType(
4870 GenericMachineInstr
&MI
, unsigned NumElts
,
4871 std::initializer_list
<unsigned> NonVecOpIndices
) {
4872 assert(hasSameNumEltsOnAllVectorOperands(MI
, MRI
, NonVecOpIndices
) &&
4873 "Non-compatible opcode or not specified non-vector operands");
4874 unsigned OrigNumElts
= MRI
.getType(MI
.getReg(0)).getNumElements();
4876 unsigned NumInputs
= MI
.getNumOperands() - MI
.getNumDefs();
4877 unsigned NumDefs
= MI
.getNumDefs();
4879 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4880 // Build instructions with DstOps to use instruction found by CSE directly.
4881 // CSE copies found instruction into given vreg when building with vreg dest.
4882 SmallVector
<SmallVector
<DstOp
, 8>, 2> OutputOpsPieces(NumDefs
);
4883 // Output registers will be taken from created instructions.
4884 SmallVector
<SmallVector
<Register
, 8>, 2> OutputRegs(NumDefs
);
4885 for (unsigned i
= 0; i
< NumDefs
; ++i
) {
4886 makeDstOps(OutputOpsPieces
[i
], MRI
.getType(MI
.getReg(i
)), NumElts
);
4889 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4890 // Operands listed in NonVecOpIndices will be used as is without splitting;
4891 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4892 // scalar condition (op 1), immediate in sext_inreg (op 2).
4893 SmallVector
<SmallVector
<SrcOp
, 8>, 3> InputOpsPieces(NumInputs
);
4894 for (unsigned UseIdx
= NumDefs
, UseNo
= 0; UseIdx
< MI
.getNumOperands();
4895 ++UseIdx
, ++UseNo
) {
4896 if (is_contained(NonVecOpIndices
, UseIdx
)) {
4897 broadcastSrcOp(InputOpsPieces
[UseNo
], OutputOpsPieces
[0].size(),
4898 MI
.getOperand(UseIdx
));
4900 SmallVector
<Register
, 8> SplitPieces
;
4901 extractVectorParts(MI
.getReg(UseIdx
), NumElts
, SplitPieces
, MIRBuilder
,
4903 for (auto Reg
: SplitPieces
)
4904 InputOpsPieces
[UseNo
].push_back(Reg
);
4908 unsigned NumLeftovers
= OrigNumElts
% NumElts
? 1 : 0;
4910 // Take i-th piece of each input operand split and build sub-vector/scalar
4911 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4912 for (unsigned i
= 0; i
< OrigNumElts
/ NumElts
+ NumLeftovers
; ++i
) {
4913 SmallVector
<DstOp
, 2> Defs
;
4914 for (unsigned DstNo
= 0; DstNo
< NumDefs
; ++DstNo
)
4915 Defs
.push_back(OutputOpsPieces
[DstNo
][i
]);
4917 SmallVector
<SrcOp
, 3> Uses
;
4918 for (unsigned InputNo
= 0; InputNo
< NumInputs
; ++InputNo
)
4919 Uses
.push_back(InputOpsPieces
[InputNo
][i
]);
4921 auto I
= MIRBuilder
.buildInstr(MI
.getOpcode(), Defs
, Uses
, MI
.getFlags());
4922 for (unsigned DstNo
= 0; DstNo
< NumDefs
; ++DstNo
)
4923 OutputRegs
[DstNo
].push_back(I
.getReg(DstNo
));
4926 // Merge small outputs into MI's output for each def operand.
4928 for (unsigned i
= 0; i
< NumDefs
; ++i
)
4929 mergeMixedSubvectors(MI
.getReg(i
), OutputRegs
[i
]);
4931 for (unsigned i
= 0; i
< NumDefs
; ++i
)
4932 MIRBuilder
.buildMergeLikeInstr(MI
.getReg(i
), OutputRegs
[i
]);
4935 MI
.eraseFromParent();
4939 LegalizerHelper::LegalizeResult
4940 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr
&MI
,
4942 unsigned OrigNumElts
= MRI
.getType(MI
.getReg(0)).getNumElements();
4944 unsigned NumInputs
= MI
.getNumOperands() - MI
.getNumDefs();
4945 unsigned NumDefs
= MI
.getNumDefs();
4947 SmallVector
<DstOp
, 8> OutputOpsPieces
;
4948 SmallVector
<Register
, 8> OutputRegs
;
4949 makeDstOps(OutputOpsPieces
, MRI
.getType(MI
.getReg(0)), NumElts
);
4951 // Instructions that perform register split will be inserted in basic block
4952 // where register is defined (basic block is in the next operand).
4953 SmallVector
<SmallVector
<Register
, 8>, 3> InputOpsPieces(NumInputs
/ 2);
4954 for (unsigned UseIdx
= NumDefs
, UseNo
= 0; UseIdx
< MI
.getNumOperands();
4955 UseIdx
+= 2, ++UseNo
) {
4956 MachineBasicBlock
&OpMBB
= *MI
.getOperand(UseIdx
+ 1).getMBB();
4957 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminatorForward());
4958 extractVectorParts(MI
.getReg(UseIdx
), NumElts
, InputOpsPieces
[UseNo
],
4962 // Build PHIs with fewer elements.
4963 unsigned NumLeftovers
= OrigNumElts
% NumElts
? 1 : 0;
4964 MIRBuilder
.setInsertPt(*MI
.getParent(), MI
);
4965 for (unsigned i
= 0; i
< OrigNumElts
/ NumElts
+ NumLeftovers
; ++i
) {
4966 auto Phi
= MIRBuilder
.buildInstr(TargetOpcode::G_PHI
);
4968 MRI
.createGenericVirtualRegister(OutputOpsPieces
[i
].getLLTTy(MRI
)));
4969 OutputRegs
.push_back(Phi
.getReg(0));
4971 for (unsigned j
= 0; j
< NumInputs
/ 2; ++j
) {
4972 Phi
.addUse(InputOpsPieces
[j
][i
]);
4973 Phi
.add(MI
.getOperand(1 + j
* 2 + 1));
4977 // Set the insert point after the existing PHIs
4978 MachineBasicBlock
&MBB
= *MI
.getParent();
4979 MIRBuilder
.setInsertPt(MBB
, MBB
.getFirstNonPHI());
4981 // Merge small outputs into MI's def.
4983 mergeMixedSubvectors(MI
.getReg(0), OutputRegs
);
4985 MIRBuilder
.buildMergeLikeInstr(MI
.getReg(0), OutputRegs
);
4988 MI
.eraseFromParent();
4992 LegalizerHelper::LegalizeResult
4993 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr
&MI
,
4996 const int NumDst
= MI
.getNumOperands() - 1;
4997 const Register SrcReg
= MI
.getOperand(NumDst
).getReg();
4998 LLT DstTy
= MRI
.getType(MI
.getOperand(0).getReg());
4999 LLT SrcTy
= MRI
.getType(SrcReg
);
5001 if (TypeIdx
!= 1 || NarrowTy
== DstTy
)
5002 return UnableToLegalize
;
5004 // Requires compatible types. Otherwise SrcReg should have been defined by
5005 // merge-like instruction that would get artifact combined. Most likely
5006 // instruction that defines SrcReg has to perform more/fewer elements
5007 // legalization compatible with NarrowTy.
5008 assert(SrcTy
.isVector() && NarrowTy
.isVector() && "Expected vector types");
5009 assert((SrcTy
.getScalarType() == NarrowTy
.getScalarType()) && "bad type");
5011 if ((SrcTy
.getSizeInBits() % NarrowTy
.getSizeInBits() != 0) ||
5012 (NarrowTy
.getSizeInBits() % DstTy
.getSizeInBits() != 0))
5013 return UnableToLegalize
;
5015 // This is most likely DstTy (smaller then register size) packed in SrcTy
5016 // (larger then register size) and since unmerge was not combined it will be
5017 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
5018 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
5020 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5022 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5023 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5024 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5025 auto Unmerge
= MIRBuilder
.buildUnmerge(NarrowTy
, SrcReg
);
5026 const int NumUnmerge
= Unmerge
->getNumOperands() - 1;
5027 const int PartsPerUnmerge
= NumDst
/ NumUnmerge
;
5029 for (int I
= 0; I
!= NumUnmerge
; ++I
) {
5030 auto MIB
= MIRBuilder
.buildInstr(TargetOpcode::G_UNMERGE_VALUES
);
5032 for (int J
= 0; J
!= PartsPerUnmerge
; ++J
)
5033 MIB
.addDef(MI
.getOperand(I
* PartsPerUnmerge
+ J
).getReg());
5034 MIB
.addUse(Unmerge
.getReg(I
));
5037 MI
.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  // Requires compatible types. Otherwise user of DstReg did not perform unmerge
  // that should have been artifact combined. Most likely instruction that uses
  // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
  assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
  if (NarrowTy == SrcTy)
    return UnableToLegalize;

  // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
  // is for old mir tests. Since the changes to more/fewer elements it should no
  // longer be possible to generate MIR like this when starting from llvm-ir
  // because LCMTy approach was replaced with merge/unmerge to vector elements.
  if (TypeIdx == 1) {
    assert(SrcTy.isVector() && "Expected vector types");
    assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
    if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
        (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
      return UnableToLegalize;
    // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
    //
    // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
    // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
    // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
    // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
    // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
    // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11

    SmallVector<Register, 8> Elts;
    LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
    for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
      auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
      for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
        Elts.push_back(Unmerge.getReg(j));
    }

    SmallVector<Register, 8> NarrowTyElts;
    unsigned NumNarrowTyElts = NarrowTy.getNumElements();
    unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
    for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
         ++i, Offset += NumNarrowTyElts) {
      ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
      NarrowTyElts.push_back(
          MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
    }

    MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
    MI.eraseFromParent();
    return Legalized;
  }

  assert(TypeIdx == 0 && "Bad type index");
  if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
      (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
    return UnableToLegalize;

  // This is most likely SrcTy (smaller than register size) packed in DstTy
  // (larger than register size) and since merge was not combined it will be
  // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
  // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
  //
  // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
  //
  // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
  // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
  // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
  SmallVector<Register, 8> NarrowTyElts;
  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
  unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
  unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
  for (unsigned i = 0; i < NumParts; ++i) {
    SmallVector<Register, 8> Sources;
    for (unsigned j = 0; j < NumElts; ++j)
      Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
    NarrowTyElts.push_back(
        MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
  }

  MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  auto [DstReg, SrcVec] = MI.getFirst2Regs();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
  if (MaybeCst) {
    IdxVal = MaybeCst->Value.getSExtValue();
    // Avoid out of bounds indexing the pieces.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type,
  // so we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractInsertVectorElt(MI);
}

5201 LegalizerHelper::LegalizeResult
5202 LegalizerHelper::reduceLoadStoreWidth(GLoadStore
&LdStMI
, unsigned TypeIdx
,
5204 // FIXME: Don't know how to handle secondary types yet.
5206 return UnableToLegalize
;
5208 // This implementation doesn't work for atomics. Give up instead of doing
5209 // something invalid.
5210 if (LdStMI
.isAtomic())
5211 return UnableToLegalize
;
5213 bool IsLoad
= isa
<GLoad
>(LdStMI
);
5214 Register ValReg
= LdStMI
.getReg(0);
5215 Register AddrReg
= LdStMI
.getPointerReg();
5216 LLT ValTy
= MRI
.getType(ValReg
);
5218 // FIXME: Do we need a distinct NarrowMemory legalize action?
5219 if (ValTy
.getSizeInBits() != 8 * LdStMI
.getMemSize().getValue()) {
5220 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5221 return UnableToLegalize
;
5225 int NumLeftover
= -1;
5227 SmallVector
<Register
, 8> NarrowRegs
, NarrowLeftoverRegs
;
5229 std::tie(NumParts
, NumLeftover
) = getNarrowTypeBreakDown(ValTy
, NarrowTy
, LeftoverTy
);
5231 if (extractParts(ValReg
, ValTy
, NarrowTy
, LeftoverTy
, NarrowRegs
,
5232 NarrowLeftoverRegs
, MIRBuilder
, MRI
)) {
5233 NumParts
= NarrowRegs
.size();
5234 NumLeftover
= NarrowLeftoverRegs
.size();
5239 return UnableToLegalize
;
5241 LLT PtrTy
= MRI
.getType(AddrReg
);
5242 const LLT OffsetTy
= LLT::scalar(PtrTy
.getSizeInBits());
5244 unsigned TotalSize
= ValTy
.getSizeInBits();
5246 // Split the load/store into PartTy sized pieces starting at Offset. If this
5247 // is a load, return the new registers in ValRegs. For a store, each elements
5248 // of ValRegs should be PartTy. Returns the next offset that needs to be
5250 bool isBigEndian
= MIRBuilder
.getDataLayout().isBigEndian();
5251 auto MMO
= LdStMI
.getMMO();
5252 auto splitTypePieces
= [=](LLT PartTy
, SmallVectorImpl
<Register
> &ValRegs
,
5253 unsigned NumParts
, unsigned Offset
) -> unsigned {
5254 MachineFunction
&MF
= MIRBuilder
.getMF();
5255 unsigned PartSize
= PartTy
.getSizeInBits();
5256 for (unsigned Idx
= 0, E
= NumParts
; Idx
!= E
&& Offset
< TotalSize
;
5258 unsigned ByteOffset
= Offset
/ 8;
5259 Register NewAddrReg
;
5261 MIRBuilder
.materializePtrAdd(NewAddrReg
, AddrReg
, OffsetTy
, ByteOffset
);
5263 MachineMemOperand
*NewMMO
=
5264 MF
.getMachineMemOperand(&MMO
, ByteOffset
, PartTy
);
5267 Register Dst
= MRI
.createGenericVirtualRegister(PartTy
);
5268 ValRegs
.push_back(Dst
);
5269 MIRBuilder
.buildLoad(Dst
, NewAddrReg
, *NewMMO
);
5271 MIRBuilder
.buildStore(ValRegs
[Idx
], NewAddrReg
, *NewMMO
);
5273 Offset
= isBigEndian
? Offset
- PartSize
: Offset
+ PartSize
;
5279 unsigned Offset
= isBigEndian
? TotalSize
- NarrowTy
.getSizeInBits() : 0;
5280 unsigned HandledOffset
=
5281 splitTypePieces(NarrowTy
, NarrowRegs
, NumParts
, Offset
);
5283 // Handle the rest of the register if this isn't an even type breakdown.
5284 if (LeftoverTy
.isValid())
5285 splitTypePieces(LeftoverTy
, NarrowLeftoverRegs
, NumLeftover
, HandledOffset
);
5288 insertParts(ValReg
, ValTy
, NarrowTy
, NarrowRegs
,
5289 LeftoverTy
, NarrowLeftoverRegs
);
5292 LdStMI
.eraseFromParent();
5296 LegalizerHelper::LegalizeResult
5297 LegalizerHelper::fewerElementsVector(MachineInstr
&MI
, unsigned TypeIdx
,
5299 using namespace TargetOpcode
;
5300 GenericMachineInstr
&GMI
= cast
<GenericMachineInstr
>(MI
);
5301 unsigned NumElts
= NarrowTy
.isVector() ? NarrowTy
.getNumElements() : 1;
5303 switch (MI
.getOpcode()) {
5304 case G_IMPLICIT_DEF
:
5320 case G_FCANONICALIZE
:
5337 case G_INTRINSIC_LRINT
:
5338 case G_INTRINSIC_LLRINT
:
5339 case G_INTRINSIC_ROUND
:
5340 case G_INTRINSIC_ROUNDEVEN
:
5343 case G_INTRINSIC_TRUNC
:
5370 case G_FMINNUM_IEEE
:
5371 case G_FMAXNUM_IEEE
:
5391 case G_CTLZ_ZERO_UNDEF
:
5393 case G_CTTZ_ZERO_UNDEF
:
5409 case G_ADDRSPACE_CAST
:
5422 case G_STRICT_FLDEXP
:
5424 return fewerElementsVectorMultiEltType(GMI
, NumElts
);
5427 return fewerElementsVectorMultiEltType(GMI
, NumElts
, {1 /*cpm predicate*/});
5429 return fewerElementsVectorMultiEltType(GMI
, NumElts
, {2, 3 /*mask,fpsem*/});
5431 if (MRI
.getType(MI
.getOperand(1).getReg()).isVector())
5432 return fewerElementsVectorMultiEltType(GMI
, NumElts
);
5433 return fewerElementsVectorMultiEltType(GMI
, NumElts
, {1 /*scalar cond*/});
5435 return fewerElementsVectorPhi(GMI
, NumElts
);
5436 case G_UNMERGE_VALUES
:
5437 return fewerElementsVectorUnmergeValues(MI
, TypeIdx
, NarrowTy
);
5438 case G_BUILD_VECTOR
:
5439 assert(TypeIdx
== 0 && "not a vector type index");
5440 return fewerElementsVectorMerge(MI
, TypeIdx
, NarrowTy
);
5441 case G_CONCAT_VECTORS
:
5442 if (TypeIdx
!= 1) // TODO: This probably does work as expected already.
5443 return UnableToLegalize
;
5444 return fewerElementsVectorMerge(MI
, TypeIdx
, NarrowTy
);
5445 case G_EXTRACT_VECTOR_ELT
:
5446 case G_INSERT_VECTOR_ELT
:
5447 return fewerElementsVectorExtractInsertVectorElt(MI
, TypeIdx
, NarrowTy
);
5450 return reduceLoadStoreWidth(cast
<GLoadStore
>(MI
), TypeIdx
, NarrowTy
);
5452 return fewerElementsVectorMultiEltType(GMI
, NumElts
, {2 /*imm*/});
5453 GISEL_VECREDUCE_CASES_NONSEQ
5454 return fewerElementsVectorReductions(MI
, TypeIdx
, NarrowTy
);
5455 case TargetOpcode::G_VECREDUCE_SEQ_FADD
:
5456 case TargetOpcode::G_VECREDUCE_SEQ_FMUL
:
5457 return fewerElementsVectorSeqReductions(MI
, TypeIdx
, NarrowTy
);
5458 case G_SHUFFLE_VECTOR
:
5459 return fewerElementsVectorShuffle(MI
, TypeIdx
, NarrowTy
);
5461 return fewerElementsVectorMultiEltType(GMI
, NumElts
, {2 /*pow*/});
5463 return fewerElementsBitcast(MI
, TypeIdx
, NarrowTy
);
5464 case G_INTRINSIC_FPTRUNC_ROUND
:
5465 return fewerElementsVectorMultiEltType(GMI
, NumElts
, {2});
5467 return UnableToLegalize
;
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
                                      LLT NarrowTy) {
  assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
         "Not a bitcast operation");

  if (TypeIdx != 0)
    return UnableToLegalize;

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

  unsigned NewElemCount =
      NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
  LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());

  // Split the Src and Dst Reg into smaller registers.
  SmallVector<Register> SrcVRegs, BitcastVRegs;
  if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
    return UnableToLegalize;

  // Build new smaller bitcast instructions.
  // Not supporting Leftover types for now but will have to.
  for (unsigned i = 0; i < SrcVRegs.size(); i++)
    BitcastVRegs.push_back(
        MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));

  MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
  MI.eraseFromParent();
  return Legalized;
}

5502 LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorShuffle(
5503 MachineInstr
&MI
, unsigned int TypeIdx
, LLT NarrowTy
) {
5504 assert(MI
.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR
);
5506 return UnableToLegalize
;
5508 auto [DstReg
, DstTy
, Src1Reg
, Src1Ty
, Src2Reg
, Src2Ty
] =
5509 MI
.getFirst3RegLLTs();
5510 ArrayRef
<int> Mask
= MI
.getOperand(3).getShuffleMask();
5511 // The shuffle should be canonicalized by now.
5512 if (DstTy
!= Src1Ty
)
5513 return UnableToLegalize
;
5514 if (DstTy
!= Src2Ty
)
5515 return UnableToLegalize
;
5517 if (!isPowerOf2_32(DstTy
.getNumElements()))
5518 return UnableToLegalize
;
5520 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5521 // Further legalization attempts will be needed to do split further.
5523 DstTy
.changeElementCount(DstTy
.getElementCount().divideCoefficientBy(2));
5524 unsigned NewElts
= NarrowTy
.isVector() ? NarrowTy
.getNumElements() : 1;
5526 SmallVector
<Register
> SplitSrc1Regs
, SplitSrc2Regs
;
5527 extractParts(Src1Reg
, NarrowTy
, 2, SplitSrc1Regs
, MIRBuilder
, MRI
);
5528 extractParts(Src2Reg
, NarrowTy
, 2, SplitSrc2Regs
, MIRBuilder
, MRI
);
5529 Register Inputs
[4] = {SplitSrc1Regs
[0], SplitSrc1Regs
[1], SplitSrc2Regs
[0],
5534 // If Lo or Hi uses elements from at most two of the four input vectors, then
5535 // express it as a vector shuffle of those two inputs. Otherwise extract the
5536 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5537 SmallVector
<int, 16> Ops
;
5538 for (unsigned High
= 0; High
< 2; ++High
) {
5539 Register
&Output
= High
? Hi
: Lo
;
5541 // Build a shuffle mask for the output, discovering on the fly which
5542 // input vectors to use as shuffle operands (recorded in InputUsed).
5543 // If building a suitable shuffle vector proves too hard, then bail
5544 // out with useBuildVector set.
5545 unsigned InputUsed
[2] = {-1U, -1U}; // Not yet discovered.
5546 unsigned FirstMaskIdx
= High
* NewElts
;
5547 bool UseBuildVector
= false;
5548 for (unsigned MaskOffset
= 0; MaskOffset
< NewElts
; ++MaskOffset
) {
5549 // The mask element. This indexes into the input.
5550 int Idx
= Mask
[FirstMaskIdx
+ MaskOffset
];
5552 // The input vector this mask element indexes into.
5553 unsigned Input
= (unsigned)Idx
/ NewElts
;
5555 if (Input
>= std::size(Inputs
)) {
5556 // The mask element does not index into any input vector.
5561 // Turn the index into an offset from the start of the input vector.
5562 Idx
-= Input
* NewElts
;
5564 // Find or create a shuffle vector operand to hold this input.
5566 for (OpNo
= 0; OpNo
< std::size(InputUsed
); ++OpNo
) {
5567 if (InputUsed
[OpNo
] == Input
) {
5568 // This input vector is already an operand.
5570 } else if (InputUsed
[OpNo
] == -1U) {
5571 // Create a new operand for this input vector.
5572 InputUsed
[OpNo
] = Input
;
5577 if (OpNo
>= std::size(InputUsed
)) {
5578 // More than two input vectors used! Give up on trying to create a
5579 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5580 UseBuildVector
= true;
5584 // Add the mask index for the new shuffle vector.
5585 Ops
.push_back(Idx
+ OpNo
* NewElts
);
5588 if (UseBuildVector
) {
5589 LLT EltTy
= NarrowTy
.getElementType();
5590 SmallVector
<Register
, 16> SVOps
;
5592 // Extract the input elements by hand.
5593 for (unsigned MaskOffset
= 0; MaskOffset
< NewElts
; ++MaskOffset
) {
5594 // The mask element. This indexes into the input.
5595 int Idx
= Mask
[FirstMaskIdx
+ MaskOffset
];
5597 // The input vector this mask element indexes into.
5598 unsigned Input
= (unsigned)Idx
/ NewElts
;
5600 if (Input
>= std::size(Inputs
)) {
5601 // The mask element is "undef" or indexes off the end of the input.
5602 SVOps
.push_back(MIRBuilder
.buildUndef(EltTy
).getReg(0));
5606 // Turn the index into an offset from the start of the input vector.
5607 Idx
-= Input
* NewElts
;
5609 // Extract the vector element by hand.
5610 SVOps
.push_back(MIRBuilder
5611 .buildExtractVectorElement(
5612 EltTy
, Inputs
[Input
],
5613 MIRBuilder
.buildConstant(LLT::scalar(32), Idx
))
5617 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5618 Output
= MIRBuilder
.buildBuildVector(NarrowTy
, SVOps
).getReg(0);
5619 } else if (InputUsed
[0] == -1U) {
5620 // No input vectors were used! The result is undefined.
5621 Output
= MIRBuilder
.buildUndef(NarrowTy
).getReg(0);
5623 Register Op0
= Inputs
[InputUsed
[0]];
5624 // If only one input was used, use an undefined vector for the other.
5625 Register Op1
= InputUsed
[1] == -1U
5626 ? MIRBuilder
.buildUndef(NarrowTy
).getReg(0)
5627 : Inputs
[InputUsed
[1]];
5628 // At least one input vector was used. Create a new shuffle vector.
5629 Output
= MIRBuilder
.buildShuffleVector(NarrowTy
, Op0
, Op1
, Ops
).getReg(0);
5635 MIRBuilder
.buildMergeLikeInstr(DstReg
, {Lo
, Hi
});
5636 MI
.eraseFromParent();
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  auto &RdxMI = cast<GVecReduce>(MI);

  if (TypeIdx != 1)
    return UnableToLegalize;

  // The semantics of the normal non-sequential reductions allow us to freely
  // re-associate the operation.
  auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();

  if (NarrowTy.isVector() &&
      (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
    return UnableToLegalize;

  unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
  SmallVector<Register> SplitSrcs;
  // If NarrowTy is a scalar then we're being asked to scalarize.
  const unsigned NumParts =
      NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
                          : SrcTy.getNumElements();

  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
  if (NarrowTy.isScalar()) {
    if (DstTy != NarrowTy)
      return UnableToLegalize; // FIXME: handle implicit extensions.

    if (isPowerOf2_32(NumParts)) {
      // Generate a tree of scalar operations to reduce the critical path.
      SmallVector<Register> PartialResults;
      unsigned NumPartsLeft = NumParts;
      while (NumPartsLeft > 1) {
        for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
          PartialResults.emplace_back(
              MIRBuilder
                  .buildInstr(ScalarOpc, {NarrowTy},
                              {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
                  .getReg(0));
        }
        SplitSrcs = PartialResults;
        PartialResults.clear();
        NumPartsLeft = SplitSrcs.size();
      }
      assert(SplitSrcs.size() == 1);
      MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
      MI.eraseFromParent();
      return Legalized;
    }
    // If we can't generate a tree, then just do sequential operations.
    Register Acc = SplitSrcs[0];
    for (unsigned Idx = 1; Idx < NumParts; ++Idx)
      Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
                .getReg(0);
    MIRBuilder.buildCopy(DstReg, Acc);
    MI.eraseFromParent();
    return Legalized;
  }

  SmallVector<Register> PartialReductions;
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    PartialReductions.push_back(
        MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
            .getReg(0));
  }

  // If the types involved are powers of 2, we can generate intermediate vector
  // ops, before generating a final reduction operation.
  if (isPowerOf2_32(SrcTy.getNumElements()) &&
      isPowerOf2_32(NarrowTy.getNumElements())) {
    return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
  }

  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
      MIRBuilder.buildInstr(ScalarOpc, {DstReg},
                            {Acc, PartialReductions[Part]});
    } else {
      Acc = MIRBuilder
                .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
                .getReg(0);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
                                                  unsigned int TypeIdx,
                                                  LLT NarrowTy) {
  auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
      MI.getFirst3RegLLTs();
  if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
      DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
          MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
         "Unexpected vecreduce opcode");
  unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
                           ? TargetOpcode::G_FADD
                           : TargetOpcode::G_FMUL;

  SmallVector<Register> SplitSrcs;
  unsigned NumParts = SrcTy.getNumElements();
  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
  Register Acc = ScalarReg;
  for (unsigned i = 0; i < NumParts; i++)
    Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
              .getReg(0);

  MIRBuilder.buildCopy(DstReg, Acc);
  MI.eraseFromParent();
  return Legalized;
}

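// Illustrative shape of the expansion above (explanatory sketch, not taken
// from a particular test): with NarrowTy == s32, splitting
//   %res:_(s32) = G_VECREDUCE_SEQ_FADD %acc:_(s32), %v:_(<4 x s32>)
// produces one scalar operation per element, in order:
//   %e0, %e1, %e2, %e3 = G_UNMERGE_VALUES %v
//   %a0 = G_FADD %acc, %e0
//   %a1 = G_FADD %a0, %e1
//   %a2 = G_FADD %a1, %e2
//   %res = G_FADD %a2, %e3
// The chain preserves the strict left-to-right evaluation order that a
// sequential (ordered) reduction requires, unlike the re-associating path in
// fewerElementsVectorReductions above.
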
LegalizerHelper::LegalizeResult
LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
                                        LLT SrcTy, LLT NarrowTy,
                                        unsigned ScalarOpc) {
  SmallVector<Register> SplitSrcs;
  // Split the sources into NarrowTy size pieces.
  extractParts(SrcReg, NarrowTy,
               SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
               MIRBuilder, MRI);
  // We're going to do a tree reduction using vector operations until we have
  // one NarrowTy size value left.
  while (SplitSrcs.size() > 1) {
    SmallVector<Register> PartialRdxs;
    for (unsigned Idx = 0; Idx < SplitSrcs.size() - 1; Idx += 2) {
      Register LHS = SplitSrcs[Idx];
      Register RHS = SplitSrcs[Idx + 1];
      // Create the intermediate vector op.
      Register Res =
          MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
      PartialRdxs.push_back(Res);
    }
    SplitSrcs = std::move(PartialRdxs);
  }
  // Finally generate the requested NarrowTy based reduction.
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(SplitSrcs[0]);
  Observer.changedInstr(MI);
  return Legalized;
}

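// Illustrative shape of the tree built above (explanatory only): reducing a
// <16 x s32> source with NarrowTy == <4 x s32> first splits the source into
// four <4 x s32> pieces, then combines them pairwise with the scalar opcode
// (e.g. G_ADD) until a single <4 x s32> value remains, and finally the
// original G_VECREDUCE_* instruction is retargeted at that remaining value.
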
5786 LegalizerHelper::LegalizeResult
5787 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr
&MI
, const APInt
&Amt
,
5788 const LLT HalfTy
, const LLT AmtTy
) {
5790 Register InL
= MRI
.createGenericVirtualRegister(HalfTy
);
5791 Register InH
= MRI
.createGenericVirtualRegister(HalfTy
);
5792 MIRBuilder
.buildUnmerge({InL
, InH
}, MI
.getOperand(1));
5795 MIRBuilder
.buildMergeLikeInstr(MI
.getOperand(0), {InL
, InH
});
5796 MI
.eraseFromParent();
5801 unsigned NVTBits
= HalfTy
.getSizeInBits();
5802 unsigned VTBits
= 2 * NVTBits
;
5804 SrcOp
Lo(Register(0)), Hi(Register(0));
5805 if (MI
.getOpcode() == TargetOpcode::G_SHL
) {
5806 if (Amt
.ugt(VTBits
)) {
5807 Lo
= Hi
= MIRBuilder
.buildConstant(NVT
, 0);
5808 } else if (Amt
.ugt(NVTBits
)) {
5809 Lo
= MIRBuilder
.buildConstant(NVT
, 0);
5810 Hi
= MIRBuilder
.buildShl(NVT
, InL
,
5811 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
5812 } else if (Amt
== NVTBits
) {
5813 Lo
= MIRBuilder
.buildConstant(NVT
, 0);
5816 Lo
= MIRBuilder
.buildShl(NVT
, InL
, MIRBuilder
.buildConstant(AmtTy
, Amt
));
5818 MIRBuilder
.buildShl(NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, Amt
));
5819 auto OrRHS
= MIRBuilder
.buildLShr(
5820 NVT
, InL
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
5821 Hi
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
5823 } else if (MI
.getOpcode() == TargetOpcode::G_LSHR
) {
5824 if (Amt
.ugt(VTBits
)) {
5825 Lo
= Hi
= MIRBuilder
.buildConstant(NVT
, 0);
5826 } else if (Amt
.ugt(NVTBits
)) {
5827 Lo
= MIRBuilder
.buildLShr(NVT
, InH
,
5828 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
5829 Hi
= MIRBuilder
.buildConstant(NVT
, 0);
5830 } else if (Amt
== NVTBits
) {
5832 Hi
= MIRBuilder
.buildConstant(NVT
, 0);
5834 auto ShiftAmtConst
= MIRBuilder
.buildConstant(AmtTy
, Amt
);
5836 auto OrLHS
= MIRBuilder
.buildLShr(NVT
, InL
, ShiftAmtConst
);
5837 auto OrRHS
= MIRBuilder
.buildShl(
5838 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
5840 Lo
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
5841 Hi
= MIRBuilder
.buildLShr(NVT
, InH
, ShiftAmtConst
);
5844 if (Amt
.ugt(VTBits
)) {
5845 Hi
= Lo
= MIRBuilder
.buildAShr(
5846 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
5847 } else if (Amt
.ugt(NVTBits
)) {
5848 Lo
= MIRBuilder
.buildAShr(NVT
, InH
,
5849 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
5850 Hi
= MIRBuilder
.buildAShr(NVT
, InH
,
5851 MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
5852 } else if (Amt
== NVTBits
) {
5854 Hi
= MIRBuilder
.buildAShr(NVT
, InH
,
5855 MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
5857 auto ShiftAmtConst
= MIRBuilder
.buildConstant(AmtTy
, Amt
);
5859 auto OrLHS
= MIRBuilder
.buildLShr(NVT
, InL
, ShiftAmtConst
);
5860 auto OrRHS
= MIRBuilder
.buildShl(
5861 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
5863 Lo
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
5864 Hi
= MIRBuilder
.buildAShr(NVT
, InH
, ShiftAmtConst
);
5868 MIRBuilder
.buildMergeLikeInstr(MI
.getOperand(0), {Lo
, Hi
});
5869 MI
.eraseFromParent();
5874 // TODO: Optimize if constant shift amount.
5875 LegalizerHelper::LegalizeResult
5876 LegalizerHelper::narrowScalarShift(MachineInstr
&MI
, unsigned TypeIdx
,
5879 Observer
.changingInstr(MI
);
5880 narrowScalarSrc(MI
, RequestedTy
, 2);
5881 Observer
.changedInstr(MI
);
5885 Register DstReg
= MI
.getOperand(0).getReg();
5886 LLT DstTy
= MRI
.getType(DstReg
);
5887 if (DstTy
.isVector())
5888 return UnableToLegalize
;
5890 Register Amt
= MI
.getOperand(2).getReg();
5891 LLT ShiftAmtTy
= MRI
.getType(Amt
);
5892 const unsigned DstEltSize
= DstTy
.getScalarSizeInBits();
5893 if (DstEltSize
% 2 != 0)
5894 return UnableToLegalize
;
5896 // Ignore the input type. We can only go to exactly half the size of the
5897 // input. If that isn't small enough, the resulting pieces will be further
5899 const unsigned NewBitSize
= DstEltSize
/ 2;
5900 const LLT HalfTy
= LLT::scalar(NewBitSize
);
5901 const LLT CondTy
= LLT::scalar(1);
5903 if (auto VRegAndVal
= getIConstantVRegValWithLookThrough(Amt
, MRI
)) {
5904 return narrowScalarShiftByConstant(MI
, VRegAndVal
->Value
, HalfTy
,
5908 // TODO: Expand with known bits.
5910 // Handle the fully general expansion by an unknown amount.
5911 auto NewBits
= MIRBuilder
.buildConstant(ShiftAmtTy
, NewBitSize
);
5913 Register InL
= MRI
.createGenericVirtualRegister(HalfTy
);
5914 Register InH
= MRI
.createGenericVirtualRegister(HalfTy
);
5915 MIRBuilder
.buildUnmerge({InL
, InH
}, MI
.getOperand(1));
5917 auto AmtExcess
= MIRBuilder
.buildSub(ShiftAmtTy
, Amt
, NewBits
);
5918 auto AmtLack
= MIRBuilder
.buildSub(ShiftAmtTy
, NewBits
, Amt
);
5920 auto Zero
= MIRBuilder
.buildConstant(ShiftAmtTy
, 0);
5921 auto IsShort
= MIRBuilder
.buildICmp(ICmpInst::ICMP_ULT
, CondTy
, Amt
, NewBits
);
5922 auto IsZero
= MIRBuilder
.buildICmp(ICmpInst::ICMP_EQ
, CondTy
, Amt
, Zero
);
5924 Register ResultRegs
[2];
5925 switch (MI
.getOpcode()) {
5926 case TargetOpcode::G_SHL
: {
5927 // Short: ShAmt < NewBitSize
5928 auto LoS
= MIRBuilder
.buildShl(HalfTy
, InL
, Amt
);
5930 auto LoOr
= MIRBuilder
.buildLShr(HalfTy
, InL
, AmtLack
);
5931 auto HiOr
= MIRBuilder
.buildShl(HalfTy
, InH
, Amt
);
5932 auto HiS
= MIRBuilder
.buildOr(HalfTy
, LoOr
, HiOr
);
5934 // Long: ShAmt >= NewBitSize
5935 auto LoL
= MIRBuilder
.buildConstant(HalfTy
, 0); // Lo part is zero.
5936 auto HiL
= MIRBuilder
.buildShl(HalfTy
, InL
, AmtExcess
); // Hi from Lo part.
5938 auto Lo
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
);
5939 auto Hi
= MIRBuilder
.buildSelect(
5940 HalfTy
, IsZero
, InH
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
));
5942 ResultRegs
[0] = Lo
.getReg(0);
5943 ResultRegs
[1] = Hi
.getReg(0);
5946 case TargetOpcode::G_LSHR
:
5947 case TargetOpcode::G_ASHR
: {
5948 // Short: ShAmt < NewBitSize
5949 auto HiS
= MIRBuilder
.buildInstr(MI
.getOpcode(), {HalfTy
}, {InH
, Amt
});
5951 auto LoOr
= MIRBuilder
.buildLShr(HalfTy
, InL
, Amt
);
5952 auto HiOr
= MIRBuilder
.buildShl(HalfTy
, InH
, AmtLack
);
5953 auto LoS
= MIRBuilder
.buildOr(HalfTy
, LoOr
, HiOr
);
5955 // Long: ShAmt >= NewBitSize
5956 MachineInstrBuilder HiL
;
5957 if (MI
.getOpcode() == TargetOpcode::G_LSHR
) {
5958 HiL
= MIRBuilder
.buildConstant(HalfTy
, 0); // Hi part is zero.
5960 auto ShiftAmt
= MIRBuilder
.buildConstant(ShiftAmtTy
, NewBitSize
- 1);
5961 HiL
= MIRBuilder
.buildAShr(HalfTy
, InH
, ShiftAmt
); // Sign of Hi part.
5963 auto LoL
= MIRBuilder
.buildInstr(MI
.getOpcode(), {HalfTy
},
5964 {InH
, AmtExcess
}); // Lo from Hi part.
5966 auto Lo
= MIRBuilder
.buildSelect(
5967 HalfTy
, IsZero
, InL
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
));
5969 auto Hi
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
);
5971 ResultRegs
[0] = Lo
.getReg(0);
5972 ResultRegs
[1] = Hi
.getReg(0);
5976 llvm_unreachable("not a shift");
5979 MIRBuilder
.buildMergeLikeInstr(DstReg
, ResultRegs
);
5980 MI
.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, I);
  }

  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);
  Observer.changedInstr(MI);
  return Legalized;
}

MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
    unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
  assert(Ty.isScalar() && "Expected scalar type to make neutral element for");

  switch (Opcode) {
  default:
    llvm_unreachable(
        "getNeutralElementForVecReduce called with invalid opcode!");
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_UMAX:
    return MIRBuilder.buildConstant(Ty, 0);
  case TargetOpcode::G_VECREDUCE_MUL:
    return MIRBuilder.buildConstant(Ty, 1);
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_UMIN:
    return MIRBuilder.buildConstant(
        Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
  case TargetOpcode::G_VECREDUCE_SMAX:
    return MIRBuilder.buildConstant(
        Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
  case TargetOpcode::G_VECREDUCE_SMIN:
    return MIRBuilder.buildConstant(
        Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
  case TargetOpcode::G_VECREDUCE_FADD:
    return MIRBuilder.buildFConstant(Ty, -0.0);
  case TargetOpcode::G_VECREDUCE_FMUL:
    return MIRBuilder.buildFConstant(Ty, 1.0);
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
    assert(false && "getNeutralElementForVecReduce unimplemented for "
                    "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
  }
  llvm_unreachable("switch expected to return!");
}

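// Illustrative use of the neutral elements above (assumed sizes, not a quote
// from a test): when moreElementsVector() pads a G_VECREDUCE_SMAX operand from
// <3 x s32> to <4 x s32>, the extra lane is filled with the signed minimum
// value, so smax over the widened vector yields the same result as over the
// original three lanes. Likewise G_VECREDUCE_AND pads with all-ones, and
// G_VECREDUCE_FADD pads with -0.0, which is the IEEE-754 additive identity
// (padding with +0.0 could flip the sign of a -0.0 result).
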
6040 LegalizerHelper::LegalizeResult
6041 LegalizerHelper::moreElementsVector(MachineInstr
&MI
, unsigned TypeIdx
,
6043 unsigned Opc
= MI
.getOpcode();
6045 case TargetOpcode::G_IMPLICIT_DEF
:
6046 case TargetOpcode::G_LOAD
: {
6048 return UnableToLegalize
;
6049 Observer
.changingInstr(MI
);
6050 moreElementsVectorDst(MI
, MoreTy
, 0);
6051 Observer
.changedInstr(MI
);
6054 case TargetOpcode::G_STORE
:
6056 return UnableToLegalize
;
6057 Observer
.changingInstr(MI
);
6058 moreElementsVectorSrc(MI
, MoreTy
, 0);
6059 Observer
.changedInstr(MI
);
6061 case TargetOpcode::G_AND
:
6062 case TargetOpcode::G_OR
:
6063 case TargetOpcode::G_XOR
:
6064 case TargetOpcode::G_ADD
:
6065 case TargetOpcode::G_SUB
:
6066 case TargetOpcode::G_MUL
:
6067 case TargetOpcode::G_FADD
:
6068 case TargetOpcode::G_FSUB
:
6069 case TargetOpcode::G_FMUL
:
6070 case TargetOpcode::G_FDIV
:
6071 case TargetOpcode::G_FCOPYSIGN
:
6072 case TargetOpcode::G_UADDSAT
:
6073 case TargetOpcode::G_USUBSAT
:
6074 case TargetOpcode::G_SADDSAT
:
6075 case TargetOpcode::G_SSUBSAT
:
6076 case TargetOpcode::G_SMIN
:
6077 case TargetOpcode::G_SMAX
:
6078 case TargetOpcode::G_UMIN
:
6079 case TargetOpcode::G_UMAX
:
6080 case TargetOpcode::G_FMINNUM
:
6081 case TargetOpcode::G_FMAXNUM
:
6082 case TargetOpcode::G_FMINNUM_IEEE
:
6083 case TargetOpcode::G_FMAXNUM_IEEE
:
6084 case TargetOpcode::G_FMINIMUM
:
6085 case TargetOpcode::G_FMAXIMUM
:
6086 case TargetOpcode::G_STRICT_FADD
:
6087 case TargetOpcode::G_STRICT_FSUB
:
6088 case TargetOpcode::G_STRICT_FMUL
:
6089 case TargetOpcode::G_SHL
:
6090 case TargetOpcode::G_ASHR
:
6091 case TargetOpcode::G_LSHR
: {
6092 Observer
.changingInstr(MI
);
6093 moreElementsVectorSrc(MI
, MoreTy
, 1);
6094 moreElementsVectorSrc(MI
, MoreTy
, 2);
6095 moreElementsVectorDst(MI
, MoreTy
, 0);
6096 Observer
.changedInstr(MI
);
6099 case TargetOpcode::G_FMA
:
6100 case TargetOpcode::G_STRICT_FMA
:
6101 case TargetOpcode::G_FSHR
:
6102 case TargetOpcode::G_FSHL
: {
6103 Observer
.changingInstr(MI
);
6104 moreElementsVectorSrc(MI
, MoreTy
, 1);
6105 moreElementsVectorSrc(MI
, MoreTy
, 2);
6106 moreElementsVectorSrc(MI
, MoreTy
, 3);
6107 moreElementsVectorDst(MI
, MoreTy
, 0);
6108 Observer
.changedInstr(MI
);
6111 case TargetOpcode::G_EXTRACT_VECTOR_ELT
:
6112 case TargetOpcode::G_EXTRACT
:
6114 return UnableToLegalize
;
6115 Observer
.changingInstr(MI
);
6116 moreElementsVectorSrc(MI
, MoreTy
, 1);
6117 Observer
.changedInstr(MI
);
6119 case TargetOpcode::G_INSERT
:
6120 case TargetOpcode::G_INSERT_VECTOR_ELT
:
6121 case TargetOpcode::G_FREEZE
:
6122 case TargetOpcode::G_FNEG
:
6123 case TargetOpcode::G_FABS
:
6124 case TargetOpcode::G_FSQRT
:
6125 case TargetOpcode::G_FCEIL
:
6126 case TargetOpcode::G_FFLOOR
:
6127 case TargetOpcode::G_FNEARBYINT
:
6128 case TargetOpcode::G_FRINT
:
6129 case TargetOpcode::G_INTRINSIC_ROUND
:
6130 case TargetOpcode::G_INTRINSIC_ROUNDEVEN
:
6131 case TargetOpcode::G_INTRINSIC_TRUNC
:
6132 case TargetOpcode::G_BSWAP
:
6133 case TargetOpcode::G_FCANONICALIZE
:
6134 case TargetOpcode::G_SEXT_INREG
:
6135 case TargetOpcode::G_ABS
:
6137 return UnableToLegalize
;
6138 Observer
.changingInstr(MI
);
6139 moreElementsVectorSrc(MI
, MoreTy
, 1);
6140 moreElementsVectorDst(MI
, MoreTy
, 0);
6141 Observer
.changedInstr(MI
);
6143 case TargetOpcode::G_SELECT
: {
6144 auto [DstReg
, DstTy
, CondReg
, CondTy
] = MI
.getFirst2RegLLTs();
6146 if (!CondTy
.isScalar() ||
6147 DstTy
.getElementCount() != MoreTy
.getElementCount())
6148 return UnableToLegalize
;
6150 // This is turning a scalar select of vectors into a vector
6151 // select. Broadcast the select condition.
6152 auto ShufSplat
= MIRBuilder
.buildShuffleSplat(MoreTy
, CondReg
);
6153 Observer
.changingInstr(MI
);
6154 MI
.getOperand(1).setReg(ShufSplat
.getReg(0));
6155 Observer
.changedInstr(MI
);
6159 if (CondTy
.isVector())
6160 return UnableToLegalize
;
6162 Observer
.changingInstr(MI
);
6163 moreElementsVectorSrc(MI
, MoreTy
, 2);
6164 moreElementsVectorSrc(MI
, MoreTy
, 3);
6165 moreElementsVectorDst(MI
, MoreTy
, 0);
6166 Observer
.changedInstr(MI
);
6169 case TargetOpcode::G_UNMERGE_VALUES
:
6170 return UnableToLegalize
;
6171 case TargetOpcode::G_PHI
:
6172 return moreElementsVectorPhi(MI
, TypeIdx
, MoreTy
);
6173 case TargetOpcode::G_SHUFFLE_VECTOR
:
6174 return moreElementsVectorShuffle(MI
, TypeIdx
, MoreTy
);
6175 case TargetOpcode::G_BUILD_VECTOR
: {
6176 SmallVector
<SrcOp
, 8> Elts
;
6177 for (auto Op
: MI
.uses()) {
6178 Elts
.push_back(Op
.getReg());
6181 for (unsigned i
= Elts
.size(); i
< MoreTy
.getNumElements(); ++i
) {
6182 Elts
.push_back(MIRBuilder
.buildUndef(MoreTy
.getScalarType()));
6185 MIRBuilder
.buildDeleteTrailingVectorElements(
6186 MI
.getOperand(0).getReg(), MIRBuilder
.buildInstr(Opc
, {MoreTy
}, Elts
));
6187 MI
.eraseFromParent();
6190 case TargetOpcode::G_SEXT
:
6191 case TargetOpcode::G_ZEXT
:
6192 case TargetOpcode::G_ANYEXT
:
6193 case TargetOpcode::G_TRUNC
:
6194 case TargetOpcode::G_FPTRUNC
:
6195 case TargetOpcode::G_FPEXT
:
6196 case TargetOpcode::G_FPTOSI
:
6197 case TargetOpcode::G_FPTOUI
:
6198 case TargetOpcode::G_FPTOSI_SAT
:
6199 case TargetOpcode::G_FPTOUI_SAT
:
6200 case TargetOpcode::G_SITOFP
:
6201 case TargetOpcode::G_UITOFP
: {
6202 Observer
.changingInstr(MI
);
6207 SrcExtTy
= LLT::fixed_vector(
6208 MoreTy
.getNumElements(),
6209 MRI
.getType(MI
.getOperand(1).getReg()).getElementType());
6211 DstExtTy
= LLT::fixed_vector(
6212 MoreTy
.getNumElements(),
6213 MRI
.getType(MI
.getOperand(0).getReg()).getElementType());
6216 moreElementsVectorSrc(MI
, SrcExtTy
, 1);
6217 moreElementsVectorDst(MI
, DstExtTy
, 0);
6218 Observer
.changedInstr(MI
);
6221 case TargetOpcode::G_ICMP
:
6222 case TargetOpcode::G_FCMP
: {
6224 return UnableToLegalize
;
6226 Observer
.changingInstr(MI
);
6227 moreElementsVectorSrc(MI
, MoreTy
, 2);
6228 moreElementsVectorSrc(MI
, MoreTy
, 3);
6229 LLT CondTy
= LLT::fixed_vector(
6230 MoreTy
.getNumElements(),
6231 MRI
.getType(MI
.getOperand(0).getReg()).getElementType());
6232 moreElementsVectorDst(MI
, CondTy
, 0);
6233 Observer
.changedInstr(MI
);
6236 case TargetOpcode::G_BITCAST
: {
6238 return UnableToLegalize
;
6240 LLT SrcTy
= MRI
.getType(MI
.getOperand(1).getReg());
6241 LLT DstTy
= MRI
.getType(MI
.getOperand(0).getReg());
6243 unsigned coefficient
= SrcTy
.getNumElements() * MoreTy
.getNumElements();
6244 if (coefficient
% DstTy
.getNumElements() != 0)
6245 return UnableToLegalize
;
6247 coefficient
= coefficient
/ DstTy
.getNumElements();
6249 LLT NewTy
= SrcTy
.changeElementCount(
6250 ElementCount::get(coefficient
, MoreTy
.isScalable()));
6251 Observer
.changingInstr(MI
);
6252 moreElementsVectorSrc(MI
, NewTy
, 1);
6253 moreElementsVectorDst(MI
, MoreTy
, 0);
6254 Observer
.changedInstr(MI
);
6257 case TargetOpcode::G_VECREDUCE_FADD
:
6258 case TargetOpcode::G_VECREDUCE_FMUL
:
6259 case TargetOpcode::G_VECREDUCE_ADD
:
6260 case TargetOpcode::G_VECREDUCE_MUL
:
6261 case TargetOpcode::G_VECREDUCE_AND
:
6262 case TargetOpcode::G_VECREDUCE_OR
:
6263 case TargetOpcode::G_VECREDUCE_XOR
:
6264 case TargetOpcode::G_VECREDUCE_SMAX
:
6265 case TargetOpcode::G_VECREDUCE_SMIN
:
6266 case TargetOpcode::G_VECREDUCE_UMAX
:
6267 case TargetOpcode::G_VECREDUCE_UMIN
: {
6268 LLT OrigTy
= MRI
.getType(MI
.getOperand(1).getReg());
6269 MachineOperand
&MO
= MI
.getOperand(1);
6270 auto NewVec
= MIRBuilder
.buildPadVectorWithUndefElements(MoreTy
, MO
);
6271 auto NeutralElement
= getNeutralElementForVecReduce(
6272 MI
.getOpcode(), MIRBuilder
, MoreTy
.getElementType());
6274 LLT
IdxTy(TLI
.getVectorIdxTy(MIRBuilder
.getDataLayout()));
6275 for (size_t i
= OrigTy
.getNumElements(), e
= MoreTy
.getNumElements();
6277 auto Idx
= MIRBuilder
.buildConstant(IdxTy
, i
);
6278 NewVec
= MIRBuilder
.buildInsertVectorElement(MoreTy
, NewVec
,
6279 NeutralElement
, Idx
);
6282 Observer
.changingInstr(MI
);
6283 MO
.setReg(NewVec
.getReg(0));
6284 Observer
.changedInstr(MI
);
6289 return UnableToLegalize
;
6293 LegalizerHelper::LegalizeResult
6294 LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr
&MI
) {
6295 auto [DstReg
, DstTy
, SrcReg
, SrcTy
] = MI
.getFirst2RegLLTs();
6296 ArrayRef
<int> Mask
= MI
.getOperand(3).getShuffleMask();
6297 unsigned MaskNumElts
= Mask
.size();
6298 unsigned SrcNumElts
= SrcTy
.getNumElements();
6299 LLT DestEltTy
= DstTy
.getElementType();
6301 if (MaskNumElts
== SrcNumElts
)
6304 if (MaskNumElts
< SrcNumElts
) {
6305 // Extend mask to match new destination vector size with
6307 SmallVector
<int, 16> NewMask(SrcNumElts
, -1);
6308 llvm::copy(Mask
, NewMask
.begin());
6310 moreElementsVectorDst(MI
, SrcTy
, 0);
6311 MIRBuilder
.setInstrAndDebugLoc(MI
);
6312 MIRBuilder
.buildShuffleVector(MI
.getOperand(0).getReg(),
6313 MI
.getOperand(1).getReg(),
6314 MI
.getOperand(2).getReg(), NewMask
);
6315 MI
.eraseFromParent();
6320 unsigned PaddedMaskNumElts
= alignTo(MaskNumElts
, SrcNumElts
);
6321 unsigned NumConcat
= PaddedMaskNumElts
/ SrcNumElts
;
6322 LLT PaddedTy
= LLT::fixed_vector(PaddedMaskNumElts
, DestEltTy
);
6324 // Create new source vectors by concatenating the initial
6325 // source vectors with undefined vectors of the same size.
6326 auto Undef
= MIRBuilder
.buildUndef(SrcTy
);
6327 SmallVector
<Register
, 8> MOps1(NumConcat
, Undef
.getReg(0));
6328 SmallVector
<Register
, 8> MOps2(NumConcat
, Undef
.getReg(0));
6329 MOps1
[0] = MI
.getOperand(1).getReg();
6330 MOps2
[0] = MI
.getOperand(2).getReg();
6332 auto Src1
= MIRBuilder
.buildConcatVectors(PaddedTy
, MOps1
);
6333 auto Src2
= MIRBuilder
.buildConcatVectors(PaddedTy
, MOps2
);
6335 // Readjust mask for new input vector length.
6336 SmallVector
<int, 8> MappedOps(PaddedMaskNumElts
, -1);
6337 for (unsigned I
= 0; I
!= MaskNumElts
; ++I
) {
6339 if (Idx
>= static_cast<int>(SrcNumElts
))
6340 Idx
+= PaddedMaskNumElts
- SrcNumElts
;
6344 // If we got more elements than required, extract subvector.
6345 if (MaskNumElts
!= PaddedMaskNumElts
) {
6347 MIRBuilder
.buildShuffleVector(PaddedTy
, Src1
, Src2
, MappedOps
);
6349 SmallVector
<Register
, 16> Elts(MaskNumElts
);
6350 for (unsigned I
= 0; I
< MaskNumElts
; ++I
) {
6352 MIRBuilder
.buildExtractVectorElementConstant(DestEltTy
, Shuffle
, I
)
6355 MIRBuilder
.buildBuildVector(DstReg
, Elts
);
6357 MIRBuilder
.buildShuffleVector(DstReg
, Src1
, Src2
, MappedOps
);
6360 MI
.eraseFromParent();
6361 return LegalizerHelper::LegalizeResult::Legalized
;
6364 LegalizerHelper::LegalizeResult
6365 LegalizerHelper::moreElementsVectorShuffle(MachineInstr
&MI
,
6366 unsigned int TypeIdx
, LLT MoreTy
) {
6367 auto [DstTy
, Src1Ty
, Src2Ty
] = MI
.getFirst3LLTs();
6368 ArrayRef
<int> Mask
= MI
.getOperand(3).getShuffleMask();
6369 unsigned NumElts
= DstTy
.getNumElements();
6370 unsigned WidenNumElts
= MoreTy
.getNumElements();
6372 if (DstTy
.isVector() && Src1Ty
.isVector() &&
6373 DstTy
.getNumElements() != Src1Ty
.getNumElements()) {
6374 return equalizeVectorShuffleLengths(MI
);
6378 return UnableToLegalize
;
6380 // Expect a canonicalized shuffle.
6381 if (DstTy
!= Src1Ty
|| DstTy
!= Src2Ty
)
6382 return UnableToLegalize
;
6384 moreElementsVectorSrc(MI
, MoreTy
, 1);
6385 moreElementsVectorSrc(MI
, MoreTy
, 2);
6387 // Adjust mask based on new input vector length.
6388 SmallVector
<int, 16> NewMask(WidenNumElts
, -1);
6389 for (unsigned I
= 0; I
!= NumElts
; ++I
) {
6391 if (Idx
< static_cast<int>(NumElts
))
6394 NewMask
[I
] = Idx
- NumElts
+ WidenNumElts
;
6396 moreElementsVectorDst(MI
, MoreTy
, 0);
6397 MIRBuilder
.setInstrAndDebugLoc(MI
);
6398 MIRBuilder
.buildShuffleVector(MI
.getOperand(0).getReg(),
6399 MI
.getOperand(1).getReg(),
6400 MI
.getOperand(2).getReg(), NewMask
);
6401 MI
.eraseFromParent();
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1)
      Factors.push_back(CarrySumPrevDstIdx);

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}

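// Illustrative sketch of the schoolbook scheme above for SrcParts == DstParts
// == 2, e.g. a 128-bit multiply split into 64-bit parts (explanatory example,
// not generated output):
//   DstRegs[0] = mul(Src1[0], Src2[0])
//   DstRegs[1] = umulh(Src1[0], Src2[0])   // high half of the first product
//              + mul(Src1[1], Src2[0])
//              + mul(Src1[0], Src2[1])
// Because DstIdx == DstParts - 1 is the most significant part, its carries are
// never consumed, so those factors are summed with plain G_ADDs instead of
// carry-propagating G_UADDO/G_UADDE chains.
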
6468 LegalizerHelper::LegalizeResult
6469 LegalizerHelper::narrowScalarAddSub(MachineInstr
&MI
, unsigned TypeIdx
,
6472 return UnableToLegalize
;
6474 Register DstReg
= MI
.getOperand(0).getReg();
6475 LLT DstType
= MRI
.getType(DstReg
);
6476 // FIXME: add support for vector types
6477 if (DstType
.isVector())
6478 return UnableToLegalize
;
6480 unsigned Opcode
= MI
.getOpcode();
6481 unsigned OpO
, OpE
, OpF
;
6483 case TargetOpcode::G_SADDO
:
6484 case TargetOpcode::G_SADDE
:
6485 case TargetOpcode::G_UADDO
:
6486 case TargetOpcode::G_UADDE
:
6487 case TargetOpcode::G_ADD
:
6488 OpO
= TargetOpcode::G_UADDO
;
6489 OpE
= TargetOpcode::G_UADDE
;
6490 OpF
= TargetOpcode::G_UADDE
;
6491 if (Opcode
== TargetOpcode::G_SADDO
|| Opcode
== TargetOpcode::G_SADDE
)
6492 OpF
= TargetOpcode::G_SADDE
;
6494 case TargetOpcode::G_SSUBO
:
6495 case TargetOpcode::G_SSUBE
:
6496 case TargetOpcode::G_USUBO
:
6497 case TargetOpcode::G_USUBE
:
6498 case TargetOpcode::G_SUB
:
6499 OpO
= TargetOpcode::G_USUBO
;
6500 OpE
= TargetOpcode::G_USUBE
;
6501 OpF
= TargetOpcode::G_USUBE
;
6502 if (Opcode
== TargetOpcode::G_SSUBO
|| Opcode
== TargetOpcode::G_SSUBE
)
6503 OpF
= TargetOpcode::G_SSUBE
;
6506 llvm_unreachable("Unexpected add/sub opcode!");
6509 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
6510 unsigned NumDefs
= MI
.getNumExplicitDefs();
6511 Register Src1
= MI
.getOperand(NumDefs
).getReg();
6512 Register Src2
= MI
.getOperand(NumDefs
+ 1).getReg();
6513 Register CarryDst
, CarryIn
;
6515 CarryDst
= MI
.getOperand(1).getReg();
6516 if (MI
.getNumOperands() == NumDefs
+ 3)
6517 CarryIn
= MI
.getOperand(NumDefs
+ 2).getReg();
6519 LLT RegTy
= MRI
.getType(MI
.getOperand(0).getReg());
6520 LLT LeftoverTy
, DummyTy
;
6521 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, Src1Left
, Src2Left
, DstRegs
;
6522 extractParts(Src1
, RegTy
, NarrowTy
, LeftoverTy
, Src1Regs
, Src1Left
,
6524 extractParts(Src2
, RegTy
, NarrowTy
, DummyTy
, Src2Regs
, Src2Left
, MIRBuilder
,
6527 int NarrowParts
= Src1Regs
.size();
6528 Src1Regs
.append(Src1Left
);
6529 Src2Regs
.append(Src2Left
);
6530 DstRegs
.reserve(Src1Regs
.size());
6532 for (int i
= 0, e
= Src1Regs
.size(); i
!= e
; ++i
) {
6534 MRI
.createGenericVirtualRegister(MRI
.getType(Src1Regs
[i
]));
6536 // Forward the final carry-out to the destination register
6537 if (i
== e
- 1 && CarryDst
)
6538 CarryOut
= CarryDst
;
6540 CarryOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
6543 MIRBuilder
.buildInstr(OpO
, {DstReg
, CarryOut
},
6544 {Src1Regs
[i
], Src2Regs
[i
]});
6545 } else if (i
== e
- 1) {
6546 MIRBuilder
.buildInstr(OpF
, {DstReg
, CarryOut
},
6547 {Src1Regs
[i
], Src2Regs
[i
], CarryIn
});
6549 MIRBuilder
.buildInstr(OpE
, {DstReg
, CarryOut
},
6550 {Src1Regs
[i
], Src2Regs
[i
], CarryIn
});
6553 DstRegs
.push_back(DstReg
);
6556 insertParts(MI
.getOperand(0).getReg(), RegTy
, NarrowTy
,
6557 ArrayRef(DstRegs
).take_front(NarrowParts
), LeftoverTy
,
6558 ArrayRef(DstRegs
).drop_front(NarrowParts
));
6560 MI
.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
  auto [DstReg, Src1, Src2] = MI.getFirst3Regs();

  LLT Ty = MRI.getType(DstReg);
  if (Ty.isVector())
    return UnableToLegalize;

  unsigned Size = Ty.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  unsigned NumParts = Size / NarrowSize;
  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
  unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);

  SmallVector<Register, 2> Src1Parts, Src2Parts;
  SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
  extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
  extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);

  // Take only high half of registers if this is high mul.
  ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
  MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;

  Register Src = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src);

  // If all finite floats fit into the narrowed integer type, we can just swap
  // out the result type. This is practically only useful for conversions from
  // half to at least 16-bits, so just handle the one case.
  if (SrcTy.getScalarType() != LLT::scalar(16) ||
      NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
    return UnableToLegalize;

  Observer.changingInstr(MI);
  narrowScalarDst(MI, NarrowTy, 0,
                  IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
  Observer.changedInstr(MI);
  return Legalized;
}

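// Illustrative numbers for the check above (explanatory, not part of the
// original comments): the largest finite half value is 65504, so every finite
// f16 converts into 16 unsigned bits, or 17 bits once a sign bit is needed.
// That is why G_FPTOSI from s16 can simply retype its destination to any
// NarrowTy of at least s17 (s16 for G_FPTOUI) and then sign/zero-extend back
// to the original width.
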
6619 LegalizerHelper::LegalizeResult
6620 LegalizerHelper::narrowScalarExtract(MachineInstr
&MI
, unsigned TypeIdx
,
6623 return UnableToLegalize
;
6625 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
6627 int64_t SizeOp1
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
6628 // FIXME: add support for when SizeOp1 isn't an exact multiple of
6630 if (SizeOp1
% NarrowSize
!= 0)
6631 return UnableToLegalize
;
6632 int NumParts
= SizeOp1
/ NarrowSize
;
6634 SmallVector
<Register
, 2> SrcRegs
, DstRegs
;
6635 SmallVector
<uint64_t, 2> Indexes
;
6636 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
,
6639 Register OpReg
= MI
.getOperand(0).getReg();
6640 uint64_t OpStart
= MI
.getOperand(2).getImm();
6641 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
6642 for (int i
= 0; i
< NumParts
; ++i
) {
6643 unsigned SrcStart
= i
* NarrowSize
;
6645 if (SrcStart
+ NarrowSize
<= OpStart
|| SrcStart
>= OpStart
+ OpSize
) {
6646 // No part of the extract uses this subregister, ignore it.
6648 } else if (SrcStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
6649 // The entire subregister is extracted, forward the value.
6650 DstRegs
.push_back(SrcRegs
[i
]);
6654 // OpSegStart is where this destination segment would start in OpReg if it
6655 // extended infinitely in both directions.
6656 int64_t ExtractOffset
;
6658 if (OpStart
< SrcStart
) {
6660 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- SrcStart
);
6662 ExtractOffset
= OpStart
- SrcStart
;
6663 SegSize
= std::min(SrcStart
+ NarrowSize
- OpStart
, OpSize
);
6666 Register SegReg
= SrcRegs
[i
];
6667 if (ExtractOffset
!= 0 || SegSize
!= NarrowSize
) {
6668 // A genuine extract is needed.
6669 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
6670 MIRBuilder
.buildExtract(SegReg
, SrcRegs
[i
], ExtractOffset
);
6673 DstRegs
.push_back(SegReg
);
6676 Register DstReg
= MI
.getOperand(0).getReg();
6677 if (MRI
.getType(DstReg
).isVector())
6678 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
6679 else if (DstRegs
.size() > 1)
6680 MIRBuilder
.buildMergeLikeInstr(DstReg
, DstRegs
);
6682 MIRBuilder
.buildCopy(DstReg
, DstRegs
[0]);
6683 MI
.eraseFromParent();
6687 LegalizerHelper::LegalizeResult
6688 LegalizerHelper::narrowScalarInsert(MachineInstr
&MI
, unsigned TypeIdx
,
6690 // FIXME: Don't know how to handle secondary types yet.
6692 return UnableToLegalize
;
6694 SmallVector
<Register
, 2> SrcRegs
, LeftoverRegs
, DstRegs
;
6695 SmallVector
<uint64_t, 2> Indexes
;
6696 LLT RegTy
= MRI
.getType(MI
.getOperand(0).getReg());
6698 extractParts(MI
.getOperand(1).getReg(), RegTy
, NarrowTy
, LeftoverTy
, SrcRegs
,
6699 LeftoverRegs
, MIRBuilder
, MRI
);
6701 SrcRegs
.append(LeftoverRegs
);
6703 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
6704 Register OpReg
= MI
.getOperand(2).getReg();
6705 uint64_t OpStart
= MI
.getOperand(3).getImm();
6706 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
6707 for (int I
= 0, E
= SrcRegs
.size(); I
!= E
; ++I
) {
6708 unsigned DstStart
= I
* NarrowSize
;
6710 if (DstStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
6711 // The entire subregister is defined by this insert, forward the new
6713 DstRegs
.push_back(OpReg
);
6717 Register SrcReg
= SrcRegs
[I
];
6718 if (MRI
.getType(SrcRegs
[I
]) == LeftoverTy
) {
6719 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6720 SrcReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
6721 MIRBuilder
.buildAnyExt(SrcReg
, SrcRegs
[I
]);
6724 if (DstStart
+ NarrowSize
<= OpStart
|| DstStart
>= OpStart
+ OpSize
) {
6725 // No part of the insert affects this subregister, forward the original.
6726 DstRegs
.push_back(SrcReg
);
6730 // OpSegStart is where this destination segment would start in OpReg if it
6731 // extended infinitely in both directions.
6732 int64_t ExtractOffset
, InsertOffset
;
6734 if (OpStart
< DstStart
) {
6736 ExtractOffset
= DstStart
- OpStart
;
6737 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- DstStart
);
6739 InsertOffset
= OpStart
- DstStart
;
6742 std::min(NarrowSize
- InsertOffset
, OpStart
+ OpSize
- DstStart
);
6745 Register SegReg
= OpReg
;
6746 if (ExtractOffset
!= 0 || SegSize
!= OpSize
) {
6747 // A genuine extract is needed.
6748 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
6749 MIRBuilder
.buildExtract(SegReg
, OpReg
, ExtractOffset
);
6752 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
6753 MIRBuilder
.buildInsert(DstReg
, SrcReg
, SegReg
, InsertOffset
);
6754 DstRegs
.push_back(DstReg
);
6757 uint64_t WideSize
= DstRegs
.size() * NarrowSize
;
6758 Register DstReg
= MI
.getOperand(0).getReg();
6759 if (WideSize
> RegTy
.getSizeInBits()) {
6760 Register MergeReg
= MRI
.createGenericVirtualRegister(LLT::scalar(WideSize
));
6761 MIRBuilder
.buildMergeLikeInstr(MergeReg
, DstRegs
);
6762 MIRBuilder
.buildTrunc(DstReg
, MergeReg
);
6764 MIRBuilder
.buildMergeLikeInstr(DstReg
, DstRegs
);
6766 MI
.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;

  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                      {Src0Regs[I], Src1Regs[I]});
    DstRegs.push_back(Inst.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(
        MI.getOpcode(),
        {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

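// narrowScalarExt legalizes an extension whose result is wider than NarrowTy:
// the source is split into GCD-sized pieces, padded up to a common multiple of
// the narrow type using the extension opcode, and re-merged into the
// destination register.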
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
                                 LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  auto [DstReg, SrcReg] = MI.getFirst2Regs();

  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  SmallVector<Register, 8> Parts;
  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register CondReg = MI.getOperand(1).getReg();
  LLT CondTy = MRI.getType(CondReg);
  if (CondTy.isVector()) // TODO: Handle vselect
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;

  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(NarrowTy,
                                         CondReg, Src1Regs[I], Src2Regs[I]);
    DstRegs.push_back(Select.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(
        LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
    DstLeftoverRegs.push_back(Select.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

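// narrowScalarCTLZ handles a scalar source that is exactly twice the narrow
// width: count leading zeros in each half and select the correct result based
// on whether the high half is zero (see the expansion comment below).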
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(1), C_0);
    auto LoCTLZ = IsUndef ?
      B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
      B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(0), C_0);
    auto HiCTTZ = IsUndef ?
      B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
      B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

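// narrowScalarCTPOP splits a double-width source in half and sums the
// population counts of the two halves.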
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));

    auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
    auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
    MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  MachineIRBuilder &B = MIRBuilder;
  Register ExpReg = MI.getOperand(2).getReg();
  LLT ExpTy = MRI.getType(ExpReg);

  unsigned ClampSize = NarrowTy.getScalarSizeInBits();

  // Clamp the exponent to the range of the target type.
  auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
  auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
  auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
  auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);

  auto Trunc = B.buildTrunc(NarrowTy, Clamp);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(Trunc.getReg(0));
  Observer.changedInstr(MI);
  return Legalized;
}

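// lowerBitCount expands the bit-counting opcodes (G_CTLZ, G_CTTZ, G_CTPOP and
// their ZERO_UNDEF variants) into shifts, masks, compares and selects when no
// better target support is available.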
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const auto &TII = MIRBuilder.getTII();
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. Default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives same result in blocks of 2 with one instruction less.
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // In order to get count in blocks of 4 add values from adjacent block of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For count in blocks of 8 bits we don't have to mask high 4 bits before
    // addition since count value sits in range {0,...,8} and 4 bits are enough
    // to hold such binary values. After addition high 4 bits still hold count
    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
    // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));

    // Shift count result from 8 high bits to low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);

    auto IsMulSupported = [this](const LLT Ty) {
      auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
      return Action == Legal || Action == WidenScalar || Action == Custom;
    };
    if (IsMulSupported(Ty)) {
      auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
    } else {
      auto ResTmp = B8Count;
      for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
        auto ShiftC = B.buildConstant(Ty, Shift);
        auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
        ResTmp = B.buildAdd(Ty, ResTmp, Shl);
      }
      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

// Check that (every element of) Reg is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
                                        Register Reg, unsigned BW) {
  return matchUnaryPredicate(
      MRI, Reg,
      [=](const Constant *C) {
        // Null constant here means an undef.
        const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
        return !CI || CI->getValue().urem(BW) != 0;
      },
      /*AllowUndefs*/ true);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
  auto [Dst, X, Y, Z] = MI.getFirst4Regs();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  unsigned BW = Ty.getScalarSizeInBits();

  if (!isPowerOf2_32(BW))
    return UnableToLegalize;

  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl X, Y, Z -> fshr X, Y, -Z
    // fshr X, Y, Z -> fshl X, Y, -Z
    auto Zero = MIRBuilder.buildConstant(ShTy, 0);
    Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
  } else {
    // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
    // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
    } else {
      X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
    }

    Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
  }

  MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
  auto [Dst, X, Y, Z] = MI.getFirst4Regs();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  const unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;

  Register ShX, ShY;
  Register ShAmt, InvShAmt;

  // FIXME: Emit optimized urem by constant instead of letting it expand later.
  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
    } else {
      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
    }

    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
    } else {
      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
    }
  }

  MIRBuilder.buildOr(Dst, ShX, ShY);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
  // These operations approximately do the following (while avoiding undefined
  // shifts by BW):
  // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(MI.getOperand(3).getReg());

  bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  // TODO: Use smarter heuristic that accounts for vector legalization.
  if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
    return lowerFunnelShiftAsShifts(MI);

  // This only works for powers of 2, fallback to shifts if it fails.
  LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
  if (Result == UnableToLegalize)
    return lowerFunnelShiftAsShifts(MI);
  return Result;
}

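// lowerEXT splits an extension whose per-element step is more than a doubling:
// extend to an intermediate type first, then unmerge the vector, extend each
// half separately, and merge the results back together.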
LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
  auto [Dst, Src] = MI.getFirst2Regs();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  uint32_t DstTySize = DstTy.getSizeInBits();
  uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
  uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();

  if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
      !isPowerOf2_32(SrcTyScalarSize))
    return UnableToLegalize;

  // The step between extend is too large, split it by creating an intermediate
  // extend instruction
  if (SrcTyScalarSize * 2 < DstTyScalarSize) {
    LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
    // If the destination type is illegal, split it into multiple statements
    // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
    auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
    // Unmerge the vector
    LLT EltTy = MidTy.changeElementCount(
        MidTy.getElementCount().divideCoefficientBy(2));
    auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);

    // Extend the unmerged halves to the final element type.
    LLT ZExtResTy = DstTy.changeElementCount(
        DstTy.getElementCount().divideCoefficientBy(2));
    auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
                                          {UnmergeSrc.getReg(0)});
    auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
                                          {UnmergeSrc.getReg(1)});

    // Merge the ending vectors
    MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});

    MI.eraseFromParent();
    return Legalized;
  }
  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
  // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // Similar to how operand splitting is done in SelectionDAG, we can handle
  // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
  //   %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
  //   %lo16(<4 x s16>) = G_TRUNC %inlo
  //   %hi16(<4 x s16>) = G_TRUNC %inhi
  //   %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
  //   %res(<8 x s8>) = G_TRUNC %in16

  assert(MI.getOpcode() == TargetOpcode::G_TRUNC);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
      isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
      isPowerOf2_32(SrcTy.getNumElements()) &&
      isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
    // Split input type.
    LLT SplitSrcTy = SrcTy.changeElementCount(
        SrcTy.getElementCount().divideCoefficientBy(2));

    // First, split the source into two smaller vectors.
    SmallVector<Register, 2> SplitSrcs;
    extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);

    // Truncate the splits into intermediate narrower elements.
    LLT InterTy;
    if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
      InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
    else
      InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
    for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
      SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
    }

    // Combine the new truncates into one vector
    auto Merge = MIRBuilder.buildMergeLikeInstr(
        DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);

    // Truncate the new vector to the final result type
    if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
      MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
    else
      MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));

    MI.eraseFromParent();
    return Legalized;
  }
  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
  MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();

  unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;

  MIRBuilder.setInstrAndDebugLoc(MI);

  // If a rotate in the other direction is supported, use it.
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
      isPowerOf2_32(EltSizeInBits))
    return lowerRotateWithReverseRotate(MI);

  // If a funnel shift is supported, use it.
  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  bool IsFShLegal = false;
  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
    auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
                                Register R3) {
      MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
      MI.eraseFromParent();
      return Legalized;
    };
    // If a funnel shift in the other direction is supported, use it.
    if (IsFShLegal) {
      return buildFunnelShift(FShOpc, Dst, Src, Amt);
    } else if (isPowerOf2_32(EltSizeInBits)) {
      Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
      return buildFunnelShift(RevFsh, Dst, Src, Amt);
    }
  }

  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
  unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
  auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
  Register ShVal;
  Register RevShiftVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
    auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
    auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
    auto One = MIRBuilder.buildConstant(AmtTy, 1);
    auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
  }
  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
  MI.eraseFromParent();
  return Legalized;
}

// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}

// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
// operations and G_SITOFP
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // For i64 < INT_MAX we simply reuse SITOFP.
  // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
  // saved before division, convert to float by SITOFP, multiply the result
  // by 2.
  auto One = MIRBuilder.buildConstant(S64, 1);
  auto Zero = MIRBuilder.buildConstant(S64, 0);
  // Result if Src < INT_MAX
  auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
  // Result if Src >= INT_MAX
  auto Halved = MIRBuilder.buildLShr(S64, Src, One);
  auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
  auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
  auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
  auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
  // Check if the original value is larger than INT_MAX by comparing with
  // zero to pick one of the two conversions.
  auto IsLarge =
      MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
  MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);

  MI.eraseFromParent();
  return Legalized;
}

// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
// IEEE double representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);

  // We create double value from 32 bit parts with 32 exponent difference.
  // Note that + and - are float operations that adjust the implicit leading
  // one, the bases 2^52 and 2^84 are for illustrative purposes.
  //
  // X = 2^52 * 1.0...LowBits
  // Y = 2^84 * 1.0...HighBits
  // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
  //         = - 2^52 * 1.0...HighBits
  // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
  auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
  auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
  auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
  auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
  auto HalfWidth = MIRBuilder.buildConstant(S64, 32);

  auto LowBits = MIRBuilder.buildTrunc(S32, Src);
  LowBits = MIRBuilder.buildZExt(S64, LowBits);
  auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
  auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
  auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
  auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
  MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  if (SrcTy == LLT::scalar(1)) {
    auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != LLT::scalar(64))
    return UnableToLegalize;

  if (DstTy == LLT::scalar(32))
    // TODO: SelectionDAG has several alternative expansions to port which may
    // be more reasonable depending on the available instructions. We also need
    // a more advanced mechanism to choose an optimal version depending on
    // target features such as sitofp or CTLZ availability.
    return lowerU64ToF32WithSITOFP(MI);

  if (DstTy == LLT::scalar(64))
    return lowerU64ToF64BitFloatOps(MI);

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  if (SrcTy == S1) {
    auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != S64)
    return UnableToLegalize;

  if (DstTy == S32) {
    // signed cl2f(long l) {
    //   long s = l >> 63;
    //   float r = cul2f((l + s) ^ s);
    //   return s ? -r : r;
    // }
    Register L = Src;
    auto SignBit = MIRBuilder.buildConstant(S64, 63);
    auto S = MIRBuilder.buildAShr(S64, L, SignBit);

    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
    auto R = MIRBuilder.buildUITOFP(S32, Xor);

    auto RNeg = MIRBuilder.buildFNeg(S32, R);
    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
                                            MIRBuilder.buildConstant(S64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.

  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getZero(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}

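// lowerFPTOINT_SAT expands G_FPTOSI_SAT/G_FPTOUI_SAT. When the integer bounds
// are exactly representable in the source FP type it clamps the input and does
// a single conversion; otherwise it converts first and patches the
// out-of-range and NaN cases with compares and selects.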
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
  unsigned SatWidth = DstTy.getScalarSizeInBits();

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth);
    MaxInt = APInt::getMaxValue(SatWidth);
  }

  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
  APFloat MinFloat(Semantics);
  APFloat MaxFloat(Semantics);

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  // If the integer bounds are exactly representable as floats, emit a
  // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
  // and selects.
  if (AreExactFloatBounds) {
    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
                                     SrcTy.changeElementSize(1), Src, MaxC);
    auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
    // Clamp by MaxFloat from above. NaN cannot occur.
    auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
    auto MinP =
        MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Max,
                             MinC, MachineInstr::FmNoNans);
    auto Min =
        MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
    // Convert clamped value to integer. In the unsigned case we're done,
    // because we mapped NaN to MinFloat, which will cast to zero.
    if (!IsSigned) {
      MIRBuilder.buildFPTOUI(Dst, Min);
      MI.eraseFromParent();
      return Legalized;
    }

    // Otherwise, select 0 if Src is NaN.
    auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
    auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
                                       DstTy.changeElementSize(1), Src, Src);
    MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
                           FpToInt);
    MI.eraseFromParent();
    return Legalized;
  }

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
                          : MIRBuilder.buildFPTOUI(DstTy, Src);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  auto ULT =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
                           MIRBuilder.buildFConstant(SrcTy, MinFloat));
  auto Max = MIRBuilder.buildSelect(
      DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
  // If Src OGT MaxFloat, select MaxInt.
  auto OGT =
      MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
                           MIRBuilder.buildFConstant(SrcTy, MaxFloat));

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // will cast to zero.
  if (!IsSigned) {
    MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
                           Max);
    MI.eraseFromParent();
    return Legalized;
  }

  // Otherwise, select 0 if Src is NaN.
  auto Min = MIRBuilder.buildSelect(
      DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
  auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
                                     DstTy.changeElementSize(1), Src, Src);
  MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
  MI.eraseFromParent();
  return Legalized;
}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);

  auto [Dst, Src] = MI.getFirst2Regs();
  assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
         MRI.getType(Src).getScalarType() == LLT::scalar(64));

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
    unsigned Flags = MI.getFlags();
    auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
    MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
    MI.eraseFromParent();
    return Legalized;
  }

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;

  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = M | (E << 12);
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = clamp(1-E, 0, 13);
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  auto [DstTy, SrcTy] = MI.getFirst2LLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S16 = LLT::scalar(16);

  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
    return lowerFPTRUNC_F64_TO_F16(MI);

  return UnableToLegalize;
}

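// lowerFPOWI converts the integer exponent to floating point and emits G_FPOW.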
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Dst);

  auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
  MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_SMIN:
    return CmpInst::ICMP_SLT;
  case TargetOpcode::G_SMAX:
    return CmpInst::ICMP_SGT;
  case TargetOpcode::G_UMIN:
    return CmpInst::ICMP_ULT;
  case TargetOpcode::G_UMAX:
    return CmpInst::ICMP_UGT;
  default:
    llvm_unreachable("not in integer min/max");
  }
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();

  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
  LLT CmpType = MRI.getType(Dst).changeElementSize(1);

  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);

  MI.eraseFromParent();
  return Legalized;
}

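// lowerThreewayCompare expands G_SCMP/G_UCMP into two compares whose results
// are combined either with selects (producing -1/0/1 directly) or by
// subtracting the extended boolean results, depending on the target's boolean
// contents.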
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
  GSUCmp *Cmp = cast<GSUCmp>(&MI);

  Register Dst = Cmp->getReg(0);
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Cmp->getReg(1));
  LLT CmpTy = DstTy.changeElementSize(1);

  CmpInst::Predicate LTPredicate = Cmp->isSigned()
                                       ? CmpInst::Predicate::ICMP_SLT
                                       : CmpInst::Predicate::ICMP_ULT;
  CmpInst::Predicate GTPredicate = Cmp->isSigned()
                                       ? CmpInst::Predicate::ICMP_SGT
                                       : CmpInst::Predicate::ICMP_UGT;

  auto Zero = MIRBuilder.buildConstant(DstTy, 0);
  auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
                                   Cmp->getRHSReg());
  auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
                                   Cmp->getRHSReg());

  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
  auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
  if (TLI.shouldExpandCmpUsingSelects(getApproximateEVTForLLT(SrcTy, Ctx)) ||
      BC == TargetLowering::UndefinedBooleanContent) {
    auto One = MIRBuilder.buildConstant(DstTy, 1);
    auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);

    auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
    MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
  } else {
    if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
      std::swap(IsGT, IsLT);
    // Extend boolean results to DstTy, which is at least i2, before subtracting
    // them.
    unsigned BoolExtOp =
        MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
    IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
    IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
    MIRBuilder.buildSub(Dst, IsGT, IsLT);
  }

  MI.eraseFromParent();
  return Legalized;
}

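// lowerFCopySign builds the result bitwise: the non-sign bits of Src0 OR'd
// with the sign bit of Src1, shifting or truncating the sign when the operand
// widths differ.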
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
  auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();

  auto SignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getSignMask(Src0Size));

  auto NotSignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
  Register And1;
  if (Src0Ty == Src1Ty) {
    And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
  } else if (Src0Size > Src1Size) {
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
  } else {
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  unsigned Flags = MI.getFlags();

  // We masked the sign bit and the not-sign bit, so these are disjoint.
  Flags |= MachineInstr::Disjoint;

  MIRBuilder.buildOr(Dst, And0, And1, Flags);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // behavior.
    //
    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicated quiet-snan instruction as we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!isKnownNeverSNaN(Src0, MRI))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!isKnownNeverSNaN(Src1, MRI))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
  // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  unsigned Flags = MI.getFlags();

  auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
                                  Flags);
  MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
  auto [DstReg, X] = MI.getFirst2Regs();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // round(x) =>
  //  t = trunc(x);
  //  d = fabs(x - t);
  //  o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
  //  return t + o;

  auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);

  auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
  auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);

  auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
  auto Cmp =
      MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);

  // Could emit G_UITOFP instead
  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
  auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
  auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);

  MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
  auto [DstReg, SrcReg] = MI.getFirst2Regs();
  unsigned Flags = MI.getFlags();
  LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // result = trunc(src);
  // if (src < 0.0 && src != result)
  //   result += -1.0.

  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);

  auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
                                  SrcReg, Zero, Flags);
  auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
                                      SrcReg, Trunc, Flags);
  auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);

  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  const unsigned NumOps = MI.getNumOperands();
  auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
  unsigned PartSize = Src0Ty.getSizeInBits();

  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  if (DstTy.isPointer()) {
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
          DstTy.getAddressSpace())) {
      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
      return UnableToLegalize;
    }

    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (DstTy.isPointer())
    return UnableToLegalize; // TODO

  SrcReg = coerceToScalar(SrcReg);
  if (!SrcReg)
    return UnableToLegalize;

  // Expand scalarizing unmerge as bitcast to integer and shift.
  LLT IntTy = MRI.getType(SrcReg);

  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  unsigned Offset = DstSize;
  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
    auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
    MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
  }

  MI.eraseFromParent();
  return Legalized;
}

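// Illustrative example of the expansion above: unmerging an s32 source into
// four s8 destinations produces dst0 = trunc(src), dst1 = trunc(src >> 8),
// dst2 = trunc(src >> 16), dst3 = trunc(src >> 24).
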
/// Lower a vector extract or insert by writing the vector to a stack temporary
/// and reloading the element or vector.
///
/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
///  =>
///  %stack_temp = G_FRAME_INDEX
///  G_STORE %vec, %stack_temp
///  %idx = clamp(%idx, %vec.getNumElements())
///  %element_ptr = G_PTR_ADD %stack_temp, %idx
///  %dst = G_LOAD %element_ptr
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  unsigned NumElts = VecTy.getNumElements();

  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
    SmallVector<Register, 8> SrcRegs;
    extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);

    if (InsertVal) {
      SrcRegs[IdxVal] = MI.getOperand(2).getReg();
      MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
    } else {
      MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  if (!EltTy.isByteSized()) { // Not implemented.
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(
      TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // We lose information with a variable offset.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Write the inserted element
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);

    // Reload the whole vector.
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
      MI.getFirst3RegLLTs();
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getScalarType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  if (DstTy.isScalar())
    MIRBuilder.buildCopy(DstReg, BuildVec[0]);
  else
    MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}

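// Illustrative example of the expansion above: a shuffle of two <2 x s32>
// sources with mask <3, 0> extracts element 1 of Src1 (index 3 - NumElts) and
// element 0 of Src0, then rebuilds the result with G_BUILD_VECTOR; negative
// mask entries become undef elements.
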
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerVECTOR_COMPRESS(llvm::MachineInstr &MI) {
  auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
      MI.getFirst4RegLLTs();

  if (VecTy.isScalableVector())
    report_fatal_error("Cannot expand masked_compress for scalable vectors.");

  Align VecAlign = getStackTemporaryAlignment(VecTy);
  MachinePointerInfo PtrInfo;
  Register StackPtr =
      createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
                           PtrInfo)
          .getReg(0);
  MachinePointerInfo ValPtrInfo =
      MachinePointerInfo::getUnknownStack(*MI.getMF());

  LLT IdxTy = LLT::scalar(32);
  LLT ValTy = VecTy.getElementType();
  Align ValAlign = getStackTemporaryAlignment(ValTy);

  auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);

  bool HasPassthru =
      MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;

  if (HasPassthru)
    MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);

  Register LastWriteVal;
  std::optional<APInt> PassthruSplatVal =
      isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);

  if (PassthruSplatVal.has_value()) {
    LastWriteVal =
        MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
  } else if (HasPassthru) {
    auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
    Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
                                     {LLT::scalar(32)}, {Popcount});

    Register LastElmtPtr =
        getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
    LastWriteVal =
        MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
            .getReg(0);
  }

  unsigned NumElmts = VecTy.getNumElements();
  for (unsigned I = 0; I < NumElmts; ++I) {
    auto Idx = MIRBuilder.buildConstant(IdxTy, I);
    auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
    Register ElmtPtr =
        getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
    MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);

    LLT MaskITy = MaskTy.getElementType();
    auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
    if (MaskITy.getSizeInBits() > 1)
      MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);

    MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
    OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);

    if (HasPassthru && I == NumElmts - 1) {
      auto EndOfVector =
          MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
      auto AllLanesSelected = MIRBuilder.buildICmp(
          CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
      OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
                                     {OutPos, EndOfVector});
      ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));

      LastWriteVal =
          MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
              .getReg(0);
      MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
    }
  }

  // TODO: Use StackPtr's FrameIndex alignment.
  MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);

  MI.eraseFromParent();
  return Legalized;
}

Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
                                                    Register AllocSize,
                                                    Align Alignment,
                                                    LLT PtrTy) {
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Alignment > Align(1)) {
    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
}

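// Illustrative example of the masking above: for a 16-byte alignment request
// the constant is -16 (~15), so after subtracting the allocation size from SP
// the AND clears the low four bits and rounds the new SP down to the next
// 16-byte boundary (the stack grows down).
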
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  LLT PtrTy = MRI.getType(Dst);
  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  Register SPTmp =
      getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);

  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerStackSave(MachineInstr &MI) {
  Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
  if (!StackPtr)
    return UnableToLegalize;

  MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
  Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
  if (!StackPtr)
    return UnableToLegalize;

  MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned Offset = MI.getOperand(2).getImm();

  // Extract sub-vector or one element
  if (SrcTy.isVector()) {
    unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
    unsigned DstSize = DstTy.getSizeInBits();

    if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
        (Offset + DstSize <= SrcTy.getSizeInBits())) {
      // Unmerge and allow access to each Src element for the artifact combiner.
      auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);

      // Take element(s) we need to extract and copy it (merge them).
      SmallVector<Register, 8> SubVectorElts;
      for (unsigned Idx = Offset / SrcEltSize;
           Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
        SubVectorElts.push_back(Unmerge.getReg(Idx));
      }
      if (SubVectorElts.size() == 1)
        MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
      else
        MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);

      MI.eraseFromParent();
      return Legalized;
    }
  }

  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
    }

    if (Offset == 0)
      MIRBuilder.buildTrunc(DstReg, SrcReg);
    else {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(DstReg, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

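// Illustrative example of the scalar path above: extracting an s16 at bit
// offset 16 from an s64 lowers to
//   %shr = G_LSHR %src, 16
//   %dst = G_TRUNC %shr
// while offset 0 needs only the truncate.
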
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerInsert(MachineInstr &MI) {
  auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
  uint64_t Offset = MI.getOperand(3).getImm();

  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  // Insert sub-vector or one element
  if (DstTy.isVector() && !InsertTy.isPointer()) {
    LLT EltTy = DstTy.getElementType();
    unsigned EltSize = EltTy.getSizeInBits();
    unsigned InsertSize = InsertTy.getSizeInBits();

    if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
        (Offset + InsertSize <= DstTy.getSizeInBits())) {
      auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
      SmallVector<Register, 8> DstElts;
      unsigned Idx = 0;
      // Elements from Src before insert start Offset
      for (; Idx < Offset / EltSize; ++Idx) {
        DstElts.push_back(UnmergeSrc.getReg(Idx));
      }

      // Replace elements in Src with elements from InsertSrc
      if (InsertTy.getSizeInBits() > EltSize) {
        auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
        for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
             ++Idx, ++i) {
          DstElts.push_back(UnmergeInsertSrc.getReg(i));
        }
      } else {
        DstElts.push_back(InsertSrc);
        ++Idx;
      }

      // Remaining elements from Src after insert
      for (; Idx < DstTy.getNumElements(); ++Idx) {
        DstElts.push_back(UnmergeSrc.getReg(Idx));
      }

      MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
      MI.eraseFromParent();
      return Legalized;
    }
  }

  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  LLT IntDstTy = DstTy;

  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}

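// Illustrative example of the scalar path above: inserting an s8 at bit offset
// 8 into an s32 uses the keep-mask 0xFFFF00FF (all bits outside [8, 16)); the
// destination is ANDed with that mask and ORed with the zero-extended,
// left-shifted inserted value.
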
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
      MI.getFirst4RegLLTs();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
  LLT Ty = Dst0Ty;
  LLT BoolTy = Dst1Ty;

  Register NewDst0 = MRI.cloneVirtualRegister(Dst0);

  if (IsAdd)
    MIRBuilder.buildAdd(NewDst0, LHS, RHS);
  else
    MIRBuilder.buildSub(NewDst0, LHS, RHS);

  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.

  auto Zero = MIRBuilder.buildConstant(Ty, 0);

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  auto ResultLowerThanLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
  auto ConditionRHS = MIRBuilder.buildICmp(
      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);

  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);

  MIRBuilder.buildCopy(Dst0, NewDst0);
  MI.eraseFromParent();

  return Legalized;
}

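// Illustrative example of the overflow test above: for s8 G_SADDO with
// LHS = 100 and RHS = 100 the sum wraps to -56, so "result < LHS" is true
// while "RHS < 0" is false and the XOR reports overflow; for 100 + (-50) both
// compares are true and no overflow is flagged.
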
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }
    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}

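// Illustrative example of the signed clamp above: s8 G_SADDSAT with LHS = 100
// and RHS = 50 gives Hi = 127 - 100 = 27 and Lo = -128 - 0 = -128, so RHS is
// clamped to 27 and the result is the saturated value 100 + 27 = 127.
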
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);
  bool IsSigned;
  bool IsAdd;
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_UADDO;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_SADDO;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_USUBO;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_SSUBO;
    break;
  }

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MachineInstrBuilder Clamp;
  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   {tmp, ov} = saddo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // ssub.sat(a, b) ->
    //   {tmp, ov} = ssubo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  } else {
    // uadd.sat(a, b) ->
    //   {tmp, ov} = uaddo(a, b)
    //   ov ? 0xffffffff : tmp
    // usub.sat(a, b) ->
    //   {tmp, ov} = usubo(a, b)
    //   ov ? 0 : tmp
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  }
  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);

  MI.eraseFromParent();
  return Legalized;
}

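// Illustrative example of the signed clamp above: for s32 G_SADDSAT an
// overflowing positive sum leaves Tmp negative, so Sign is all-ones and
// Clamp = -1 + 0x80000000 = 0x7fffffff (INT_MAX); an overflowing negative sum
// leaves Tmp positive, so Sign is 0 and Clamp is 0x80000000 (INT_MIN).
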
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShlSat(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);

  unsigned BW = Ty.getScalarSizeInBits();
  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
                       : MIRBuilder.buildLShr(Ty, Result, RHS);

  MachineInstrBuilder SatVal;
  if (IsSigned) {
    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
    auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
                                    MIRBuilder.buildConstant(Ty, 0));
    SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
  } else {
    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
  }
  auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);

  MI.eraseFromParent();
  return Legalized;
}

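// Illustrative example of the overflow check above: s8 G_USHLSAT of 0x40 by 2
// wraps the raw shift to 0x00; shifting back gives 0x00 != 0x40, so the result
// saturates to 0xff. The signed form instead selects INT_MIN or INT_MAX based
// on the sign of LHS.
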
LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT Ty = MRI.getType(Src);
  unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;

  // Swap most and least significant byte, set remaining bytes in Res to zero.
  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);

  // Set i-th high/low byte in Res to i-th low/high byte from Src.
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
    APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
    // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
    // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
  }
  Res.getInstr()->getOperand(0).setReg(Dst);

  MI.eraseFromParent();
  return Legalized;
}

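// Illustrative example of the expansion above: for an s32 the initial OR swaps
// bytes 0 and 3 (shift amount 24), and the single loop iteration (i = 1, mask
// 0x0000ff00, shift 8) swaps bytes 1 and 2, producing the byte-reversed value.
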
//{ (Src & Mask) >> N } | { (Src << N) & Mask }
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
                                 MachineInstrBuilder Src, const APInt &Mask) {
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  MachineInstrBuilder C_N = B.buildConstant(Ty, N);
  MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
  return B.buildOr(Dst, LHS, RHS);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getScalarSizeInBits();

  if (Size >= 8) {
    MachineInstrBuilder BSWAP =
        MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

    // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
    //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
    // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
    MachineInstrBuilder Swap4 =
        SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

    // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
    //    [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
    // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
    MachineInstrBuilder Swap2 =
        SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

    // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
    // 6|7
    //    [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
    // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
    SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
  } else {
    // Expand bitreverse for types smaller than 8 bits.
    MachineInstrBuilder Tmp;
    for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
      MachineInstrBuilder Tmp2;
      if (I < J) {
        auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
        Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
      } else {
        auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
        Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
      }

      auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
      Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
      if (I == 0)
        Tmp = Tmp2;
      else
        Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
    }
    MIRBuilder.buildCopy(Dst, Tmp);
  }

  MI.eraseFromParent();
  return Legalized;
}

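// Illustrative example of the byte-wide path above: reversing an s8 value
// abcdefgh, G_BSWAP is a no-op on a single byte, Swap4 produces efghabcd,
// Swap2 produces ghefcdab, and Swap1 produces hgfedcba, the fully bit-reversed
// byte.
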
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
  MachineFunction &MF = MIRBuilder.getMF();

  bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  int NameOpIdx = IsRead ? 1 : 0;
  int ValRegIndex = IsRead ? 0 : 1;

  Register ValReg = MI.getOperand(ValRegIndex).getReg();
  const LLT Ty = MRI.getType(ValReg);
  const MDString *RegStr = cast<MDString>(
    cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));

  Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
  if (!PhysReg.isValid())
    return UnableToLegalize;

  if (IsRead)
    MIRBuilder.buildCopy(ValReg, PhysReg);
  else
    MIRBuilder.buildCopy(PhysReg, ValReg);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Register Result = MI.getOperand(0).getReg();
  LLT OrigTy = MRI.getType(Result);
  auto SizeInBits = OrigTy.getScalarSizeInBits();
  LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);

  auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
  auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
  auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
  unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;

  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
  auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
  MIRBuilder.buildTrunc(Result, Shifted);

  MI.eraseFromParent();
  return Legalized;
}

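// Illustrative example of the widening above: s32 G_UMULH of 0x80000000 and 2
// zero-extends both to s64, multiplies to 0x1'00000000, shifts right by 32 and
// truncates, yielding the high half 1.
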
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());

  if (Mask == fcNone) {
    MIRBuilder.buildConstant(DstReg, 0);
    MI.eraseFromParent();
    return Legalized;
  }
  if (Mask == fcAllFlags) {
    MIRBuilder.buildConstant(DstReg, 1);
    MI.eraseFromParent();
    return Legalized;
  }

  // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
  // version

  unsigned BitSize = SrcTy.getScalarSizeInBits();
  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());

  LLT IntTy = LLT::scalar(BitSize);
  if (SrcTy.isVector())
    IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
  auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  APInt ExpMask = Inf;
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());

  auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
  auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
  auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
  auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
  auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);

  auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
  auto Sign =
      MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);

  auto Res = MIRBuilder.buildConstant(DstTy, 0);
  // Clang doesn't support capture of structured bindings:
  LLT DstTyCopy = DstTy;
  const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
    Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
  };

  // Tests that involve more than one class should be processed first.
  if ((Mask & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) u< exp_mask
    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
                                     ExpMaskC));
    Mask &= ~fcFinite;
  } else if ((Mask & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V u< exp_mask
    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
                                     ExpMaskC));
    Mask &= ~fcPosFinite;
  } else if ((Mask & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
    auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
                                    ExpMaskC);
    auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
    appendToRes(And);
    Mask &= ~fcNegFinite;
  }

  if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    // TODO: Handle inverted case
    if (PartialCheck == (fcZero | fcSubnormal)) {
      auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       ExpBits, ZeroC));
      Mask &= ~PartialCheck;
    }
  }

  // Check for individual classes.
  if (FPClassTest PartialCheck = Mask & fcZero) {
    if (PartialCheck == fcPosZero)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, ZeroC));
    else if (PartialCheck == fcZero)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
    else // fcNegZero
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, SignBitC));
  }

  if (FPClassTest PartialCheck = Mask & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
    auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
    auto OneC = MIRBuilder.buildConstant(IntTy, 1);
    auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
    auto SubnormalRes =
        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
                             MIRBuilder.buildConstant(IntTy, AllOneMantissa));
    if (PartialCheck == fcNegSubnormal)
      SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
    appendToRes(SubnormalRes);
  }

  if (FPClassTest PartialCheck = Mask & fcInf) {
    if (PartialCheck == fcPosInf)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, InfC));
    else if (PartialCheck == fcInf)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
    else { // fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, NegInfC));
    }
  }

  if (FPClassTest PartialCheck = Mask & fcNan) {
    auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) u> int(inf)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
                                       InfWithQnanBitC));
    } else { // fcSNan
      // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
      //                    abs(V) u< (unsigned(Inf) | quiet_bit)
      auto IsNan =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
      auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
                                            Abs, InfWithQnanBitC);
      appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
    }
  }

  if (FPClassTest PartialCheck = Mask & fcNormal) {
    // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
    // (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    auto ExpMinusOne = MIRBuilder.buildSub(
        IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
    APInt MaxExpMinusOne = ExpMask - ExpLSB;
    auto NormalRes =
        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
                             MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
    if (PartialCheck == fcNegNormal)
      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
    else if (PartialCheck == fcPosNormal) {
      auto PosSign = MIRBuilder.buildXor(
          DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
    }
    appendToRes(NormalRes);
  }

  MIRBuilder.buildCopy(DstReg, Res);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSelect(MachineInstr &MI) {
  // Implement G_SELECT in terms of XOR, AND, OR.
  auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
      MI.getFirst4RegLLTs();

  bool IsEltPtr = DstTy.isPointerOrPointerVector();
  if (IsEltPtr) {
    LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
    LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
    Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
    Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
    DstTy = NewTy;
  }

  if (MaskTy.isScalar()) {
    // Turn the scalar condition into a vector condition mask if needed.

    Register MaskElt = MaskReg;

    // The condition was potentially zero extended before, but we want a sign
    // extended boolean.
    if (MaskTy != LLT::scalar(1))
      MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);

    // Continue the sign extension (or truncate) to match the data type.
    MaskElt =
        MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);

    if (DstTy.isVector()) {
      // Generate a vector splat idiom.
      auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
      MaskReg = ShufSplat.getReg(0);
    } else {
      MaskReg = MaskElt;
    }
    MaskTy = DstTy;
  } else if (!DstTy.isVector()) {
    // Cannot handle the case that mask is a vector and dst is a scalar.
    return UnableToLegalize;
  }

  if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
    return UnableToLegalize;
  }

  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
  if (IsEltPtr) {
    auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
    MIRBuilder.buildIntToPtr(DstReg, Or);
  } else {
    MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
  }
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
  // Split DIVREM into individual instructions.
  unsigned Opcode = MI.getOpcode();

  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
                                        : TargetOpcode::G_UDIV,
      {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
                                        : TargetOpcode::G_UREM,
      {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
  // Expand %res = G_ABS %a into:
  // %v1 = G_ASHR %a, scalar_size-1
  // %v2 = G_ADD %a, %v1
  // %res = G_XOR %v2, %v1
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  Register OpReg = MI.getOperand(1).getReg();
  auto ShiftAmt =
      MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
  auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
  auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
  MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
  MI.eraseFromParent();
  return Legalized;
}

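// Illustrative example of the expansion above: for s32 with %a = -5, %v1 is -1
// (all ones), %v2 is -6, and -6 ^ -1 = 5; for non-negative inputs %v1 is 0 and
// the value passes through unchanged.
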
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
  // Expand %res = G_ABS %a into:
  // %v1 = G_CONSTANT 0
  // %v2 = G_SUB %v1, %a
  // %res = G_SMAX %a, %v2
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  auto Zero = MIRBuilder.buildConstant(Ty, 0);
  auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
  MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
  Register SrcReg = MI.getOperand(1).getReg();
  Register DestReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
  auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
  auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
  auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
  MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFAbs(MachineInstr &MI) {
  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();

  LLT Ty = MRI.getType(DstReg);

  // Reset sign bit
  MIRBuilder.buildAnd(
      DstReg, SrcReg,
      MIRBuilder.buildConstant(
          Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  LLT DstTy = MRI.getType(SrcReg);

  // The source could be a scalar if the IR type was <1 x sN>.
  if (SrcTy.isScalar()) {
    if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
      return UnableToLegalize; // FIXME: handle extension.
    // This can be just a plain copy.
    Observer.changingInstr(MI);
    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
    Observer.changedInstr(MI);
    return Legalized;
  }
  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerVAArg(MachineInstr &MI) {
  MachineFunction &MF = *MI.getMF();
  const DataLayout &DL = MIRBuilder.getDataLayout();
  LLVMContext &Ctx = MF.getFunction().getContext();
  Register ListPtr = MI.getOperand(1).getReg();
  LLT PtrTy = MRI.getType(ListPtr);

  // LstPtr is a pointer to the head of the list. Get the address
  // of the head of the list.
  Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
  MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
      MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
  auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);

  const Align A(MI.getOperand(2).getImm());
  LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
  if (A > TLI.getMinStackArgumentAlignment()) {
    Register AlignAmt =
        MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
    auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
    auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
    VAList = AndDst.getReg(0);
  }

  // Increment the pointer, VAList, to the next vaarg
  // The list should be bumped by the size of element in the current head of
  // list.
  Register Dst = MI.getOperand(0).getReg();
  LLT LLTTy = MRI.getType(Dst);
  Type *Ty = getTypeForLLT(LLTTy, Ctx);
  auto IncAmt =
      MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
  auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);

  // Store the increment VAList to the legalized pointer
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
  MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
  // Load the actual argument out of the pointer VAList
  Align EltAlignment = DL.getABITypeAlign(Ty);
  MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
      MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
  MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);

  MI.eraseFromParent();
  return Legalized;
}

static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
  // On Darwin, -Os means optimize for size without hurting performance, so
  // only really optimize for size when -Oz (MinSize) is used.
  if (MF.getTarget().getTargetTriple().isOSDarwin())
    return MF.getFunction().hasMinSize();
  return MF.getFunction().hasOptSize();
}

// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                          unsigned Limit, const MemOp &Op,
                                          unsigned DstAS, unsigned SrcAS,
                                          const AttributeList &FuncAttributes,
                                          const TargetLowering &TLI) {
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    Ty = LLT::scalar(64);
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
        Ty = LLT::scalar(Ty.getSizeInBytes());
    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned TySize = Ty.getSizeInBytes();
    while (TySize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      LLT NewTy = Ty;
      // FIXME: check for mem op safety and legality of the types. Not all of
      // SDAGisms map cleanly to GISel concepts.
      if (NewTy.isVector())
        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
      NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
      unsigned NewTySize = NewTy.getSizeInBytes();
      assert(NewTySize > 0 && "Could not find appropriate type");

      // If the new LLT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
      MVT VT = getMVTForLLT(Ty);
      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
          TLI.allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}

// Get a vectorized representation of the memset value operand, GISel edition.
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  unsigned NumBits = Ty.getScalarSizeInBits();
  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
  if (!Ty.isVector() && ValVRegAndVal) {
    APInt Scalar = ValVRegAndVal->Value.trunc(8);
    APInt SplatVal = APInt::getSplat(NumBits, Scalar);
    return MIB.buildConstant(Ty, SplatVal).getReg(0);
  }

  // Extend the byte value to the larger type, and then multiply by a magic
  // value 0x010101... in order to replicate it across every byte.
  // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
  if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
    return MIB.buildConstant(Ty, 0).getReg(0);
  }

  LLT ExtType = Ty.getScalarType();
  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
  if (NumBits > 8) {
    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
    auto MagicMI = MIB.buildConstant(ExtType, Magic);
    Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
  }

  // For vector types create a G_BUILD_VECTOR.
  if (Ty.isVector())
    Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);

  return Val;
}

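// Illustrative example of getMemsetValue above: a memset byte of 0xAB
// replicated into an s32 store becomes 0xABABABAB, either directly as a
// constant splat or via the multiply by 0x01010101 for non-constant values.
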
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
                             uint64_t KnownLen, Align Alignment,
                             bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memset length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();

  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  MachineIRBuilder MIB(MI);
  // Find the largest store and generate the bit pattern for it.
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
      LargestTy = MemOps[i];

  // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return UnableToLegalize;

  // Generate the stores. For each store type in the list, we generate the
  // matching store of that type to the destination address.
  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
      // Issuing an unaligned load / store pair that overlaps with the previous
      // pair. Adjust the offset accordingly.
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }

    // If this store is smaller than the largest store see whether we can get
    // the smaller value for free with a truncate.
    Register Value = MemSetValue;
    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
      MVT VT = getMVTForLLT(Ty);
      MVT LargestVT = getMVTForLLT(LargestTy);
      if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return UnableToLegalize;
    }

    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);

    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }

    MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);

  auto [Dst, Src, Len] = MI.getFirst3Regs();

  const auto *MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;
  bool IsVolatile = MemOp->isVolatile();

  // See if this is a constant length copy
  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
  // FIXME: support dynamically sized G_MEMCPY_INLINE
  assert(LenVRegAndVal &&
         "inline memcpy with dynamic size is not yet supported");
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  Align DstAlign = DstMMO.getBaseAlign();
  Align SrcAlign = SrcMMO.getBaseAlign();

  return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
                                   uint64_t KnownLen, Align DstAlign,
                                   Align SrcAlign, bool IsVolatile) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
  return lowerMemcpy(MI, Dst, Src, KnownLen,
                     std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
                     IsVolatile);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
                             uint64_t KnownLen, uint64_t Limit, Align DstAlign,
                             Align SrcAlign, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memcpy length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align Alignment = std::min(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  // FIXME: infer better src pointer alignment like SelectionDAG does here.
  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
  // if the memcpy is in a tail call position.

  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      IsVolatile),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      if (MaybeAlign StackAlign = DL.getStackAlignment())
        NewAlign = std::min(NewAlign, *StackAlign);

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Now we need to emit a pair of load and stores for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store to the dest buffer
  // of that value loaded. This can result in a sequence of loads and stores
  // mixed types, depending on what the target specifies as good types to use.
  unsigned CurrOffset = 0;
  unsigned Size = KnownLen;
  for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;

    // Construct MMOs for the accesses.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    Register Offset;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
                   .getReg(0);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Create the store.
    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
                              uint64_t KnownLen, Align DstAlign, Align SrcAlign,
                              bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memmove length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = std::min(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in it's findOptimalMemOpLowering implementation. For now do the
  // same thing here.
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      /*IsVolatile*/ true),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      if (MaybeAlign StackAlign = DL.getStackAlignment())
        NewAlign = std::min(NewAlign, *StackAlign);

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Memmove requires that we perform the loads first before issuing the stores.
  // Apart from that, this loop is pretty much doing the same thing as the
  // memcpy codegen function.
  unsigned CurrOffset = 0;
  SmallVector<Register, 16> LoadVals;
  for (auto CopyTy : MemOps) {
    // Construct MMO for the load.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      auto Offset =
          MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  }

  CurrOffset = 0;
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
    // Now store the values loaded.
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      auto Offset =
          MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
  }
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  const unsigned Opc = MI.getOpcode();
  // This combine is fairly complex so it's not written with a separate
  // matcher function.
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET) &&
         "Expected memcpy like instruction");

  auto MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;

  Align DstAlign = MemOp->getBaseAlign();
  Align SrcAlign;
  auto [Dst, Src, Len] = MI.getFirst3Regs();

  if (Opc != TargetOpcode::G_MEMSET) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *(++MMOIt);
    SrcAlign = MemOp->getBaseAlign();
  }

  // See if this is a constant length copy
  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
  if (!LenVRegAndVal)
    return UnableToLegalize;
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();

  if (KnownLen == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  bool IsVolatile = MemOp->isVolatile();
  if (Opc == TargetOpcode::G_MEMCPY_INLINE)
    return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                             IsVolatile);

  // Don't try to optimize volatile.
  if (IsVolatile)
    return UnableToLegalize;

  if (MaxLen && KnownLen > MaxLen)
    return UnableToLegalize;

  if (Opc == TargetOpcode::G_MEMCPY) {
    auto &MF = *MI.getParent()->getParent();
    const auto &TLI = *MF.getSubtarget().getTargetLowering();
    bool OptSize = shouldLowerMemFuncForSize(MF);
    uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
    return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
                       IsVolatile);
  }
  if (Opc == TargetOpcode::G_MEMMOVE)
    return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
  if (Opc == TargetOpcode::G_MEMSET)
    return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
  return UnableToLegalize;