1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetFrameLowering.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
28 #define DEBUG_TYPE "legalizer"
31 using namespace LegalizeActions
;
33 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
35 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
36 /// with any leftover piece as type \p LeftoverTy
38 /// Returns -1 in the first element of the pair if the breakdown is not
40 static std::pair
<int, int>
41 getNarrowTypeBreakDown(LLT OrigTy
, LLT NarrowTy
, LLT
&LeftoverTy
) {
42 assert(!LeftoverTy
.isValid() && "this is an out argument");
44 unsigned Size
= OrigTy
.getSizeInBits();
45 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
46 unsigned NumParts
= Size
/ NarrowSize
;
47 unsigned LeftoverSize
= Size
- NumParts
* NarrowSize
;
48 assert(Size
> NarrowSize
);
50 if (LeftoverSize
== 0)
53 if (NarrowTy
.isVector()) {
54 unsigned EltSize
= OrigTy
.getScalarSizeInBits();
55 if (LeftoverSize
% EltSize
!= 0)
57 LeftoverTy
= LLT::scalarOrVector(LeftoverSize
/ EltSize
, EltSize
);
59 LeftoverTy
= LLT::scalar(LeftoverSize
);
62 int NumLeftover
= LeftoverSize
/ LeftoverTy
.getSizeInBits();
63 return std::make_pair(NumParts
, NumLeftover
);
66 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
,
67 GISelChangeObserver
&Observer
,
68 MachineIRBuilder
&Builder
)
69 : MIRBuilder(Builder
), MRI(MF
.getRegInfo()),
70 LI(*MF
.getSubtarget().getLegalizerInfo()), Observer(Observer
) {
72 MIRBuilder
.setChangeObserver(Observer
);
75 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
, const LegalizerInfo
&LI
,
76 GISelChangeObserver
&Observer
,
78 : MIRBuilder(B
), MRI(MF
.getRegInfo()), LI(LI
), Observer(Observer
) {
80 MIRBuilder
.setChangeObserver(Observer
);
82 LegalizerHelper::LegalizeResult
83 LegalizerHelper::legalizeInstrStep(MachineInstr
&MI
) {
84 LLVM_DEBUG(dbgs() << "Legalizing: "; MI
.print(dbgs()));
86 if (MI
.getOpcode() == TargetOpcode::G_INTRINSIC
||
87 MI
.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
)
88 return LI
.legalizeIntrinsic(MI
, MRI
, MIRBuilder
) ? Legalized
90 auto Step
= LI
.getAction(MI
, MRI
);
91 switch (Step
.Action
) {
93 LLVM_DEBUG(dbgs() << ".. Already legal\n");
96 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
99 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
100 return narrowScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
102 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
103 return widenScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
105 LLVM_DEBUG(dbgs() << ".. Lower\n");
106 return lower(MI
, Step
.TypeIdx
, Step
.NewType
);
108 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
109 return fewerElementsVector(MI
, Step
.TypeIdx
, Step
.NewType
);
111 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
112 return moreElementsVector(MI
, Step
.TypeIdx
, Step
.NewType
);
114 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
115 return LI
.legalizeCustom(MI
, MRI
, MIRBuilder
, Observer
) ? Legalized
118 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
119 return UnableToLegalize
;
123 void LegalizerHelper::extractParts(Register Reg
, LLT Ty
, int NumParts
,
124 SmallVectorImpl
<Register
> &VRegs
) {
125 for (int i
= 0; i
< NumParts
; ++i
)
126 VRegs
.push_back(MRI
.createGenericVirtualRegister(Ty
));
127 MIRBuilder
.buildUnmerge(VRegs
, Reg
);
130 bool LegalizerHelper::extractParts(Register Reg
, LLT RegTy
,
131 LLT MainTy
, LLT
&LeftoverTy
,
132 SmallVectorImpl
<Register
> &VRegs
,
133 SmallVectorImpl
<Register
> &LeftoverRegs
) {
134 assert(!LeftoverTy
.isValid() && "this is an out argument");
136 unsigned RegSize
= RegTy
.getSizeInBits();
137 unsigned MainSize
= MainTy
.getSizeInBits();
138 unsigned NumParts
= RegSize
/ MainSize
;
139 unsigned LeftoverSize
= RegSize
- NumParts
* MainSize
;
141 // Use an unmerge when possible.
142 if (LeftoverSize
== 0) {
143 for (unsigned I
= 0; I
< NumParts
; ++I
)
144 VRegs
.push_back(MRI
.createGenericVirtualRegister(MainTy
));
145 MIRBuilder
.buildUnmerge(VRegs
, Reg
);
149 if (MainTy
.isVector()) {
150 unsigned EltSize
= MainTy
.getScalarSizeInBits();
151 if (LeftoverSize
% EltSize
!= 0)
153 LeftoverTy
= LLT::scalarOrVector(LeftoverSize
/ EltSize
, EltSize
);
155 LeftoverTy
= LLT::scalar(LeftoverSize
);
158 // For irregular sizes, extract the individual parts.
159 for (unsigned I
= 0; I
!= NumParts
; ++I
) {
160 Register NewReg
= MRI
.createGenericVirtualRegister(MainTy
);
161 VRegs
.push_back(NewReg
);
162 MIRBuilder
.buildExtract(NewReg
, Reg
, MainSize
* I
);
165 for (unsigned Offset
= MainSize
* NumParts
; Offset
< RegSize
;
166 Offset
+= LeftoverSize
) {
167 Register NewReg
= MRI
.createGenericVirtualRegister(LeftoverTy
);
168 LeftoverRegs
.push_back(NewReg
);
169 MIRBuilder
.buildExtract(NewReg
, Reg
, Offset
);
175 static LLT
getGCDType(LLT OrigTy
, LLT TargetTy
) {
176 if (OrigTy
.isVector() && TargetTy
.isVector()) {
177 assert(OrigTy
.getElementType() == TargetTy
.getElementType());
178 int GCD
= greatestCommonDivisor(OrigTy
.getNumElements(),
179 TargetTy
.getNumElements());
180 return LLT::scalarOrVector(GCD
, OrigTy
.getElementType());
183 if (OrigTy
.isVector() && !TargetTy
.isVector()) {
184 assert(OrigTy
.getElementType() == TargetTy
);
188 assert(!OrigTy
.isVector() && !TargetTy
.isVector());
190 int GCD
= greatestCommonDivisor(OrigTy
.getSizeInBits(),
191 TargetTy
.getSizeInBits());
192 return LLT::scalar(GCD
);
195 void LegalizerHelper::insertParts(Register DstReg
,
196 LLT ResultTy
, LLT PartTy
,
197 ArrayRef
<Register
> PartRegs
,
199 ArrayRef
<Register
> LeftoverRegs
) {
200 if (!LeftoverTy
.isValid()) {
201 assert(LeftoverRegs
.empty());
203 if (!ResultTy
.isVector()) {
204 MIRBuilder
.buildMerge(DstReg
, PartRegs
);
208 if (PartTy
.isVector())
209 MIRBuilder
.buildConcatVectors(DstReg
, PartRegs
);
211 MIRBuilder
.buildBuildVector(DstReg
, PartRegs
);
215 unsigned PartSize
= PartTy
.getSizeInBits();
216 unsigned LeftoverPartSize
= LeftoverTy
.getSizeInBits();
218 Register CurResultReg
= MRI
.createGenericVirtualRegister(ResultTy
);
219 MIRBuilder
.buildUndef(CurResultReg
);
222 for (Register PartReg
: PartRegs
) {
223 Register NewResultReg
= MRI
.createGenericVirtualRegister(ResultTy
);
224 MIRBuilder
.buildInsert(NewResultReg
, CurResultReg
, PartReg
, Offset
);
225 CurResultReg
= NewResultReg
;
229 for (unsigned I
= 0, E
= LeftoverRegs
.size(); I
!= E
; ++I
) {
230 // Use the original output register for the final insert to avoid a copy.
231 Register NewResultReg
= (I
+ 1 == E
) ?
232 DstReg
: MRI
.createGenericVirtualRegister(ResultTy
);
234 MIRBuilder
.buildInsert(NewResultReg
, CurResultReg
, LeftoverRegs
[I
], Offset
);
235 CurResultReg
= NewResultReg
;
236 Offset
+= LeftoverPartSize
;
240 static RTLIB::Libcall
getRTLibDesc(unsigned Opcode
, unsigned Size
) {
242 case TargetOpcode::G_SDIV
:
243 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
246 return RTLIB::SDIV_I32
;
248 return RTLIB::SDIV_I64
;
250 return RTLIB::SDIV_I128
;
252 llvm_unreachable("unexpected size");
254 case TargetOpcode::G_UDIV
:
255 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
258 return RTLIB::UDIV_I32
;
260 return RTLIB::UDIV_I64
;
262 return RTLIB::UDIV_I128
;
264 llvm_unreachable("unexpected size");
266 case TargetOpcode::G_SREM
:
267 assert((Size
== 32 || Size
== 64) && "Unsupported size");
268 return Size
== 64 ? RTLIB::SREM_I64
: RTLIB::SREM_I32
;
269 case TargetOpcode::G_UREM
:
270 assert((Size
== 32 || Size
== 64) && "Unsupported size");
271 return Size
== 64 ? RTLIB::UREM_I64
: RTLIB::UREM_I32
;
272 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
273 assert(Size
== 32 && "Unsupported size");
274 return RTLIB::CTLZ_I32
;
275 case TargetOpcode::G_FADD
:
276 assert((Size
== 32 || Size
== 64) && "Unsupported size");
277 return Size
== 64 ? RTLIB::ADD_F64
: RTLIB::ADD_F32
;
278 case TargetOpcode::G_FSUB
:
279 assert((Size
== 32 || Size
== 64) && "Unsupported size");
280 return Size
== 64 ? RTLIB::SUB_F64
: RTLIB::SUB_F32
;
281 case TargetOpcode::G_FMUL
:
282 assert((Size
== 32 || Size
== 64) && "Unsupported size");
283 return Size
== 64 ? RTLIB::MUL_F64
: RTLIB::MUL_F32
;
284 case TargetOpcode::G_FDIV
:
285 assert((Size
== 32 || Size
== 64) && "Unsupported size");
286 return Size
== 64 ? RTLIB::DIV_F64
: RTLIB::DIV_F32
;
287 case TargetOpcode::G_FEXP
:
288 assert((Size
== 32 || Size
== 64) && "Unsupported size");
289 return Size
== 64 ? RTLIB::EXP_F64
: RTLIB::EXP_F32
;
290 case TargetOpcode::G_FEXP2
:
291 assert((Size
== 32 || Size
== 64) && "Unsupported size");
292 return Size
== 64 ? RTLIB::EXP2_F64
: RTLIB::EXP2_F32
;
293 case TargetOpcode::G_FREM
:
294 return Size
== 64 ? RTLIB::REM_F64
: RTLIB::REM_F32
;
295 case TargetOpcode::G_FPOW
:
296 return Size
== 64 ? RTLIB::POW_F64
: RTLIB::POW_F32
;
297 case TargetOpcode::G_FMA
:
298 assert((Size
== 32 || Size
== 64) && "Unsupported size");
299 return Size
== 64 ? RTLIB::FMA_F64
: RTLIB::FMA_F32
;
300 case TargetOpcode::G_FSIN
:
301 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
302 return Size
== 128 ? RTLIB::SIN_F128
303 : Size
== 64 ? RTLIB::SIN_F64
: RTLIB::SIN_F32
;
304 case TargetOpcode::G_FCOS
:
305 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
306 return Size
== 128 ? RTLIB::COS_F128
307 : Size
== 64 ? RTLIB::COS_F64
: RTLIB::COS_F32
;
308 case TargetOpcode::G_FLOG10
:
309 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
310 return Size
== 128 ? RTLIB::LOG10_F128
311 : Size
== 64 ? RTLIB::LOG10_F64
: RTLIB::LOG10_F32
;
312 case TargetOpcode::G_FLOG
:
313 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
314 return Size
== 128 ? RTLIB::LOG_F128
315 : Size
== 64 ? RTLIB::LOG_F64
: RTLIB::LOG_F32
;
316 case TargetOpcode::G_FLOG2
:
317 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
318 return Size
== 128 ? RTLIB::LOG2_F128
319 : Size
== 64 ? RTLIB::LOG2_F64
: RTLIB::LOG2_F32
;
320 case TargetOpcode::G_FCEIL
:
321 assert((Size
== 32 || Size
== 64) && "Unsupported size");
322 return Size
== 64 ? RTLIB::CEIL_F64
: RTLIB::CEIL_F32
;
323 case TargetOpcode::G_FFLOOR
:
324 assert((Size
== 32 || Size
== 64) && "Unsupported size");
325 return Size
== 64 ? RTLIB::FLOOR_F64
: RTLIB::FLOOR_F32
;
327 llvm_unreachable("Unknown libcall function");
330 /// True if an instruction is in tail position in its caller. Intended for
331 /// legalizing libcalls as tail calls when possible.
332 static bool isLibCallInTailPosition(MachineInstr
&MI
) {
333 const Function
&F
= MI
.getParent()->getParent()->getFunction();
335 // Conservatively require the attributes of the call to match those of
336 // the return. Ignore NoAlias and NonNull because they don't affect the
338 AttributeList CallerAttrs
= F
.getAttributes();
339 if (AttrBuilder(CallerAttrs
, AttributeList::ReturnIndex
)
340 .removeAttribute(Attribute::NoAlias
)
341 .removeAttribute(Attribute::NonNull
)
345 // It's not safe to eliminate the sign / zero extension of the return value.
346 if (CallerAttrs
.hasAttribute(AttributeList::ReturnIndex
, Attribute::ZExt
) ||
347 CallerAttrs
.hasAttribute(AttributeList::ReturnIndex
, Attribute::SExt
))
350 // Only tail call if the following instruction is a standard return.
351 auto &TII
= *MI
.getMF()->getSubtarget().getInstrInfo();
352 MachineInstr
*Next
= MI
.getNextNode();
353 if (!Next
|| TII
.isTailCall(*Next
) || !Next
->isReturn())
359 LegalizerHelper::LegalizeResult
360 llvm::createLibcall(MachineIRBuilder
&MIRBuilder
, RTLIB::Libcall Libcall
,
361 const CallLowering::ArgInfo
&Result
,
362 ArrayRef
<CallLowering::ArgInfo
> Args
) {
363 auto &CLI
= *MIRBuilder
.getMF().getSubtarget().getCallLowering();
364 auto &TLI
= *MIRBuilder
.getMF().getSubtarget().getTargetLowering();
365 const char *Name
= TLI
.getLibcallName(Libcall
);
367 CallLowering::CallLoweringInfo Info
;
368 Info
.CallConv
= TLI
.getLibcallCallingConv(Libcall
);
369 Info
.Callee
= MachineOperand::CreateES(Name
);
370 Info
.OrigRet
= Result
;
371 std::copy(Args
.begin(), Args
.end(), std::back_inserter(Info
.OrigArgs
));
372 if (!CLI
.lowerCall(MIRBuilder
, Info
))
373 return LegalizerHelper::UnableToLegalize
;
375 return LegalizerHelper::Legalized
;
378 // Useful for libcalls where all operands have the same type.
379 static LegalizerHelper::LegalizeResult
380 simpleLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, unsigned Size
,
382 auto Libcall
= getRTLibDesc(MI
.getOpcode(), Size
);
384 SmallVector
<CallLowering::ArgInfo
, 3> Args
;
385 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
++)
386 Args
.push_back({MI
.getOperand(i
).getReg(), OpType
});
387 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), OpType
},
391 LegalizerHelper::LegalizeResult
392 llvm::createMemLibcall(MachineIRBuilder
&MIRBuilder
, MachineRegisterInfo
&MRI
,
394 assert(MI
.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
);
395 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
397 SmallVector
<CallLowering::ArgInfo
, 3> Args
;
398 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
++) {
399 Register Reg
= MI
.getOperand(i
).getReg();
401 // Need derive an IR type for call lowering.
402 LLT OpLLT
= MRI
.getType(Reg
);
403 Type
*OpTy
= nullptr;
404 if (OpLLT
.isPointer())
405 OpTy
= Type::getInt8PtrTy(Ctx
, OpLLT
.getAddressSpace());
407 OpTy
= IntegerType::get(Ctx
, OpLLT
.getSizeInBits());
408 Args
.push_back({Reg
, OpTy
});
411 auto &CLI
= *MIRBuilder
.getMF().getSubtarget().getCallLowering();
412 auto &TLI
= *MIRBuilder
.getMF().getSubtarget().getTargetLowering();
413 Intrinsic::ID ID
= MI
.getOperand(0).getIntrinsicID();
414 RTLIB::Libcall RTLibcall
;
416 case Intrinsic::memcpy
:
417 RTLibcall
= RTLIB::MEMCPY
;
419 case Intrinsic::memset
:
420 RTLibcall
= RTLIB::MEMSET
;
422 case Intrinsic::memmove
:
423 RTLibcall
= RTLIB::MEMMOVE
;
426 return LegalizerHelper::UnableToLegalize
;
428 const char *Name
= TLI
.getLibcallName(RTLibcall
);
430 MIRBuilder
.setInstr(MI
);
432 CallLowering::CallLoweringInfo Info
;
433 Info
.CallConv
= TLI
.getLibcallCallingConv(RTLibcall
);
434 Info
.Callee
= MachineOperand::CreateES(Name
);
435 Info
.OrigRet
= CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx
));
436 Info
.IsTailCall
= isLibCallInTailPosition(MI
);
438 std::copy(Args
.begin(), Args
.end(), std::back_inserter(Info
.OrigArgs
));
439 if (!CLI
.lowerCall(MIRBuilder
, Info
))
440 return LegalizerHelper::UnableToLegalize
;
442 if (Info
.LoweredTailCall
) {
443 assert(Info
.IsTailCall
&& "Lowered tail call when it wasn't a tail call?");
444 // We must have a return following the call to get past
445 // isLibCallInTailPosition.
446 assert(MI
.getNextNode() && MI
.getNextNode()->isReturn() &&
447 "Expected instr following MI to be a return?");
449 // We lowered a tail call, so the call is now the return from the block.
450 // Delete the old return.
451 MI
.getNextNode()->eraseFromParent();
454 return LegalizerHelper::Legalized
;
457 static RTLIB::Libcall
getConvRTLibDesc(unsigned Opcode
, Type
*ToType
,
459 auto ToMVT
= MVT::getVT(ToType
);
460 auto FromMVT
= MVT::getVT(FromType
);
463 case TargetOpcode::G_FPEXT
:
464 return RTLIB::getFPEXT(FromMVT
, ToMVT
);
465 case TargetOpcode::G_FPTRUNC
:
466 return RTLIB::getFPROUND(FromMVT
, ToMVT
);
467 case TargetOpcode::G_FPTOSI
:
468 return RTLIB::getFPTOSINT(FromMVT
, ToMVT
);
469 case TargetOpcode::G_FPTOUI
:
470 return RTLIB::getFPTOUINT(FromMVT
, ToMVT
);
471 case TargetOpcode::G_SITOFP
:
472 return RTLIB::getSINTTOFP(FromMVT
, ToMVT
);
473 case TargetOpcode::G_UITOFP
:
474 return RTLIB::getUINTTOFP(FromMVT
, ToMVT
);
476 llvm_unreachable("Unsupported libcall function");
479 static LegalizerHelper::LegalizeResult
480 conversionLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, Type
*ToType
,
482 RTLIB::Libcall Libcall
= getConvRTLibDesc(MI
.getOpcode(), ToType
, FromType
);
483 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), ToType
},
484 {{MI
.getOperand(1).getReg(), FromType
}});
487 LegalizerHelper::LegalizeResult
488 LegalizerHelper::libcall(MachineInstr
&MI
) {
489 LLT LLTy
= MRI
.getType(MI
.getOperand(0).getReg());
490 unsigned Size
= LLTy
.getSizeInBits();
491 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
493 MIRBuilder
.setInstr(MI
);
495 switch (MI
.getOpcode()) {
497 return UnableToLegalize
;
498 case TargetOpcode::G_SDIV
:
499 case TargetOpcode::G_UDIV
:
500 case TargetOpcode::G_SREM
:
501 case TargetOpcode::G_UREM
:
502 case TargetOpcode::G_CTLZ_ZERO_UNDEF
: {
503 Type
*HLTy
= IntegerType::get(Ctx
, Size
);
504 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
505 if (Status
!= Legalized
)
509 case TargetOpcode::G_FADD
:
510 case TargetOpcode::G_FSUB
:
511 case TargetOpcode::G_FMUL
:
512 case TargetOpcode::G_FDIV
:
513 case TargetOpcode::G_FMA
:
514 case TargetOpcode::G_FPOW
:
515 case TargetOpcode::G_FREM
:
516 case TargetOpcode::G_FCOS
:
517 case TargetOpcode::G_FSIN
:
518 case TargetOpcode::G_FLOG10
:
519 case TargetOpcode::G_FLOG
:
520 case TargetOpcode::G_FLOG2
:
521 case TargetOpcode::G_FEXP
:
522 case TargetOpcode::G_FEXP2
:
523 case TargetOpcode::G_FCEIL
:
524 case TargetOpcode::G_FFLOOR
: {
526 LLVM_DEBUG(dbgs() << "Size " << Size
<< " too large to legalize.\n");
527 return UnableToLegalize
;
529 Type
*HLTy
= Size
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
);
530 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
531 if (Status
!= Legalized
)
535 case TargetOpcode::G_FPEXT
: {
536 // FIXME: Support other floating point types (half, fp128 etc)
537 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
538 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
539 if (ToSize
!= 64 || FromSize
!= 32)
540 return UnableToLegalize
;
541 LegalizeResult Status
= conversionLibcall(
542 MI
, MIRBuilder
, Type::getDoubleTy(Ctx
), Type::getFloatTy(Ctx
));
543 if (Status
!= Legalized
)
547 case TargetOpcode::G_FPTRUNC
: {
548 // FIXME: Support other floating point types (half, fp128 etc)
549 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
550 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
551 if (ToSize
!= 32 || FromSize
!= 64)
552 return UnableToLegalize
;
553 LegalizeResult Status
= conversionLibcall(
554 MI
, MIRBuilder
, Type::getFloatTy(Ctx
), Type::getDoubleTy(Ctx
));
555 if (Status
!= Legalized
)
559 case TargetOpcode::G_FPTOSI
:
560 case TargetOpcode::G_FPTOUI
: {
561 // FIXME: Support other types
562 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
563 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
564 if ((ToSize
!= 32 && ToSize
!= 64) || (FromSize
!= 32 && FromSize
!= 64))
565 return UnableToLegalize
;
566 LegalizeResult Status
= conversionLibcall(
568 ToSize
== 32 ? Type::getInt32Ty(Ctx
) : Type::getInt64Ty(Ctx
),
569 FromSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
));
570 if (Status
!= Legalized
)
574 case TargetOpcode::G_SITOFP
:
575 case TargetOpcode::G_UITOFP
: {
576 // FIXME: Support other types
577 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
578 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
579 if ((FromSize
!= 32 && FromSize
!= 64) || (ToSize
!= 32 && ToSize
!= 64))
580 return UnableToLegalize
;
581 LegalizeResult Status
= conversionLibcall(
583 ToSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
),
584 FromSize
== 32 ? Type::getInt32Ty(Ctx
) : Type::getInt64Ty(Ctx
));
585 if (Status
!= Legalized
)
591 MI
.eraseFromParent();
595 LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalar(MachineInstr
&MI
,
598 MIRBuilder
.setInstr(MI
);
600 uint64_t SizeOp0
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
601 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
603 switch (MI
.getOpcode()) {
605 return UnableToLegalize
;
606 case TargetOpcode::G_IMPLICIT_DEF
: {
607 // FIXME: add support for when SizeOp0 isn't an exact multiple of
609 if (SizeOp0
% NarrowSize
!= 0)
610 return UnableToLegalize
;
611 int NumParts
= SizeOp0
/ NarrowSize
;
613 SmallVector
<Register
, 2> DstRegs
;
614 for (int i
= 0; i
< NumParts
; ++i
)
616 MIRBuilder
.buildUndef(NarrowTy
)->getOperand(0).getReg());
618 Register DstReg
= MI
.getOperand(0).getReg();
619 if(MRI
.getType(DstReg
).isVector())
620 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
622 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
623 MI
.eraseFromParent();
626 case TargetOpcode::G_CONSTANT
: {
627 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
628 const APInt
&Val
= MI
.getOperand(1).getCImm()->getValue();
629 unsigned TotalSize
= Ty
.getSizeInBits();
630 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
631 int NumParts
= TotalSize
/ NarrowSize
;
633 SmallVector
<Register
, 4> PartRegs
;
634 for (int I
= 0; I
!= NumParts
; ++I
) {
635 unsigned Offset
= I
* NarrowSize
;
636 auto K
= MIRBuilder
.buildConstant(NarrowTy
,
637 Val
.lshr(Offset
).trunc(NarrowSize
));
638 PartRegs
.push_back(K
.getReg(0));
642 unsigned LeftoverBits
= TotalSize
- NumParts
* NarrowSize
;
643 SmallVector
<Register
, 1> LeftoverRegs
;
644 if (LeftoverBits
!= 0) {
645 LeftoverTy
= LLT::scalar(LeftoverBits
);
646 auto K
= MIRBuilder
.buildConstant(
648 Val
.lshr(NumParts
* NarrowSize
).trunc(LeftoverBits
));
649 LeftoverRegs
.push_back(K
.getReg(0));
652 insertParts(MI
.getOperand(0).getReg(),
653 Ty
, NarrowTy
, PartRegs
, LeftoverTy
, LeftoverRegs
);
655 MI
.eraseFromParent();
658 case TargetOpcode::G_SEXT
: {
660 return UnableToLegalize
;
662 Register SrcReg
= MI
.getOperand(1).getReg();
663 LLT SrcTy
= MRI
.getType(SrcReg
);
665 // FIXME: support the general case where the requested NarrowTy may not be
666 // the same as the source type. E.g. s128 = sext(s32)
667 if ((SrcTy
.getSizeInBits() != SizeOp0
/ 2) ||
668 SrcTy
.getSizeInBits() != NarrowTy
.getSizeInBits()) {
669 LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy
<< "\n");
670 return UnableToLegalize
;
673 // Shift the sign bit of the low register through the high register.
675 MIRBuilder
.buildConstant(LLT::scalar(64), NarrowTy
.getSizeInBits() - 1);
676 auto Shift
= MIRBuilder
.buildAShr(NarrowTy
, SrcReg
, ShiftAmt
);
677 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {SrcReg
, Shift
.getReg(0)});
678 MI
.eraseFromParent();
681 case TargetOpcode::G_ZEXT
: {
683 return UnableToLegalize
;
685 LLT SrcTy
= MRI
.getType(MI
.getOperand(1).getReg());
686 uint64_t SizeOp1
= SrcTy
.getSizeInBits();
687 if (SizeOp0
% SizeOp1
!= 0)
688 return UnableToLegalize
;
690 // Generate a merge where the bottom bits are taken from the source, and
691 // zero everything else.
692 Register ZeroReg
= MIRBuilder
.buildConstant(SrcTy
, 0).getReg(0);
693 unsigned NumParts
= SizeOp0
/ SizeOp1
;
694 SmallVector
<Register
, 4> Srcs
= {MI
.getOperand(1).getReg()};
695 for (unsigned Part
= 1; Part
< NumParts
; ++Part
)
696 Srcs
.push_back(ZeroReg
);
697 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), Srcs
);
698 MI
.eraseFromParent();
701 case TargetOpcode::G_TRUNC
: {
703 return UnableToLegalize
;
705 uint64_t SizeOp1
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
706 if (NarrowTy
.getSizeInBits() * 2 != SizeOp1
) {
707 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy
<< "\n");
708 return UnableToLegalize
;
711 auto Unmerge
= MIRBuilder
.buildUnmerge(NarrowTy
, MI
.getOperand(1).getReg());
712 MIRBuilder
.buildCopy(MI
.getOperand(0).getReg(), Unmerge
.getReg(0));
713 MI
.eraseFromParent();
717 case TargetOpcode::G_ADD
: {
718 // FIXME: add support for when SizeOp0 isn't an exact multiple of
720 if (SizeOp0
% NarrowSize
!= 0)
721 return UnableToLegalize
;
722 // Expand in terms of carry-setting/consuming G_ADDE instructions.
723 int NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
725 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, DstRegs
;
726 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
727 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
730 for (int i
= 0; i
< NumParts
; ++i
) {
731 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
732 Register CarryOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
735 MIRBuilder
.buildUAddo(DstReg
, CarryOut
, Src1Regs
[i
], Src2Regs
[i
]);
737 MIRBuilder
.buildUAdde(DstReg
, CarryOut
, Src1Regs
[i
],
738 Src2Regs
[i
], CarryIn
);
741 DstRegs
.push_back(DstReg
);
744 Register DstReg
= MI
.getOperand(0).getReg();
745 if(MRI
.getType(DstReg
).isVector())
746 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
748 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
749 MI
.eraseFromParent();
752 case TargetOpcode::G_SUB
: {
753 // FIXME: add support for when SizeOp0 isn't an exact multiple of
755 if (SizeOp0
% NarrowSize
!= 0)
756 return UnableToLegalize
;
758 int NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
760 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, DstRegs
;
761 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
762 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
764 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
765 Register BorrowOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
766 MIRBuilder
.buildInstr(TargetOpcode::G_USUBO
, {DstReg
, BorrowOut
},
767 {Src1Regs
[0], Src2Regs
[0]});
768 DstRegs
.push_back(DstReg
);
769 Register BorrowIn
= BorrowOut
;
770 for (int i
= 1; i
< NumParts
; ++i
) {
771 DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
772 BorrowOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
774 MIRBuilder
.buildInstr(TargetOpcode::G_USUBE
, {DstReg
, BorrowOut
},
775 {Src1Regs
[i
], Src2Regs
[i
], BorrowIn
});
777 DstRegs
.push_back(DstReg
);
778 BorrowIn
= BorrowOut
;
780 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
781 MI
.eraseFromParent();
784 case TargetOpcode::G_MUL
:
785 case TargetOpcode::G_UMULH
:
786 return narrowScalarMul(MI
, NarrowTy
);
787 case TargetOpcode::G_EXTRACT
:
788 return narrowScalarExtract(MI
, TypeIdx
, NarrowTy
);
789 case TargetOpcode::G_INSERT
:
790 return narrowScalarInsert(MI
, TypeIdx
, NarrowTy
);
791 case TargetOpcode::G_LOAD
: {
792 const auto &MMO
= **MI
.memoperands_begin();
793 Register DstReg
= MI
.getOperand(0).getReg();
794 LLT DstTy
= MRI
.getType(DstReg
);
795 if (DstTy
.isVector())
796 return UnableToLegalize
;
798 if (8 * MMO
.getSize() != DstTy
.getSizeInBits()) {
799 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
800 auto &MMO
= **MI
.memoperands_begin();
801 MIRBuilder
.buildLoad(TmpReg
, MI
.getOperand(1).getReg(), MMO
);
802 MIRBuilder
.buildAnyExt(DstReg
, TmpReg
);
803 MI
.eraseFromParent();
807 return reduceLoadStoreWidth(MI
, TypeIdx
, NarrowTy
);
809 case TargetOpcode::G_ZEXTLOAD
:
810 case TargetOpcode::G_SEXTLOAD
: {
811 bool ZExt
= MI
.getOpcode() == TargetOpcode::G_ZEXTLOAD
;
812 Register DstReg
= MI
.getOperand(0).getReg();
813 Register PtrReg
= MI
.getOperand(1).getReg();
815 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
816 auto &MMO
= **MI
.memoperands_begin();
817 if (MMO
.getSizeInBits() == NarrowSize
) {
818 MIRBuilder
.buildLoad(TmpReg
, PtrReg
, MMO
);
820 unsigned ExtLoad
= ZExt
? TargetOpcode::G_ZEXTLOAD
821 : TargetOpcode::G_SEXTLOAD
;
822 MIRBuilder
.buildInstr(ExtLoad
)
825 .addMemOperand(&MMO
);
829 MIRBuilder
.buildZExt(DstReg
, TmpReg
);
831 MIRBuilder
.buildSExt(DstReg
, TmpReg
);
833 MI
.eraseFromParent();
836 case TargetOpcode::G_STORE
: {
837 const auto &MMO
= **MI
.memoperands_begin();
839 Register SrcReg
= MI
.getOperand(0).getReg();
840 LLT SrcTy
= MRI
.getType(SrcReg
);
841 if (SrcTy
.isVector())
842 return UnableToLegalize
;
844 int NumParts
= SizeOp0
/ NarrowSize
;
845 unsigned HandledSize
= NumParts
* NarrowTy
.getSizeInBits();
846 unsigned LeftoverBits
= SrcTy
.getSizeInBits() - HandledSize
;
847 if (SrcTy
.isVector() && LeftoverBits
!= 0)
848 return UnableToLegalize
;
850 if (8 * MMO
.getSize() != SrcTy
.getSizeInBits()) {
851 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
852 auto &MMO
= **MI
.memoperands_begin();
853 MIRBuilder
.buildTrunc(TmpReg
, SrcReg
);
854 MIRBuilder
.buildStore(TmpReg
, MI
.getOperand(1).getReg(), MMO
);
855 MI
.eraseFromParent();
859 return reduceLoadStoreWidth(MI
, 0, NarrowTy
);
861 case TargetOpcode::G_SELECT
:
862 return narrowScalarSelect(MI
, TypeIdx
, NarrowTy
);
863 case TargetOpcode::G_AND
:
864 case TargetOpcode::G_OR
:
865 case TargetOpcode::G_XOR
: {
866 // Legalize bitwise operation:
867 // A = BinOp<Ty> B, C
869 // B1, ..., BN = G_UNMERGE_VALUES B
870 // C1, ..., CN = G_UNMERGE_VALUES C
871 // A1 = BinOp<Ty/N> B1, C2
873 // AN = BinOp<Ty/N> BN, CN
874 // A = G_MERGE_VALUES A1, ..., AN
875 return narrowScalarBasic(MI
, TypeIdx
, NarrowTy
);
877 case TargetOpcode::G_SHL
:
878 case TargetOpcode::G_LSHR
:
879 case TargetOpcode::G_ASHR
:
880 return narrowScalarShift(MI
, TypeIdx
, NarrowTy
);
881 case TargetOpcode::G_CTLZ
:
882 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
883 case TargetOpcode::G_CTTZ
:
884 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
885 case TargetOpcode::G_CTPOP
:
887 return UnableToLegalize
; // TODO
889 Observer
.changingInstr(MI
);
890 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_ZEXT
);
891 Observer
.changedInstr(MI
);
893 case TargetOpcode::G_INTTOPTR
:
895 return UnableToLegalize
;
897 Observer
.changingInstr(MI
);
898 narrowScalarSrc(MI
, NarrowTy
, 1);
899 Observer
.changedInstr(MI
);
901 case TargetOpcode::G_PTRTOINT
:
903 return UnableToLegalize
;
905 Observer
.changingInstr(MI
);
906 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_ZEXT
);
907 Observer
.changedInstr(MI
);
909 case TargetOpcode::G_PHI
: {
910 unsigned NumParts
= SizeOp0
/ NarrowSize
;
911 SmallVector
<Register
, 2> DstRegs
;
912 SmallVector
<SmallVector
<Register
, 2>, 2> SrcRegs
;
913 DstRegs
.resize(NumParts
);
914 SrcRegs
.resize(MI
.getNumOperands() / 2);
915 Observer
.changingInstr(MI
);
916 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
+= 2) {
917 MachineBasicBlock
&OpMBB
= *MI
.getOperand(i
+ 1).getMBB();
918 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
919 extractParts(MI
.getOperand(i
).getReg(), NarrowTy
, NumParts
,
922 MachineBasicBlock
&MBB
= *MI
.getParent();
923 MIRBuilder
.setInsertPt(MBB
, MI
);
924 for (unsigned i
= 0; i
< NumParts
; ++i
) {
925 DstRegs
[i
] = MRI
.createGenericVirtualRegister(NarrowTy
);
926 MachineInstrBuilder MIB
=
927 MIRBuilder
.buildInstr(TargetOpcode::G_PHI
).addDef(DstRegs
[i
]);
928 for (unsigned j
= 1; j
< MI
.getNumOperands(); j
+= 2)
929 MIB
.addUse(SrcRegs
[j
/ 2][i
]).add(MI
.getOperand(j
+ 1));
931 MIRBuilder
.setInsertPt(MBB
, MBB
.getFirstNonPHI());
932 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
933 Observer
.changedInstr(MI
);
934 MI
.eraseFromParent();
937 case TargetOpcode::G_EXTRACT_VECTOR_ELT
:
938 case TargetOpcode::G_INSERT_VECTOR_ELT
: {
940 return UnableToLegalize
;
942 int OpIdx
= MI
.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT
? 2 : 3;
943 Observer
.changingInstr(MI
);
944 narrowScalarSrc(MI
, NarrowTy
, OpIdx
);
945 Observer
.changedInstr(MI
);
948 case TargetOpcode::G_ICMP
: {
949 uint64_t SrcSize
= MRI
.getType(MI
.getOperand(2).getReg()).getSizeInBits();
950 if (NarrowSize
* 2 != SrcSize
)
951 return UnableToLegalize
;
953 Observer
.changingInstr(MI
);
954 Register LHSL
= MRI
.createGenericVirtualRegister(NarrowTy
);
955 Register LHSH
= MRI
.createGenericVirtualRegister(NarrowTy
);
956 MIRBuilder
.buildUnmerge({LHSL
, LHSH
}, MI
.getOperand(2).getReg());
958 Register RHSL
= MRI
.createGenericVirtualRegister(NarrowTy
);
959 Register RHSH
= MRI
.createGenericVirtualRegister(NarrowTy
);
960 MIRBuilder
.buildUnmerge({RHSL
, RHSH
}, MI
.getOperand(3).getReg());
962 CmpInst::Predicate Pred
=
963 static_cast<CmpInst::Predicate
>(MI
.getOperand(1).getPredicate());
964 LLT ResTy
= MRI
.getType(MI
.getOperand(0).getReg());
966 if (Pred
== CmpInst::ICMP_EQ
|| Pred
== CmpInst::ICMP_NE
) {
967 MachineInstrBuilder XorL
= MIRBuilder
.buildXor(NarrowTy
, LHSL
, RHSL
);
968 MachineInstrBuilder XorH
= MIRBuilder
.buildXor(NarrowTy
, LHSH
, RHSH
);
969 MachineInstrBuilder Or
= MIRBuilder
.buildOr(NarrowTy
, XorL
, XorH
);
970 MachineInstrBuilder Zero
= MIRBuilder
.buildConstant(NarrowTy
, 0);
971 MIRBuilder
.buildICmp(Pred
, MI
.getOperand(0).getReg(), Or
, Zero
);
973 MachineInstrBuilder CmpH
= MIRBuilder
.buildICmp(Pred
, ResTy
, LHSH
, RHSH
);
974 MachineInstrBuilder CmpHEQ
=
975 MIRBuilder
.buildICmp(CmpInst::Predicate::ICMP_EQ
, ResTy
, LHSH
, RHSH
);
976 MachineInstrBuilder CmpLU
= MIRBuilder
.buildICmp(
977 ICmpInst::getUnsignedPredicate(Pred
), ResTy
, LHSL
, RHSL
);
978 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), CmpHEQ
, CmpLU
, CmpH
);
980 Observer
.changedInstr(MI
);
981 MI
.eraseFromParent();
984 case TargetOpcode::G_SEXT_INREG
: {
986 return UnableToLegalize
;
988 if (!MI
.getOperand(2).isImm())
989 return UnableToLegalize
;
990 int64_t SizeInBits
= MI
.getOperand(2).getImm();
992 // So long as the new type has more bits than the bits we're extending we
993 // don't need to break it apart.
994 if (NarrowTy
.getScalarSizeInBits() >= SizeInBits
) {
995 Observer
.changingInstr(MI
);
996 // We don't lose any non-extension bits by truncating the src and
997 // sign-extending the dst.
998 MachineOperand
&MO1
= MI
.getOperand(1);
999 auto TruncMIB
= MIRBuilder
.buildTrunc(NarrowTy
, MO1
.getReg());
1000 MO1
.setReg(TruncMIB
->getOperand(0).getReg());
1002 MachineOperand
&MO2
= MI
.getOperand(0);
1003 Register DstExt
= MRI
.createGenericVirtualRegister(NarrowTy
);
1004 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1005 MIRBuilder
.buildInstr(TargetOpcode::G_SEXT
, {MO2
.getReg()}, {DstExt
});
1007 Observer
.changedInstr(MI
);
1011 // Break it apart. Components below the extension point are unmodified. The
1012 // component containing the extension point becomes a narrower SEXT_INREG.
1013 // Components above it are ashr'd from the component containing the
1015 if (SizeOp0
% NarrowSize
!= 0)
1016 return UnableToLegalize
;
1017 int NumParts
= SizeOp0
/ NarrowSize
;
1019 // List the registers where the destination will be scattered.
1020 SmallVector
<Register
, 2> DstRegs
;
1021 // List the registers where the source will be split.
1022 SmallVector
<Register
, 2> SrcRegs
;
1024 // Create all the temporary registers.
1025 for (int i
= 0; i
< NumParts
; ++i
) {
1026 Register SrcReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
1028 SrcRegs
.push_back(SrcReg
);
1031 // Explode the big arguments into smaller chunks.
1032 MIRBuilder
.buildUnmerge(SrcRegs
, MI
.getOperand(1).getReg());
1034 Register AshrCstReg
=
1035 MIRBuilder
.buildConstant(NarrowTy
, NarrowTy
.getScalarSizeInBits() - 1)
1038 Register FullExtensionReg
= 0;
1039 Register PartialExtensionReg
= 0;
1041 // Do the operation on each small part.
1042 for (int i
= 0; i
< NumParts
; ++i
) {
1043 if ((i
+ 1) * NarrowTy
.getScalarSizeInBits() < SizeInBits
)
1044 DstRegs
.push_back(SrcRegs
[i
]);
1045 else if (i
* NarrowTy
.getScalarSizeInBits() > SizeInBits
) {
1046 assert(PartialExtensionReg
&&
1047 "Expected to visit partial extension before full");
1048 if (FullExtensionReg
) {
1049 DstRegs
.push_back(FullExtensionReg
);
1052 DstRegs
.push_back(MIRBuilder
1053 .buildInstr(TargetOpcode::G_ASHR
, {NarrowTy
},
1054 {PartialExtensionReg
, AshrCstReg
})
1057 FullExtensionReg
= DstRegs
.back();
1062 TargetOpcode::G_SEXT_INREG
, {NarrowTy
},
1063 {SrcRegs
[i
], SizeInBits
% NarrowTy
.getScalarSizeInBits()})
1066 PartialExtensionReg
= DstRegs
.back();
1070 // Gather the destination registers into the final destination.
1071 Register DstReg
= MI
.getOperand(0).getReg();
1072 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
1073 MI
.eraseFromParent();
1079 void LegalizerHelper::widenScalarSrc(MachineInstr
&MI
, LLT WideTy
,
1080 unsigned OpIdx
, unsigned ExtOpcode
) {
1081 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1082 auto ExtB
= MIRBuilder
.buildInstr(ExtOpcode
, {WideTy
}, {MO
.getReg()});
1083 MO
.setReg(ExtB
->getOperand(0).getReg());
1086 void LegalizerHelper::narrowScalarSrc(MachineInstr
&MI
, LLT NarrowTy
,
1088 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1089 auto ExtB
= MIRBuilder
.buildInstr(TargetOpcode::G_TRUNC
, {NarrowTy
},
1091 MO
.setReg(ExtB
->getOperand(0).getReg());
1094 void LegalizerHelper::widenScalarDst(MachineInstr
&MI
, LLT WideTy
,
1095 unsigned OpIdx
, unsigned TruncOpcode
) {
1096 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1097 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1098 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1099 MIRBuilder
.buildInstr(TruncOpcode
, {MO
.getReg()}, {DstExt
});
1103 void LegalizerHelper::narrowScalarDst(MachineInstr
&MI
, LLT NarrowTy
,
1104 unsigned OpIdx
, unsigned ExtOpcode
) {
1105 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1106 Register DstTrunc
= MRI
.createGenericVirtualRegister(NarrowTy
);
1107 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1108 MIRBuilder
.buildInstr(ExtOpcode
, {MO
.getReg()}, {DstTrunc
});
1109 MO
.setReg(DstTrunc
);
1112 void LegalizerHelper::moreElementsVectorDst(MachineInstr
&MI
, LLT WideTy
,
1114 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1115 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1116 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1117 MIRBuilder
.buildExtract(MO
.getReg(), DstExt
, 0);
1121 void LegalizerHelper::moreElementsVectorSrc(MachineInstr
&MI
, LLT MoreTy
,
1123 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1125 LLT OldTy
= MRI
.getType(MO
.getReg());
1126 unsigned OldElts
= OldTy
.getNumElements();
1127 unsigned NewElts
= MoreTy
.getNumElements();
1129 unsigned NumParts
= NewElts
/ OldElts
;
1131 // Use concat_vectors if the result is a multiple of the number of elements.
1132 if (NumParts
* OldElts
== NewElts
) {
1133 SmallVector
<Register
, 8> Parts
;
1134 Parts
.push_back(MO
.getReg());
1136 Register ImpDef
= MIRBuilder
.buildUndef(OldTy
).getReg(0);
1137 for (unsigned I
= 1; I
!= NumParts
; ++I
)
1138 Parts
.push_back(ImpDef
);
1140 auto Concat
= MIRBuilder
.buildConcatVectors(MoreTy
, Parts
);
1141 MO
.setReg(Concat
.getReg(0));
1145 Register MoreReg
= MRI
.createGenericVirtualRegister(MoreTy
);
1146 Register ImpDef
= MIRBuilder
.buildUndef(MoreTy
).getReg(0);
1147 MIRBuilder
.buildInsert(MoreReg
, ImpDef
, MO
.getReg(), 0);
1151 LegalizerHelper::LegalizeResult
1152 LegalizerHelper::widenScalarMergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
1155 return UnableToLegalize
;
1157 Register DstReg
= MI
.getOperand(0).getReg();
1158 LLT DstTy
= MRI
.getType(DstReg
);
1159 if (DstTy
.isVector())
1160 return UnableToLegalize
;
1162 Register Src1
= MI
.getOperand(1).getReg();
1163 LLT SrcTy
= MRI
.getType(Src1
);
1164 const int DstSize
= DstTy
.getSizeInBits();
1165 const int SrcSize
= SrcTy
.getSizeInBits();
1166 const int WideSize
= WideTy
.getSizeInBits();
1167 const int NumMerge
= (DstSize
+ WideSize
- 1) / WideSize
;
1169 unsigned NumOps
= MI
.getNumOperands();
1170 unsigned NumSrc
= MI
.getNumOperands() - 1;
1171 unsigned PartSize
= DstTy
.getSizeInBits() / NumSrc
;
1173 if (WideSize
>= DstSize
) {
1174 // Directly pack the bits in the target type.
1175 Register ResultReg
= MIRBuilder
.buildZExt(WideTy
, Src1
).getReg(0);
1177 for (unsigned I
= 2; I
!= NumOps
; ++I
) {
1178 const unsigned Offset
= (I
- 1) * PartSize
;
1180 Register SrcReg
= MI
.getOperand(I
).getReg();
1181 assert(MRI
.getType(SrcReg
) == LLT::scalar(PartSize
));
1183 auto ZextInput
= MIRBuilder
.buildZExt(WideTy
, SrcReg
);
1185 Register NextResult
= I
+ 1 == NumOps
&& WideTy
== DstTy
? DstReg
:
1186 MRI
.createGenericVirtualRegister(WideTy
);
1188 auto ShiftAmt
= MIRBuilder
.buildConstant(WideTy
, Offset
);
1189 auto Shl
= MIRBuilder
.buildShl(WideTy
, ZextInput
, ShiftAmt
);
1190 MIRBuilder
.buildOr(NextResult
, ResultReg
, Shl
);
1191 ResultReg
= NextResult
;
1194 if (WideSize
> DstSize
)
1195 MIRBuilder
.buildTrunc(DstReg
, ResultReg
);
1196 else if (DstTy
.isPointer())
1197 MIRBuilder
.buildIntToPtr(DstReg
, ResultReg
);
1199 MI
.eraseFromParent();
1203 // Unmerge the original values to the GCD type, and recombine to the next
1204 // multiple greater than the original type.
1206 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1207 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1208 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1209 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1210 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1211 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1212 // %12:_(s12) = G_MERGE_VALUES %10, %11
1214 // Padding with undef if necessary:
1216 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1217 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1218 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1219 // %7:_(s2) = G_IMPLICIT_DEF
1220 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1221 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1222 // %10:_(s12) = G_MERGE_VALUES %8, %9
1224 const int GCD
= greatestCommonDivisor(SrcSize
, WideSize
);
1225 LLT GCDTy
= LLT::scalar(GCD
);
1227 SmallVector
<Register
, 8> Parts
;
1228 SmallVector
<Register
, 8> NewMergeRegs
;
1229 SmallVector
<Register
, 8> Unmerges
;
1230 LLT WideDstTy
= LLT::scalar(NumMerge
* WideSize
);
1232 // Decompose the original operands if they don't evenly divide.
1233 for (int I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
1234 Register SrcReg
= MI
.getOperand(I
).getReg();
1235 if (GCD
== SrcSize
) {
1236 Unmerges
.push_back(SrcReg
);
1238 auto Unmerge
= MIRBuilder
.buildUnmerge(GCDTy
, SrcReg
);
1239 for (int J
= 0, JE
= Unmerge
->getNumOperands() - 1; J
!= JE
; ++J
)
1240 Unmerges
.push_back(Unmerge
.getReg(J
));
1244 // Pad with undef to the next size that is a multiple of the requested size.
1245 if (static_cast<int>(Unmerges
.size()) != NumMerge
* WideSize
) {
1246 Register UndefReg
= MIRBuilder
.buildUndef(GCDTy
).getReg(0);
1247 for (int I
= Unmerges
.size(); I
!= NumMerge
* WideSize
; ++I
)
1248 Unmerges
.push_back(UndefReg
);
1251 const int PartsPerGCD
= WideSize
/ GCD
;
1253 // Build merges of each piece.
1254 ArrayRef
<Register
> Slicer(Unmerges
);
1255 for (int I
= 0; I
!= NumMerge
; ++I
, Slicer
= Slicer
.drop_front(PartsPerGCD
)) {
1256 auto Merge
= MIRBuilder
.buildMerge(WideTy
, Slicer
.take_front(PartsPerGCD
));
1257 NewMergeRegs
.push_back(Merge
.getReg(0));
1260 // A truncate may be necessary if the requested type doesn't evenly divide the
1261 // original result type.
1262 if (DstTy
.getSizeInBits() == WideDstTy
.getSizeInBits()) {
1263 MIRBuilder
.buildMerge(DstReg
, NewMergeRegs
);
1265 auto FinalMerge
= MIRBuilder
.buildMerge(WideDstTy
, NewMergeRegs
);
1266 MIRBuilder
.buildTrunc(DstReg
, FinalMerge
.getReg(0));
1269 MI
.eraseFromParent();
1273 LegalizerHelper::LegalizeResult
1274 LegalizerHelper::widenScalarUnmergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
1277 return UnableToLegalize
;
1279 unsigned NumDst
= MI
.getNumOperands() - 1;
1280 Register SrcReg
= MI
.getOperand(NumDst
).getReg();
1281 LLT SrcTy
= MRI
.getType(SrcReg
);
1282 if (!SrcTy
.isScalar())
1283 return UnableToLegalize
;
1285 Register Dst0Reg
= MI
.getOperand(0).getReg();
1286 LLT DstTy
= MRI
.getType(Dst0Reg
);
1287 if (!DstTy
.isScalar())
1288 return UnableToLegalize
;
1290 unsigned NewSrcSize
= NumDst
* WideTy
.getSizeInBits();
1291 LLT NewSrcTy
= LLT::scalar(NewSrcSize
);
1292 unsigned SizeDiff
= WideTy
.getSizeInBits() - DstTy
.getSizeInBits();
1294 auto WideSrc
= MIRBuilder
.buildZExt(NewSrcTy
, SrcReg
);
1296 for (unsigned I
= 1; I
!= NumDst
; ++I
) {
1297 auto ShiftAmt
= MIRBuilder
.buildConstant(NewSrcTy
, SizeDiff
* I
);
1298 auto Shl
= MIRBuilder
.buildShl(NewSrcTy
, WideSrc
, ShiftAmt
);
1299 WideSrc
= MIRBuilder
.buildOr(NewSrcTy
, WideSrc
, Shl
);
1302 Observer
.changingInstr(MI
);
1304 MI
.getOperand(NumDst
).setReg(WideSrc
->getOperand(0).getReg());
1305 for (unsigned I
= 0; I
!= NumDst
; ++I
)
1306 widenScalarDst(MI
, WideTy
, I
);
1308 Observer
.changedInstr(MI
);
1313 LegalizerHelper::LegalizeResult
1314 LegalizerHelper::widenScalarExtract(MachineInstr
&MI
, unsigned TypeIdx
,
1316 Register DstReg
= MI
.getOperand(0).getReg();
1317 Register SrcReg
= MI
.getOperand(1).getReg();
1318 LLT SrcTy
= MRI
.getType(SrcReg
);
1320 LLT DstTy
= MRI
.getType(DstReg
);
1321 unsigned Offset
= MI
.getOperand(2).getImm();
1324 if (SrcTy
.isVector() || DstTy
.isVector())
1325 return UnableToLegalize
;
1328 if (SrcTy
.isPointer()) {
1329 // Extracts from pointers can be handled only if they are really just
1331 const DataLayout
&DL
= MIRBuilder
.getDataLayout();
1332 if (DL
.isNonIntegralAddressSpace(SrcTy
.getAddressSpace()))
1333 return UnableToLegalize
;
1335 LLT SrcAsIntTy
= LLT::scalar(SrcTy
.getSizeInBits());
1336 Src
= MIRBuilder
.buildPtrToInt(SrcAsIntTy
, Src
);
1340 if (DstTy
.isPointer())
1341 return UnableToLegalize
;
1344 // Avoid a shift in the degenerate case.
1345 MIRBuilder
.buildTrunc(DstReg
,
1346 MIRBuilder
.buildAnyExtOrTrunc(WideTy
, Src
));
1347 MI
.eraseFromParent();
1351 // Do a shift in the source type.
1352 LLT ShiftTy
= SrcTy
;
1353 if (WideTy
.getSizeInBits() > SrcTy
.getSizeInBits()) {
1354 Src
= MIRBuilder
.buildAnyExt(WideTy
, Src
);
1356 } else if (WideTy
.getSizeInBits() > SrcTy
.getSizeInBits())
1357 return UnableToLegalize
;
1359 auto LShr
= MIRBuilder
.buildLShr(
1360 ShiftTy
, Src
, MIRBuilder
.buildConstant(ShiftTy
, Offset
));
1361 MIRBuilder
.buildTrunc(DstReg
, LShr
);
1362 MI
.eraseFromParent();
1366 if (SrcTy
.isScalar()) {
1367 Observer
.changingInstr(MI
);
1368 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1369 Observer
.changedInstr(MI
);
1373 if (!SrcTy
.isVector())
1374 return UnableToLegalize
;
1376 if (DstTy
!= SrcTy
.getElementType())
1377 return UnableToLegalize
;
1379 if (Offset
% SrcTy
.getScalarSizeInBits() != 0)
1380 return UnableToLegalize
;
1382 Observer
.changingInstr(MI
);
1383 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1385 MI
.getOperand(2).setImm((WideTy
.getSizeInBits() / SrcTy
.getSizeInBits()) *
1387 widenScalarDst(MI
, WideTy
.getScalarType(), 0);
1388 Observer
.changedInstr(MI
);
1392 LegalizerHelper::LegalizeResult
1393 LegalizerHelper::widenScalarInsert(MachineInstr
&MI
, unsigned TypeIdx
,
1396 return UnableToLegalize
;
1397 Observer
.changingInstr(MI
);
1398 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1399 widenScalarDst(MI
, WideTy
);
1400 Observer
.changedInstr(MI
);
1404 LegalizerHelper::LegalizeResult
1405 LegalizerHelper::widenScalar(MachineInstr
&MI
, unsigned TypeIdx
, LLT WideTy
) {
1406 MIRBuilder
.setInstr(MI
);
1408 switch (MI
.getOpcode()) {
1410 return UnableToLegalize
;
1411 case TargetOpcode::G_EXTRACT
:
1412 return widenScalarExtract(MI
, TypeIdx
, WideTy
);
1413 case TargetOpcode::G_INSERT
:
1414 return widenScalarInsert(MI
, TypeIdx
, WideTy
);
1415 case TargetOpcode::G_MERGE_VALUES
:
1416 return widenScalarMergeValues(MI
, TypeIdx
, WideTy
);
1417 case TargetOpcode::G_UNMERGE_VALUES
:
1418 return widenScalarUnmergeValues(MI
, TypeIdx
, WideTy
);
1419 case TargetOpcode::G_UADDO
:
1420 case TargetOpcode::G_USUBO
: {
1422 return UnableToLegalize
; // TODO
1423 auto LHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, {WideTy
},
1424 {MI
.getOperand(2).getReg()});
1425 auto RHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, {WideTy
},
1426 {MI
.getOperand(3).getReg()});
1427 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_UADDO
1428 ? TargetOpcode::G_ADD
1429 : TargetOpcode::G_SUB
;
1430 // Do the arithmetic in the larger type.
1431 auto NewOp
= MIRBuilder
.buildInstr(Opcode
, {WideTy
}, {LHSZext
, RHSZext
});
1432 LLT OrigTy
= MRI
.getType(MI
.getOperand(0).getReg());
1433 APInt Mask
= APInt::getAllOnesValue(OrigTy
.getSizeInBits());
1434 auto AndOp
= MIRBuilder
.buildInstr(
1435 TargetOpcode::G_AND
, {WideTy
},
1436 {NewOp
, MIRBuilder
.buildConstant(WideTy
, Mask
.getZExtValue())});
1437 // There is no overflow if the AndOp is the same as NewOp.
1438 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, MI
.getOperand(1).getReg(), NewOp
,
1440 // Now trunc the NewOp to the original result.
1441 MIRBuilder
.buildTrunc(MI
.getOperand(0).getReg(), NewOp
);
1442 MI
.eraseFromParent();
1445 case TargetOpcode::G_CTTZ
:
1446 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
1447 case TargetOpcode::G_CTLZ
:
1448 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
1449 case TargetOpcode::G_CTPOP
: {
1451 Observer
.changingInstr(MI
);
1452 widenScalarDst(MI
, WideTy
, 0);
1453 Observer
.changedInstr(MI
);
1457 Register SrcReg
= MI
.getOperand(1).getReg();
1459 // First ZEXT the input.
1460 auto MIBSrc
= MIRBuilder
.buildZExt(WideTy
, SrcReg
);
1461 LLT CurTy
= MRI
.getType(SrcReg
);
1462 if (MI
.getOpcode() == TargetOpcode::G_CTTZ
) {
1463 // The count is the same in the larger type except if the original
1464 // value was zero. This can be handled by setting the bit just off
1465 // the top of the original type.
1467 APInt::getOneBitSet(WideTy
.getSizeInBits(), CurTy
.getSizeInBits());
1468 MIBSrc
= MIRBuilder
.buildOr(
1469 WideTy
, MIBSrc
, MIRBuilder
.buildConstant(WideTy
, TopBit
));
1472 // Perform the operation at the larger size.
1473 auto MIBNewOp
= MIRBuilder
.buildInstr(MI
.getOpcode(), {WideTy
}, {MIBSrc
});
1474 // This is already the correct result for CTPOP and CTTZs
1475 if (MI
.getOpcode() == TargetOpcode::G_CTLZ
||
1476 MI
.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF
) {
1477 // The correct result is NewOp - (Difference in widety and current ty).
1478 unsigned SizeDiff
= WideTy
.getSizeInBits() - CurTy
.getSizeInBits();
1479 MIBNewOp
= MIRBuilder
.buildInstr(
1480 TargetOpcode::G_SUB
, {WideTy
},
1481 {MIBNewOp
, MIRBuilder
.buildConstant(WideTy
, SizeDiff
)});
1484 MIRBuilder
.buildZExtOrTrunc(MI
.getOperand(0), MIBNewOp
);
1485 MI
.eraseFromParent();
1488 case TargetOpcode::G_BSWAP
: {
1489 Observer
.changingInstr(MI
);
1490 Register DstReg
= MI
.getOperand(0).getReg();
1492 Register ShrReg
= MRI
.createGenericVirtualRegister(WideTy
);
1493 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1494 Register ShiftAmtReg
= MRI
.createGenericVirtualRegister(WideTy
);
1495 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1497 MI
.getOperand(0).setReg(DstExt
);
1499 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1501 LLT Ty
= MRI
.getType(DstReg
);
1502 unsigned DiffBits
= WideTy
.getScalarSizeInBits() - Ty
.getScalarSizeInBits();
1503 MIRBuilder
.buildConstant(ShiftAmtReg
, DiffBits
);
1504 MIRBuilder
.buildInstr(TargetOpcode::G_LSHR
)
1507 .addUse(ShiftAmtReg
);
1509 MIRBuilder
.buildTrunc(DstReg
, ShrReg
);
1510 Observer
.changedInstr(MI
);
1513 case TargetOpcode::G_BITREVERSE
: {
1514 Observer
.changingInstr(MI
);
1516 Register DstReg
= MI
.getOperand(0).getReg();
1517 LLT Ty
= MRI
.getType(DstReg
);
1518 unsigned DiffBits
= WideTy
.getScalarSizeInBits() - Ty
.getScalarSizeInBits();
1520 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1521 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1522 MI
.getOperand(0).setReg(DstExt
);
1523 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1525 auto ShiftAmt
= MIRBuilder
.buildConstant(WideTy
, DiffBits
);
1526 auto Shift
= MIRBuilder
.buildLShr(WideTy
, DstExt
, ShiftAmt
);
1527 MIRBuilder
.buildTrunc(DstReg
, Shift
);
1528 Observer
.changedInstr(MI
);
1531 case TargetOpcode::G_ADD
:
1532 case TargetOpcode::G_AND
:
1533 case TargetOpcode::G_MUL
:
1534 case TargetOpcode::G_OR
:
1535 case TargetOpcode::G_XOR
:
1536 case TargetOpcode::G_SUB
:
1537 // Perform operation at larger width (any extension is fines here, high bits
1538 // don't affect the result) and then truncate the result back to the
1540 Observer
.changingInstr(MI
);
1541 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1542 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
1543 widenScalarDst(MI
, WideTy
);
1544 Observer
.changedInstr(MI
);
1547 case TargetOpcode::G_SHL
:
1548 Observer
.changingInstr(MI
);
1551 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1552 widenScalarDst(MI
, WideTy
);
1554 assert(TypeIdx
== 1);
1555 // The "number of bits to shift" operand must preserve its value as an
1556 // unsigned integer:
1557 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1560 Observer
.changedInstr(MI
);
1563 case TargetOpcode::G_SDIV
:
1564 case TargetOpcode::G_SREM
:
1565 case TargetOpcode::G_SMIN
:
1566 case TargetOpcode::G_SMAX
:
1567 Observer
.changingInstr(MI
);
1568 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
1569 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1570 widenScalarDst(MI
, WideTy
);
1571 Observer
.changedInstr(MI
);
1574 case TargetOpcode::G_ASHR
:
1575 case TargetOpcode::G_LSHR
:
1576 Observer
.changingInstr(MI
);
1579 unsigned CvtOp
= MI
.getOpcode() == TargetOpcode::G_ASHR
?
1580 TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT
;
1582 widenScalarSrc(MI
, WideTy
, 1, CvtOp
);
1583 widenScalarDst(MI
, WideTy
);
1585 assert(TypeIdx
== 1);
1586 // The "number of bits to shift" operand must preserve its value as an
1587 // unsigned integer:
1588 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1591 Observer
.changedInstr(MI
);
1593 case TargetOpcode::G_UDIV
:
1594 case TargetOpcode::G_UREM
:
1595 case TargetOpcode::G_UMIN
:
1596 case TargetOpcode::G_UMAX
:
1597 Observer
.changingInstr(MI
);
1598 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1599 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1600 widenScalarDst(MI
, WideTy
);
1601 Observer
.changedInstr(MI
);
1604 case TargetOpcode::G_SELECT
:
1605 Observer
.changingInstr(MI
);
1607 // Perform operation at larger width (any extension is fine here, high
1608 // bits don't affect the result) and then truncate the result back to the
1610 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
1611 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_ANYEXT
);
1612 widenScalarDst(MI
, WideTy
);
1614 bool IsVec
= MRI
.getType(MI
.getOperand(1).getReg()).isVector();
1615 // Explicit extension is required here since high bits affect the result.
1616 widenScalarSrc(MI
, WideTy
, 1, MIRBuilder
.getBoolExtOp(IsVec
, false));
1618 Observer
.changedInstr(MI
);
1621 case TargetOpcode::G_FPTOSI
:
1622 case TargetOpcode::G_FPTOUI
:
1624 return UnableToLegalize
;
1625 Observer
.changingInstr(MI
);
1626 widenScalarDst(MI
, WideTy
);
1627 Observer
.changedInstr(MI
);
1630 case TargetOpcode::G_SITOFP
:
1632 return UnableToLegalize
;
1633 Observer
.changingInstr(MI
);
1634 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
1635 Observer
.changedInstr(MI
);
1638 case TargetOpcode::G_UITOFP
:
1640 return UnableToLegalize
;
1641 Observer
.changingInstr(MI
);
1642 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1643 Observer
.changedInstr(MI
);
1646 case TargetOpcode::G_LOAD
:
1647 case TargetOpcode::G_SEXTLOAD
:
1648 case TargetOpcode::G_ZEXTLOAD
:
1649 Observer
.changingInstr(MI
);
1650 widenScalarDst(MI
, WideTy
);
1651 Observer
.changedInstr(MI
);
1654 case TargetOpcode::G_STORE
: {
1656 return UnableToLegalize
;
1658 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
1659 if (!isPowerOf2_32(Ty
.getSizeInBits()))
1660 return UnableToLegalize
;
1662 Observer
.changingInstr(MI
);
1664 unsigned ExtType
= Ty
.getScalarSizeInBits() == 1 ?
1665 TargetOpcode::G_ZEXT
: TargetOpcode::G_ANYEXT
;
1666 widenScalarSrc(MI
, WideTy
, 0, ExtType
);
1668 Observer
.changedInstr(MI
);
1671 case TargetOpcode::G_CONSTANT
: {
1672 MachineOperand
&SrcMO
= MI
.getOperand(1);
1673 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1674 const APInt
&Val
= SrcMO
.getCImm()->getValue().sext(WideTy
.getSizeInBits());
1675 Observer
.changingInstr(MI
);
1676 SrcMO
.setCImm(ConstantInt::get(Ctx
, Val
));
1678 widenScalarDst(MI
, WideTy
);
1679 Observer
.changedInstr(MI
);
1682 case TargetOpcode::G_FCONSTANT
: {
1683 MachineOperand
&SrcMO
= MI
.getOperand(1);
1684 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1685 APFloat Val
= SrcMO
.getFPImm()->getValueAPF();
1687 switch (WideTy
.getSizeInBits()) {
1689 Val
.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven
,
1693 Val
.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven
,
1697 return UnableToLegalize
;
1700 assert(!LosesInfo
&& "extend should always be lossless");
1702 Observer
.changingInstr(MI
);
1703 SrcMO
.setFPImm(ConstantFP::get(Ctx
, Val
));
1705 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
1706 Observer
.changedInstr(MI
);
1709 case TargetOpcode::G_IMPLICIT_DEF
: {
1710 Observer
.changingInstr(MI
);
1711 widenScalarDst(MI
, WideTy
);
1712 Observer
.changedInstr(MI
);
1715 case TargetOpcode::G_BRCOND
:
1716 Observer
.changingInstr(MI
);
1717 widenScalarSrc(MI
, WideTy
, 0, MIRBuilder
.getBoolExtOp(false, false));
1718 Observer
.changedInstr(MI
);
1721 case TargetOpcode::G_FCMP
:
1722 Observer
.changingInstr(MI
);
1724 widenScalarDst(MI
, WideTy
);
1726 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_FPEXT
);
1727 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_FPEXT
);
1729 Observer
.changedInstr(MI
);
1732 case TargetOpcode::G_ICMP
:
1733 Observer
.changingInstr(MI
);
1735 widenScalarDst(MI
, WideTy
);
1737 unsigned ExtOpcode
= CmpInst::isSigned(static_cast<CmpInst::Predicate
>(
1738 MI
.getOperand(1).getPredicate()))
1739 ? TargetOpcode::G_SEXT
1740 : TargetOpcode::G_ZEXT
;
1741 widenScalarSrc(MI
, WideTy
, 2, ExtOpcode
);
1742 widenScalarSrc(MI
, WideTy
, 3, ExtOpcode
);
1744 Observer
.changedInstr(MI
);
1747 case TargetOpcode::G_GEP
:
1748 assert(TypeIdx
== 1 && "unable to legalize pointer of GEP");
1749 Observer
.changingInstr(MI
);
1750 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1751 Observer
.changedInstr(MI
);
1754 case TargetOpcode::G_PHI
: {
1755 assert(TypeIdx
== 0 && "Expecting only Idx 0");
1757 Observer
.changingInstr(MI
);
1758 for (unsigned I
= 1; I
< MI
.getNumOperands(); I
+= 2) {
1759 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
1760 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
1761 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_ANYEXT
);
1764 MachineBasicBlock
&MBB
= *MI
.getParent();
1765 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
1766 widenScalarDst(MI
, WideTy
);
1767 Observer
.changedInstr(MI
);
1770 case TargetOpcode::G_EXTRACT_VECTOR_ELT
: {
1772 Register VecReg
= MI
.getOperand(1).getReg();
1773 LLT VecTy
= MRI
.getType(VecReg
);
1774 Observer
.changingInstr(MI
);
1776 widenScalarSrc(MI
, LLT::vector(VecTy
.getNumElements(),
1777 WideTy
.getSizeInBits()),
1778 1, TargetOpcode::G_SEXT
);
1780 widenScalarDst(MI
, WideTy
, 0);
1781 Observer
.changedInstr(MI
);
1786 return UnableToLegalize
;
1787 Observer
.changingInstr(MI
);
1788 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1789 Observer
.changedInstr(MI
);
1792 case TargetOpcode::G_FADD
:
1793 case TargetOpcode::G_FMUL
:
1794 case TargetOpcode::G_FSUB
:
1795 case TargetOpcode::G_FMA
:
1796 case TargetOpcode::G_FMAD
:
1797 case TargetOpcode::G_FNEG
:
1798 case TargetOpcode::G_FABS
:
1799 case TargetOpcode::G_FCANONICALIZE
:
1800 case TargetOpcode::G_FMINNUM
:
1801 case TargetOpcode::G_FMAXNUM
:
1802 case TargetOpcode::G_FMINNUM_IEEE
:
1803 case TargetOpcode::G_FMAXNUM_IEEE
:
1804 case TargetOpcode::G_FMINIMUM
:
1805 case TargetOpcode::G_FMAXIMUM
:
1806 case TargetOpcode::G_FDIV
:
1807 case TargetOpcode::G_FREM
:
1808 case TargetOpcode::G_FCEIL
:
1809 case TargetOpcode::G_FFLOOR
:
1810 case TargetOpcode::G_FCOS
:
1811 case TargetOpcode::G_FSIN
:
1812 case TargetOpcode::G_FLOG10
:
1813 case TargetOpcode::G_FLOG
:
1814 case TargetOpcode::G_FLOG2
:
1815 case TargetOpcode::G_FRINT
:
1816 case TargetOpcode::G_FNEARBYINT
:
1817 case TargetOpcode::G_FSQRT
:
1818 case TargetOpcode::G_FEXP
:
1819 case TargetOpcode::G_FEXP2
:
1820 case TargetOpcode::G_FPOW
:
1821 case TargetOpcode::G_INTRINSIC_TRUNC
:
1822 case TargetOpcode::G_INTRINSIC_ROUND
:
1823 assert(TypeIdx
== 0);
1824 Observer
.changingInstr(MI
);
1826 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
)
1827 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_FPEXT
);
1829 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
1830 Observer
.changedInstr(MI
);
1832 case TargetOpcode::G_INTTOPTR
:
1834 return UnableToLegalize
;
1836 Observer
.changingInstr(MI
);
1837 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1838 Observer
.changedInstr(MI
);
1840 case TargetOpcode::G_PTRTOINT
:
1842 return UnableToLegalize
;
1844 Observer
.changingInstr(MI
);
1845 widenScalarDst(MI
, WideTy
, 0);
1846 Observer
.changedInstr(MI
);
1848 case TargetOpcode::G_BUILD_VECTOR
: {
1849 Observer
.changingInstr(MI
);
1851 const LLT WideEltTy
= TypeIdx
== 1 ? WideTy
: WideTy
.getElementType();
1852 for (int I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
)
1853 widenScalarSrc(MI
, WideEltTy
, I
, TargetOpcode::G_ANYEXT
);
1855 // Avoid changing the result vector type if the source element type was
1858 auto &TII
= *MI
.getMF()->getSubtarget().getInstrInfo();
1859 MI
.setDesc(TII
.get(TargetOpcode::G_BUILD_VECTOR_TRUNC
));
1861 widenScalarDst(MI
, WideTy
, 0);
1864 Observer
.changedInstr(MI
);
1867 case TargetOpcode::G_SEXT_INREG
:
1869 return UnableToLegalize
;
1871 Observer
.changingInstr(MI
);
1872 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1873 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_TRUNC
);
1874 Observer
.changedInstr(MI
);
// NOTE(review): this chunk is a lossy, line-mangled extraction of LLVM's
// GlobalISel LegalizerHelper.cpp — original source line numbers are fused
// into the statements and some lines are missing from the paste. The code
// below is left byte-identical; only comments are added.
//
// lower() — expands an instruction the target marked as Lower into a
// sequence of simpler generic opcodes, switching on MI.getOpcode().
// Returns UnableToLegalize for unhandled opcodes; successful cases erase MI.
1879 LegalizerHelper::LegalizeResult
1880 LegalizerHelper::lower(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
1881 using namespace TargetOpcode
;
1882 MIRBuilder
.setInstr(MI
);
1884 switch(MI
.getOpcode()) {
1886 return UnableToLegalize
;
// G_SREM/G_UREM: rem = x - (x / y) * y, built from div + mul + sub.
1887 case TargetOpcode::G_SREM
:
1888 case TargetOpcode::G_UREM
: {
1889 Register QuotReg
= MRI
.createGenericVirtualRegister(Ty
);
1890 MIRBuilder
.buildInstr(MI
.getOpcode() == G_SREM
? G_SDIV
: G_UDIV
)
1892 .addUse(MI
.getOperand(1).getReg())
1893 .addUse(MI
.getOperand(2).getReg());
1895 Register ProdReg
= MRI
.createGenericVirtualRegister(Ty
);
1896 MIRBuilder
.buildMul(ProdReg
, QuotReg
, MI
.getOperand(2).getReg());
1897 MIRBuilder
.buildSub(MI
.getOperand(0).getReg(), MI
.getOperand(1).getReg(),
1899 MI
.eraseFromParent();
// G_SMULO/G_UMULO: multiply-with-overflow via full multiply plus a
// high-half multiply, comparing the high part to detect overflow.
1902 case TargetOpcode::G_SMULO
:
1903 case TargetOpcode::G_UMULO
: {
1904 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1906 Register Res
= MI
.getOperand(0).getReg();
1907 Register Overflow
= MI
.getOperand(1).getReg();
1908 Register LHS
= MI
.getOperand(2).getReg();
1909 Register RHS
= MI
.getOperand(3).getReg();
1911 MIRBuilder
.buildMul(Res
, LHS
, RHS
);
1913 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_SMULO
1914 ? TargetOpcode::G_SMULH
1915 : TargetOpcode::G_UMULH
;
1917 Register HiPart
= MRI
.createGenericVirtualRegister(Ty
);
1918 MIRBuilder
.buildInstr(Opcode
)
1923 Register Zero
= MRI
.createGenericVirtualRegister(Ty
);
1924 MIRBuilder
.buildConstant(Zero
, 0);
1926 // For *signed* multiply, overflow is detected by checking:
1927 // (hi != (lo >> bitwidth-1))
1928 if (Opcode
== TargetOpcode::G_SMULH
) {
1929 Register Shifted
= MRI
.createGenericVirtualRegister(Ty
);
1930 Register ShiftAmt
= MRI
.createGenericVirtualRegister(Ty
);
1931 MIRBuilder
.buildConstant(ShiftAmt
, Ty
.getSizeInBits() - 1);
1932 MIRBuilder
.buildInstr(TargetOpcode::G_ASHR
)
1936 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Shifted
);
// Unsigned case: overflow iff the high half is non-zero.
1938 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Zero
);
1940 MI
.eraseFromParent();
// G_FNEG: lowered as (G_FSUB -0.0-style-zero, x); picks the IR float type
// matching the bit width to build the zero constant.
1943 case TargetOpcode::G_FNEG
: {
1944 // TODO: Handle vector types once we are able to
1947 return UnableToLegalize
;
1948 Register Res
= MI
.getOperand(0).getReg();
1950 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1951 switch (Ty
.getSizeInBits()) {
1953 ZeroTy
= Type::getHalfTy(Ctx
);
1956 ZeroTy
= Type::getFloatTy(Ctx
);
1959 ZeroTy
= Type::getDoubleTy(Ctx
);
1962 ZeroTy
= Type::getFP128Ty(Ctx
);
1965 llvm_unreachable("unexpected floating-point type");
1967 ConstantFP
&ZeroForNegation
=
1968 *cast
<ConstantFP
>(ConstantFP::getZeroValueForNegation(ZeroTy
));
1969 auto Zero
= MIRBuilder
.buildFConstant(Ty
, ZeroForNegation
);
1970 Register SubByReg
= MI
.getOperand(1).getReg();
1971 Register ZeroReg
= Zero
->getOperand(0).getReg();
1972 MIRBuilder
.buildInstr(TargetOpcode::G_FSUB
, {Res
}, {ZeroReg
, SubByReg
},
1974 MI
.eraseFromParent();
1977 case TargetOpcode::G_FSUB
: {
1978 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1979 // First, check if G_FNEG is marked as Lower. If so, we may
1980 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1981 if (LI
.getAction({G_FNEG
, {Ty
}}).Action
== Lower
)
1982 return UnableToLegalize
;
1983 Register Res
= MI
.getOperand(0).getReg();
1984 Register LHS
= MI
.getOperand(1).getReg();
1985 Register RHS
= MI
.getOperand(2).getReg();
1986 Register Neg
= MRI
.createGenericVirtualRegister(Ty
);
1987 MIRBuilder
.buildInstr(TargetOpcode::G_FNEG
).addDef(Neg
).addUse(RHS
);
1988 MIRBuilder
.buildInstr(TargetOpcode::G_FADD
, {Res
}, {LHS
, Neg
}, MI
.getFlags());
1989 MI
.eraseFromParent();
1992 case TargetOpcode::G_FMAD
:
1993 return lowerFMad(MI
);
// G_ATOMIC_CMPXCHG_WITH_SUCCESS: plain cmpxchg + compare old value
// against the expected value to synthesize the success flag.
1994 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS
: {
1995 Register OldValRes
= MI
.getOperand(0).getReg();
1996 Register SuccessRes
= MI
.getOperand(1).getReg();
1997 Register Addr
= MI
.getOperand(2).getReg();
1998 Register CmpVal
= MI
.getOperand(3).getReg();
1999 Register NewVal
= MI
.getOperand(4).getReg();
2000 MIRBuilder
.buildAtomicCmpXchg(OldValRes
, Addr
, CmpVal
, NewVal
,
2001 **MI
.memoperands_begin());
2002 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, SuccessRes
, OldValRes
, CmpVal
);
2003 MI
.eraseFromParent();
// Loads: either split a non-power-of-2 G_LOAD into two power-of-2 loads
// recombined with shift+or+trunc, or turn an extending load into a
// memory-width load followed by the matching extension.
2006 case TargetOpcode::G_LOAD
:
2007 case TargetOpcode::G_SEXTLOAD
:
2008 case TargetOpcode::G_ZEXTLOAD
: {
2009 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2010 Register DstReg
= MI
.getOperand(0).getReg();
2011 Register PtrReg
= MI
.getOperand(1).getReg();
2012 LLT DstTy
= MRI
.getType(DstReg
);
2013 auto &MMO
= **MI
.memoperands_begin();
2015 if (DstTy
.getSizeInBits() == MMO
.getSizeInBits()) {
2016 if (MI
.getOpcode() == TargetOpcode::G_LOAD
) {
2017 // This load needs splitting into power of 2 sized loads.
2018 if (DstTy
.isVector())
2019 return UnableToLegalize
;
2020 if (isPowerOf2_32(DstTy
.getSizeInBits()))
2021 return UnableToLegalize
; // Don't know what we're being asked to do.
2023 // Our strategy here is to generate anyextending loads for the smaller
2024 // types up to next power-2 result type, and then combine the two larger
2025 // result values together, before truncating back down to the non-pow-2
2027 // E.g. v1 = i24 load =>
2028 // v2 = i32 load (2 byte)
2029 // v3 = i32 load (1 byte)
2030 // v4 = i32 shl v3, 16
2031 // v5 = i32 or v4, v2
2032 // v1 = i24 trunc v5
2033 // By doing this we generate the correct truncate which should get
2034 // combined away as an artifact with a matching extend.
2035 uint64_t LargeSplitSize
= PowerOf2Floor(DstTy
.getSizeInBits());
2036 uint64_t SmallSplitSize
= DstTy
.getSizeInBits() - LargeSplitSize
;
2038 MachineFunction
&MF
= MIRBuilder
.getMF();
2039 MachineMemOperand
*LargeMMO
=
2040 MF
.getMachineMemOperand(&MMO
, 0, LargeSplitSize
/ 8);
2041 MachineMemOperand
*SmallMMO
= MF
.getMachineMemOperand(
2042 &MMO
, LargeSplitSize
/ 8, SmallSplitSize
/ 8);
2044 LLT PtrTy
= MRI
.getType(PtrReg
);
2045 unsigned AnyExtSize
= NextPowerOf2(DstTy
.getSizeInBits());
2046 LLT AnyExtTy
= LLT::scalar(AnyExtSize
);
2047 Register LargeLdReg
= MRI
.createGenericVirtualRegister(AnyExtTy
);
2048 Register SmallLdReg
= MRI
.createGenericVirtualRegister(AnyExtTy
);
2050 MIRBuilder
.buildLoad(LargeLdReg
, PtrReg
, *LargeMMO
);
2053 MIRBuilder
.buildConstant(LLT::scalar(64), LargeSplitSize
/ 8);
2054 Register GEPReg
= MRI
.createGenericVirtualRegister(PtrTy
);
2055 auto SmallPtr
= MIRBuilder
.buildGEP(GEPReg
, PtrReg
, OffsetCst
.getReg(0));
2056 auto SmallLoad
= MIRBuilder
.buildLoad(SmallLdReg
, SmallPtr
.getReg(0),
2059 auto ShiftAmt
= MIRBuilder
.buildConstant(AnyExtTy
, LargeSplitSize
);
2060 auto Shift
= MIRBuilder
.buildShl(AnyExtTy
, SmallLoad
, ShiftAmt
);
2061 auto Or
= MIRBuilder
.buildOr(AnyExtTy
, Shift
, LargeLoad
);
2062 MIRBuilder
.buildTrunc(DstReg
, {Or
.getReg(0)});
2063 MI
.eraseFromParent();
2066 MIRBuilder
.buildLoad(DstReg
, PtrReg
, MMO
);
2067 MI
.eraseFromParent();
// Extending-load case: load at memory width into a temp, then extend.
2071 if (DstTy
.isScalar()) {
2073 MRI
.createGenericVirtualRegister(LLT::scalar(MMO
.getSizeInBits()));
2074 MIRBuilder
.buildLoad(TmpReg
, PtrReg
, MMO
);
2075 switch (MI
.getOpcode()) {
2077 llvm_unreachable("Unexpected opcode");
2078 case TargetOpcode::G_LOAD
:
2079 MIRBuilder
.buildAnyExt(DstReg
, TmpReg
);
2081 case TargetOpcode::G_SEXTLOAD
:
2082 MIRBuilder
.buildSExt(DstReg
, TmpReg
);
2084 case TargetOpcode::G_ZEXTLOAD
:
2085 MIRBuilder
.buildZExt(DstReg
, TmpReg
);
2088 MI
.eraseFromParent();
2092 return UnableToLegalize
;
2094 case TargetOpcode::G_STORE
: {
2095 // Lower a non-power of 2 store into multiple pow-2 stores.
2096 // E.g. split an i24 store into an i16 store + i8 store.
2097 // We do this by first extending the stored value to the next largest power
2098 // of 2 type, and then using truncating stores to store the components.
2099 // By doing this, likewise with G_LOAD, generate an extend that can be
2100 // artifact-combined away instead of leaving behind extracts.
2101 Register SrcReg
= MI
.getOperand(0).getReg();
2102 Register PtrReg
= MI
.getOperand(1).getReg();
2103 LLT SrcTy
= MRI
.getType(SrcReg
);
2104 MachineMemOperand
&MMO
= **MI
.memoperands_begin();
2105 if (SrcTy
.getSizeInBits() != MMO
.getSizeInBits())
2106 return UnableToLegalize
;
2107 if (SrcTy
.isVector())
2108 return UnableToLegalize
;
2109 if (isPowerOf2_32(SrcTy
.getSizeInBits()))
2110 return UnableToLegalize
; // Don't know what we're being asked to do.
2112 // Extend to the next pow-2.
2113 const LLT ExtendTy
= LLT::scalar(NextPowerOf2(SrcTy
.getSizeInBits()));
2114 auto ExtVal
= MIRBuilder
.buildAnyExt(ExtendTy
, SrcReg
);
2116 // Obtain the smaller value by shifting away the larger value.
2117 uint64_t LargeSplitSize
= PowerOf2Floor(SrcTy
.getSizeInBits());
2118 uint64_t SmallSplitSize
= SrcTy
.getSizeInBits() - LargeSplitSize
;
2119 auto ShiftAmt
= MIRBuilder
.buildConstant(ExtendTy
, LargeSplitSize
);
2120 auto SmallVal
= MIRBuilder
.buildLShr(ExtendTy
, ExtVal
, ShiftAmt
);
2122 // Generate the GEP and truncating stores.
2123 LLT PtrTy
= MRI
.getType(PtrReg
);
2125 MIRBuilder
.buildConstant(LLT::scalar(64), LargeSplitSize
/ 8);
2126 Register GEPReg
= MRI
.createGenericVirtualRegister(PtrTy
);
2127 auto SmallPtr
= MIRBuilder
.buildGEP(GEPReg
, PtrReg
, OffsetCst
.getReg(0));
2129 MachineFunction
&MF
= MIRBuilder
.getMF();
2130 MachineMemOperand
*LargeMMO
=
2131 MF
.getMachineMemOperand(&MMO
, 0, LargeSplitSize
/ 8);
2132 MachineMemOperand
*SmallMMO
=
2133 MF
.getMachineMemOperand(&MMO
, LargeSplitSize
/ 8, SmallSplitSize
/ 8);
2134 MIRBuilder
.buildStore(ExtVal
.getReg(0), PtrReg
, *LargeMMO
);
2135 MIRBuilder
.buildStore(SmallVal
.getReg(0), SmallPtr
.getReg(0), *SmallMMO
);
2136 MI
.eraseFromParent();
// Bit-counting ops are delegated to a dedicated helper.
2139 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
2140 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
2141 case TargetOpcode::G_CTLZ
:
2142 case TargetOpcode::G_CTTZ
:
2143 case TargetOpcode::G_CTPOP
:
2144 return lowerBitCount(MI
, TypeIdx
, Ty
);
// Add-with-carry-out: carry = (result unsigned-< RHS).
2146 Register Res
= MI
.getOperand(0).getReg();
2147 Register CarryOut
= MI
.getOperand(1).getReg();
2148 Register LHS
= MI
.getOperand(2).getReg();
2149 Register RHS
= MI
.getOperand(3).getReg();
2151 MIRBuilder
.buildAdd(Res
, LHS
, RHS
);
2152 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, CarryOut
, Res
, RHS
);
2154 MI
.eraseFromParent();
// Add-with-carry-in: zext the carry, add it in, and recompute carry-out.
2158 Register Res
= MI
.getOperand(0).getReg();
2159 Register CarryOut
= MI
.getOperand(1).getReg();
2160 Register LHS
= MI
.getOperand(2).getReg();
2161 Register RHS
= MI
.getOperand(3).getReg();
2162 Register CarryIn
= MI
.getOperand(4).getReg();
2164 Register TmpRes
= MRI
.createGenericVirtualRegister(Ty
);
2165 Register ZExtCarryIn
= MRI
.createGenericVirtualRegister(Ty
);
2167 MIRBuilder
.buildAdd(TmpRes
, LHS
, RHS
);
2168 MIRBuilder
.buildZExt(ZExtCarryIn
, CarryIn
);
2169 MIRBuilder
.buildAdd(Res
, TmpRes
, ZExtCarryIn
);
2170 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, CarryOut
, Res
, LHS
);
2172 MI
.eraseFromParent();
// Sub-with-borrow-out: borrow = (LHS unsigned-< RHS).
2176 Register Res
= MI
.getOperand(0).getReg();
2177 Register BorrowOut
= MI
.getOperand(1).getReg();
2178 Register LHS
= MI
.getOperand(2).getReg();
2179 Register RHS
= MI
.getOperand(3).getReg();
2181 MIRBuilder
.buildSub(Res
, LHS
, RHS
);
2182 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, BorrowOut
, LHS
, RHS
);
2184 MI
.eraseFromParent();
// Sub-with-borrow-in: select between the incoming borrow (when LHS == RHS)
// and the freshly computed LHS < RHS comparison.
2188 Register Res
= MI
.getOperand(0).getReg();
2189 Register BorrowOut
= MI
.getOperand(1).getReg();
2190 Register LHS
= MI
.getOperand(2).getReg();
2191 Register RHS
= MI
.getOperand(3).getReg();
2192 Register BorrowIn
= MI
.getOperand(4).getReg();
2194 Register TmpRes
= MRI
.createGenericVirtualRegister(Ty
);
2195 Register ZExtBorrowIn
= MRI
.createGenericVirtualRegister(Ty
);
2196 Register LHS_EQ_RHS
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
2197 Register LHS_ULT_RHS
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
2199 MIRBuilder
.buildSub(TmpRes
, LHS
, RHS
);
2200 MIRBuilder
.buildZExt(ZExtBorrowIn
, BorrowIn
);
2201 MIRBuilder
.buildSub(Res
, TmpRes
, ZExtBorrowIn
);
2202 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LHS_EQ_RHS
, LHS
, RHS
);
2203 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, LHS_ULT_RHS
, LHS
, RHS
);
2204 MIRBuilder
.buildSelect(BorrowOut
, LHS_EQ_RHS
, BorrowIn
, LHS_ULT_RHS
);
2206 MI
.eraseFromParent();
// Remaining cases delegate to per-opcode lowering helpers.
2210 return lowerUITOFP(MI
, TypeIdx
, Ty
);
2212 return lowerSITOFP(MI
, TypeIdx
, Ty
);
2214 return lowerFPTOUI(MI
, TypeIdx
, Ty
);
2219 return lowerMinMax(MI
, TypeIdx
, Ty
);
2221 return lowerFCopySign(MI
, TypeIdx
, Ty
);
2224 return lowerFMinNumMaxNum(MI
);
2225 case G_UNMERGE_VALUES
:
2226 return lowerUnmergeValues(MI
);
// G_SEXT_INREG: shift left then arithmetic-shift right by the same amount
// to replicate the sign bit of the inner value.
2227 case TargetOpcode::G_SEXT_INREG
: {
2228 assert(MI
.getOperand(2).isImm() && "Expected immediate");
2229 int64_t SizeInBits
= MI
.getOperand(2).getImm();
2231 Register DstReg
= MI
.getOperand(0).getReg();
2232 Register SrcReg
= MI
.getOperand(1).getReg();
2233 LLT DstTy
= MRI
.getType(DstReg
);
2234 Register TmpRes
= MRI
.createGenericVirtualRegister(DstTy
);
2236 auto MIBSz
= MIRBuilder
.buildConstant(DstTy
, DstTy
.getScalarSizeInBits() - SizeInBits
);
2237 MIRBuilder
.buildInstr(TargetOpcode::G_SHL
, {TmpRes
}, {SrcReg
, MIBSz
->getOperand(0).getReg()});
2238 MIRBuilder
.buildInstr(TargetOpcode::G_ASHR
, {DstReg
}, {TmpRes
, MIBSz
->getOperand(0).getReg()});
2239 MI
.eraseFromParent();
2242 case G_SHUFFLE_VECTOR
:
2243 return lowerShuffleVector(MI
);
2244 case G_DYN_STACKALLOC
:
2245 return lowerDynStackAlloc(MI
);
// NOTE(review): lossy, line-mangled extraction of LLVM's GlobalISel
// LegalizerHelper.cpp; code left byte-identical, comments only added.
//
// Splits a G_IMPLICIT_DEF of a wide vector into NumParts undef values of
// NarrowTy, then reassembles the original destination with either
// G_CONCAT_VECTORS (vector pieces) or G_BUILD_VECTOR (scalar pieces).
2249 LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorImplicitDef(
2250 MachineInstr
&MI
, unsigned TypeIdx
, LLT NarrowTy
) {
2251 SmallVector
<Register
, 2> DstRegs
;
2253 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
2254 Register DstReg
= MI
.getOperand(0).getReg();
2255 unsigned Size
= MRI
.getType(DstReg
).getSizeInBits();
2256 int NumParts
= Size
/ NarrowSize
;
2257 // FIXME: Don't know how to handle the situation where the small vectors
2258 // aren't all the same size yet.
2259 if (Size
% NarrowSize
!= 0)
2260 return UnableToLegalize
;
2262 for (int i
= 0; i
< NumParts
; ++i
) {
2263 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2264 MIRBuilder
.buildUndef(TmpReg
);
2265 DstRegs
.push_back(TmpReg
);
2268 if (NarrowTy
.isVector())
2269 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2271 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2273 MI
.eraseFromParent();
// NOTE(review): lossy, line-mangled extraction of LLVM's GlobalISel
// LegalizerHelper.cpp; code left byte-identical, comments only added.
//
// Splits a simple (all-operands-same-type) vector operation into NumParts
// NarrowTy-sized pieces. Handles one special uneven case — a leftover of
// exactly one element — via an extract/op/insert sequence; otherwise each
// operand is split with extractParts, the opcode is re-emitted per part
// (preserving MI's flags), and the results are concatenated back together.
2277 LegalizerHelper::LegalizeResult
2278 LegalizerHelper::fewerElementsVectorBasic(MachineInstr
&MI
, unsigned TypeIdx
,
2280 const unsigned Opc
= MI
.getOpcode();
2281 const unsigned NumOps
= MI
.getNumOperands() - 1;
2282 const unsigned NarrowSize
= NarrowTy
.getSizeInBits();
2283 const Register DstReg
= MI
.getOperand(0).getReg();
2284 const unsigned Flags
= MI
.getFlags();
2285 const LLT DstTy
= MRI
.getType(DstReg
);
2286 const unsigned Size
= DstTy
.getSizeInBits();
2287 const int NumParts
= Size
/ NarrowSize
;
2288 const LLT EltTy
= DstTy
.getElementType();
2289 const unsigned EltSize
= EltTy
.getSizeInBits();
2290 const unsigned BitsForNumParts
= NarrowSize
* NumParts
;
2292 // Check if we have any leftovers. If we do, then only handle the case where
2293 // the leftover is one element.
2294 if (BitsForNumParts
!= Size
&& BitsForNumParts
+ EltSize
!= Size
)
2295 return UnableToLegalize
;
2297 if (BitsForNumParts
!= Size
) {
2298 Register AccumDstReg
= MRI
.createGenericVirtualRegister(DstTy
);
2299 MIRBuilder
.buildUndef(AccumDstReg
);
2301 // Handle the pieces which evenly divide into the requested type with
2302 // extract/op/insert sequence.
2303 for (unsigned Offset
= 0; Offset
< BitsForNumParts
; Offset
+= NarrowSize
) {
2304 SmallVector
<SrcOp
, 4> SrcOps
;
2305 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
2306 Register PartOpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2307 MIRBuilder
.buildExtract(PartOpReg
, MI
.getOperand(I
).getReg(), Offset
);
2308 SrcOps
.push_back(PartOpReg
);
2311 Register PartDstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2312 MIRBuilder
.buildInstr(Opc
, {PartDstReg
}, SrcOps
, Flags
);
2314 Register PartInsertReg
= MRI
.createGenericVirtualRegister(DstTy
);
2315 MIRBuilder
.buildInsert(PartInsertReg
, AccumDstReg
, PartDstReg
, Offset
);
2316 AccumDstReg
= PartInsertReg
;
2319 // Handle the remaining element sized leftover piece.
2320 SmallVector
<SrcOp
, 4> SrcOps
;
2321 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
2322 Register PartOpReg
= MRI
.createGenericVirtualRegister(EltTy
);
2323 MIRBuilder
.buildExtract(PartOpReg
, MI
.getOperand(I
).getReg(),
2325 SrcOps
.push_back(PartOpReg
);
2328 Register PartDstReg
= MRI
.createGenericVirtualRegister(EltTy
);
2329 MIRBuilder
.buildInstr(Opc
, {PartDstReg
}, SrcOps
, Flags
);
2330 MIRBuilder
.buildInsert(DstReg
, AccumDstReg
, PartDstReg
, BitsForNumParts
);
2331 MI
.eraseFromParent();
// Even-breakdown path: split each source operand, emit one narrow op per
// part, then rebuild the destination.
2336 SmallVector
<Register
, 2> DstRegs
, Src0Regs
, Src1Regs
, Src2Regs
;
2338 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src0Regs
);
2341 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src1Regs
);
2344 extractParts(MI
.getOperand(3).getReg(), NarrowTy
, NumParts
, Src2Regs
);
2346 for (int i
= 0; i
< NumParts
; ++i
) {
2347 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2350 MIRBuilder
.buildInstr(Opc
, {DstReg
}, {Src0Regs
[i
]}, Flags
);
2351 else if (NumOps
== 2) {
2352 MIRBuilder
.buildInstr(Opc
, {DstReg
}, {Src0Regs
[i
], Src1Regs
[i
]}, Flags
);
2353 } else if (NumOps
== 3) {
2354 MIRBuilder
.buildInstr(Opc
, {DstReg
},
2355 {Src0Regs
[i
], Src1Regs
[i
], Src2Regs
[i
]}, Flags
);
2358 DstRegs
.push_back(DstReg
);
2361 if (NarrowTy
.isVector())
2362 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2364 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2366 MI
.eraseFromParent();
// NOTE(review): lossy, line-mangled extraction of LLVM's GlobalISel
// LegalizerHelper.cpp; code left byte-identical, comments only added.
2370 // Handle splitting vector operations which need to have the same number of
2371 // elements in each type index, but each type index may have a different element
2374 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2375 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2376 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2378 // Also handles some irregular breakdown cases, e.g.
2379 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2380 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2381 // s64 = G_SHL s64, s32
2382 LegalizerHelper::LegalizeResult
2383 LegalizerHelper::fewerElementsVectorMultiEltType(
2384 MachineInstr
&MI
, unsigned TypeIdx
, LLT NarrowTyArg
) {
2386 return UnableToLegalize
;
2388 const LLT NarrowTy0
= NarrowTyArg
;
2389 const unsigned NewNumElts
=
2390 NarrowTy0
.isVector() ? NarrowTy0
.getNumElements() : 1;
2392 const Register DstReg
= MI
.getOperand(0).getReg();
2393 LLT DstTy
= MRI
.getType(DstReg
);
2396 // All of the operands need to have the same number of elements, so if we can
2397 // determine a type breakdown for the result type, we can for all of the
2399 int NumParts
= getNarrowTypeBreakDown(DstTy
, NarrowTy0
, LeftoverTy0
).first
;
2401 return UnableToLegalize
;
2403 SmallVector
<MachineInstrBuilder
, 4> NewInsts
;
2405 SmallVector
<Register
, 4> DstRegs
, LeftoverDstRegs
;
2406 SmallVector
<Register
, 4> PartRegs
, LeftoverRegs
;
// Walk each source operand, splitting it to match the result breakdown.
2408 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
2410 Register SrcReg
= MI
.getOperand(I
).getReg();
2411 LLT SrcTyI
= MRI
.getType(SrcReg
);
2412 LLT NarrowTyI
= LLT::scalarOrVector(NewNumElts
, SrcTyI
.getScalarType());
2415 // Split this operand into the requested typed registers, and any leftover
2416 // required to reproduce the original type.
2417 if (!extractParts(SrcReg
, SrcTyI
, NarrowTyI
, LeftoverTyI
, PartRegs
,
2419 return UnableToLegalize
;
2422 // For the first operand, create an instruction for each part and setup
2424 for (Register PartReg
: PartRegs
) {
2425 Register PartDstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2426 NewInsts
.push_back(MIRBuilder
.buildInstrNoInsert(MI
.getOpcode())
2429 DstRegs
.push_back(PartDstReg
);
2432 for (Register LeftoverReg
: LeftoverRegs
) {
2433 Register PartDstReg
= MRI
.createGenericVirtualRegister(LeftoverTy0
);
2434 NewInsts
.push_back(MIRBuilder
.buildInstrNoInsert(MI
.getOpcode())
2436 .addUse(LeftoverReg
));
2437 LeftoverDstRegs
.push_back(PartDstReg
);
2440 assert(NewInsts
.size() == PartRegs
.size() + LeftoverRegs
.size());
2442 // Add the newly created operand splits to the existing instructions. The
2443 // odd-sized pieces are ordered after the requested NarrowTyArg sized
2445 unsigned InstCount
= 0;
2446 for (unsigned J
= 0, JE
= PartRegs
.size(); J
!= JE
; ++J
)
2447 NewInsts
[InstCount
++].addUse(PartRegs
[J
]);
2448 for (unsigned J
= 0, JE
= LeftoverRegs
.size(); J
!= JE
; ++J
)
2449 NewInsts
[InstCount
++].addUse(LeftoverRegs
[J
]);
2453 LeftoverRegs
.clear();
2456 // Insert the newly built operations and rebuild the result register.
2457 for (auto &MIB
: NewInsts
)
2458 MIRBuilder
.insertInstr(MIB
);
2460 insertParts(DstReg
, DstTy
, NarrowTy0
, DstRegs
, LeftoverTy0
, LeftoverDstRegs
);
2462 MI
.eraseFromParent();
// NOTE(review): lossy, line-mangled extraction of LLVM's GlobalISel
// LegalizerHelper.cpp; code left byte-identical, comments only added.
//
// Splits a cast-like operation (one source, one dest) whose source and
// destination element types may differ. Computes a matching source narrow
// type (NarrowTy1) per-part, re-emits the cast for each piece preserving
// MI's flags, and rebuilds the destination vector.
2466 LegalizerHelper::LegalizeResult
2467 LegalizerHelper::fewerElementsVectorCasts(MachineInstr
&MI
, unsigned TypeIdx
,
2470 return UnableToLegalize
;
2472 Register DstReg
= MI
.getOperand(0).getReg();
2473 Register SrcReg
= MI
.getOperand(1).getReg();
2474 LLT DstTy
= MRI
.getType(DstReg
);
2475 LLT SrcTy
= MRI
.getType(SrcReg
);
2477 LLT NarrowTy0
= NarrowTy
;
2481 if (NarrowTy
.isVector()) {
2482 // Uneven breakdown not handled.
2483 NumParts
= DstTy
.getNumElements() / NarrowTy
.getNumElements();
2484 if (NumParts
* NarrowTy
.getNumElements() != DstTy
.getNumElements())
2485 return UnableToLegalize
;
2487 NarrowTy1
= LLT::vector(NumParts
, SrcTy
.getElementType().getSizeInBits());
2489 NumParts
= DstTy
.getNumElements();
2490 NarrowTy1
= SrcTy
.getElementType();
2493 SmallVector
<Register
, 4> SrcRegs
, DstRegs
;
2494 extractParts(SrcReg
, NarrowTy1
, NumParts
, SrcRegs
);
2496 for (unsigned I
= 0; I
< NumParts
; ++I
) {
2497 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2498 MachineInstr
*NewInst
= MIRBuilder
.buildInstr(MI
.getOpcode())
2500 .addUse(SrcRegs
[I
]);
2502 NewInst
->setFlags(MI
.getFlags());
2503 DstRegs
.push_back(DstReg
);
2506 if (NarrowTy
.isVector())
2507 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2509 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2511 MI
.eraseFromParent();
// NOTE(review): lossy, line-mangled extraction of LLVM's GlobalISel
// LegalizerHelper.cpp; code left byte-identical, comments only added.
//
// Splits a vector G_ICMP/G_FCMP. TypeIdx selects whether NarrowTy applies
// to the boolean result type (index 0) or the compared source type
// (index 1); the matching narrow type for the other side is derived from
// it. Each part is compared separately and the results reassembled.
2515 LegalizerHelper::LegalizeResult
2516 LegalizerHelper::fewerElementsVectorCmp(MachineInstr
&MI
, unsigned TypeIdx
,
2518 Register DstReg
= MI
.getOperand(0).getReg();
2519 Register Src0Reg
= MI
.getOperand(2).getReg();
2520 LLT DstTy
= MRI
.getType(DstReg
);
2521 LLT SrcTy
= MRI
.getType(Src0Reg
);
2524 LLT NarrowTy0
, NarrowTy1
;
// Breakdown driven by the result type.
2527 unsigned NewElts
= NarrowTy
.isVector() ? NarrowTy
.getNumElements() : 1;
2528 unsigned OldElts
= DstTy
.getNumElements();
2530 NarrowTy0
= NarrowTy
;
2531 NumParts
= NarrowTy
.isVector() ? (OldElts
/ NewElts
) : DstTy
.getNumElements();
2532 NarrowTy1
= NarrowTy
.isVector() ?
2533 LLT::vector(NarrowTy
.getNumElements(), SrcTy
.getScalarSizeInBits()) :
2534 SrcTy
.getElementType();
// Breakdown driven by the source type.
2537 unsigned NewElts
= NarrowTy
.isVector() ? NarrowTy
.getNumElements() : 1;
2538 unsigned OldElts
= SrcTy
.getNumElements();
2540 NumParts
= NarrowTy
.isVector() ? (OldElts
/ NewElts
) :
2541 NarrowTy
.getNumElements();
2542 NarrowTy0
= LLT::vector(NarrowTy
.getNumElements(),
2543 DstTy
.getScalarSizeInBits());
2544 NarrowTy1
= NarrowTy
;
2547 // FIXME: Don't know how to handle the situation where the small vectors
2548 // aren't all the same size yet.
2549 if (NarrowTy1
.isVector() &&
2550 NarrowTy1
.getNumElements() * NumParts
!= DstTy
.getNumElements())
2551 return UnableToLegalize
;
2553 CmpInst::Predicate Pred
2554 = static_cast<CmpInst::Predicate
>(MI
.getOperand(1).getPredicate());
2556 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, DstRegs
;
2557 extractParts(MI
.getOperand(2).getReg(), NarrowTy1
, NumParts
, Src1Regs
);
2558 extractParts(MI
.getOperand(3).getReg(), NarrowTy1
, NumParts
, Src2Regs
);
2560 for (unsigned I
= 0; I
< NumParts
; ++I
) {
2561 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2562 DstRegs
.push_back(DstReg
);
2564 if (MI
.getOpcode() == TargetOpcode::G_ICMP
)
2565 MIRBuilder
.buildICmp(Pred
, DstReg
, Src1Regs
[I
], Src2Regs
[I
]);
// FCMP path additionally propagates the original instruction's flags.
2567 MachineInstr
*NewCmp
2568 = MIRBuilder
.buildFCmp(Pred
, DstReg
, Src1Regs
[I
], Src2Regs
[I
]);
2569 NewCmp
->setFlags(MI
.getFlags());
2573 if (NarrowTy1
.isVector())
2574 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2576 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2578 MI
.eraseFromParent();
// NOTE(review): lossy, line-mangled extraction of LLVM's GlobalISel
// LegalizerHelper.cpp; code left byte-identical, comments only added.
//
// Splits a G_SELECT with a (possibly vector) condition. TypeIdx 0 narrows
// the value operands and derives a matching breakdown for a vector
// condition; otherwise the condition itself is narrowed. A scalar condition
// is reused unsplit for every part.
2582 LegalizerHelper::LegalizeResult
2583 LegalizerHelper::fewerElementsVectorSelect(MachineInstr
&MI
, unsigned TypeIdx
,
2585 Register DstReg
= MI
.getOperand(0).getReg();
2586 Register CondReg
= MI
.getOperand(1).getReg();
2588 unsigned NumParts
= 0;
2589 LLT NarrowTy0
, NarrowTy1
;
2591 LLT DstTy
= MRI
.getType(DstReg
);
2592 LLT CondTy
= MRI
.getType(CondReg
);
2593 unsigned Size
= DstTy
.getSizeInBits();
2595 assert(TypeIdx
== 0 || CondTy
.isVector());
2598 NarrowTy0
= NarrowTy
;
2601 unsigned NarrowSize
= NarrowTy0
.getSizeInBits();
2602 // FIXME: Don't know how to handle the situation where the small vectors
2603 // aren't all the same size yet.
2604 if (Size
% NarrowSize
!= 0)
2605 return UnableToLegalize
;
2607 NumParts
= Size
/ NarrowSize
;
2609 // Need to break down the condition type
2610 if (CondTy
.isVector()) {
2611 if (CondTy
.getNumElements() == NumParts
)
2612 NarrowTy1
= CondTy
.getElementType();
2614 NarrowTy1
= LLT::vector(CondTy
.getNumElements() / NumParts
,
2615 CondTy
.getScalarSizeInBits());
// TypeIdx != 0: narrow the condition operand instead.
2618 NumParts
= CondTy
.getNumElements();
2619 if (NarrowTy
.isVector()) {
2620 // TODO: Handle uneven breakdown.
2621 if (NumParts
* NarrowTy
.getNumElements() != CondTy
.getNumElements())
2622 return UnableToLegalize
;
2624 return UnableToLegalize
;
2626 NarrowTy0
= DstTy
.getElementType();
2627 NarrowTy1
= NarrowTy
;
2631 SmallVector
<Register
, 2> DstRegs
, Src0Regs
, Src1Regs
, Src2Regs
;
2632 if (CondTy
.isVector())
2633 extractParts(MI
.getOperand(1).getReg(), NarrowTy1
, NumParts
, Src0Regs
);
2635 extractParts(MI
.getOperand(2).getReg(), NarrowTy0
, NumParts
, Src1Regs
);
2636 extractParts(MI
.getOperand(3).getReg(), NarrowTy0
, NumParts
, Src2Regs
);
2638 for (unsigned i
= 0; i
< NumParts
; ++i
) {
2639 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2640 MIRBuilder
.buildSelect(DstReg
, CondTy
.isVector() ? Src0Regs
[i
] : CondReg
,
2641 Src1Regs
[i
], Src2Regs
[i
]);
2642 DstRegs
.push_back(DstReg
);
2645 if (NarrowTy0
.isVector())
2646 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2648 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2650 MI
.eraseFromParent();
// NOTE(review): lossy, line-mangled extraction of LLVM's GlobalISel
// LegalizerHelper.cpp; code left byte-identical, comments only added.
//
// Splits a G_PHI: builds one narrow G_PHI per part (plus leftover parts)
// at the top of the result block, re-joins them into the original
// destination after the PHIs, and splits each incoming value in its
// predecessor block just before that block's terminator.
2654 LegalizerHelper::LegalizeResult
2655 LegalizerHelper::fewerElementsVectorPhi(MachineInstr
&MI
, unsigned TypeIdx
,
2657 const Register DstReg
= MI
.getOperand(0).getReg();
2658 LLT PhiTy
= MRI
.getType(DstReg
);
2661 // All of the operands need to have the same number of elements, so if we can
2662 // determine a type breakdown for the result type, we can for all of the
2664 int NumParts
, NumLeftover
;
2665 std::tie(NumParts
, NumLeftover
)
2666 = getNarrowTypeBreakDown(PhiTy
, NarrowTy
, LeftoverTy
);
2668 return UnableToLegalize
;
2670 SmallVector
<Register
, 4> DstRegs
, LeftoverDstRegs
;
2671 SmallVector
<MachineInstrBuilder
, 4> NewInsts
;
2673 const int TotalNumParts
= NumParts
+ NumLeftover
;
2675 // Insert the new phis in the result block first.
2676 for (int I
= 0; I
!= TotalNumParts
; ++I
) {
2677 LLT Ty
= I
< NumParts
? NarrowTy
: LeftoverTy
;
2678 Register PartDstReg
= MRI
.createGenericVirtualRegister(Ty
);
2679 NewInsts
.push_back(MIRBuilder
.buildInstr(TargetOpcode::G_PHI
)
2680 .addDef(PartDstReg
));
2682 DstRegs
.push_back(PartDstReg
);
2684 LeftoverDstRegs
.push_back(PartDstReg
);
// Rebuild the full-width value after the PHI group.
2687 MachineBasicBlock
*MBB
= MI
.getParent();
2688 MIRBuilder
.setInsertPt(*MBB
, MBB
->getFirstNonPHI());
2689 insertParts(DstReg
, PhiTy
, NarrowTy
, DstRegs
, LeftoverTy
, LeftoverDstRegs
);
2691 SmallVector
<Register
, 4> PartRegs
, LeftoverRegs
;
2693 // Insert code to extract the incoming values in each predecessor block.
2694 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; I
+= 2) {
2696 LeftoverRegs
.clear();
2698 Register SrcReg
= MI
.getOperand(I
).getReg();
2699 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
2700 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
2703 if (!extractParts(SrcReg
, PhiTy
, NarrowTy
, Unused
, PartRegs
,
2705 return UnableToLegalize
;
2707 // Add the newly created operand splits to the existing instructions. The
2708 // odd-sized pieces are ordered after the requested NarrowTyArg sized
2710 for (int J
= 0; J
!= TotalNumParts
; ++J
) {
2711 MachineInstrBuilder MIB
= NewInsts
[J
];
2712 MIB
.addUse(J
< NumParts
? PartRegs
[J
] : LeftoverRegs
[J
- NumParts
]);
2717 MI
.eraseFromParent();
// NOTE(review): lossy, line-mangled extraction of LLVM's GlobalISel
// LegalizerHelper.cpp; code left byte-identical, comments only added.
//
// Splits a G_UNMERGE_VALUES in two stages: first unmerge the source into
// GCD-typed pieces, then unmerge each piece into its share of the original
// destinations. Bails out when the destination already matches NarrowTy or
// the GCD type (those would need extracts/intermediate merges instead).
2721 LegalizerHelper::LegalizeResult
2722 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr
&MI
,
2726 return UnableToLegalize
;
2728 const int NumDst
= MI
.getNumOperands() - 1;
2729 const Register SrcReg
= MI
.getOperand(NumDst
).getReg();
2730 LLT SrcTy
= MRI
.getType(SrcReg
);
2732 LLT DstTy
= MRI
.getType(MI
.getOperand(0).getReg());
2734 // TODO: Create sequence of extracts.
2735 if (DstTy
== NarrowTy
)
2736 return UnableToLegalize
;
2738 LLT GCDTy
= getGCDType(SrcTy
, NarrowTy
);
2739 if (DstTy
== GCDTy
) {
2740 // This would just be a copy of the same unmerge.
2741 // TODO: Create extracts, pad with undef and create intermediate merges.
2742 return UnableToLegalize
;
2745 auto Unmerge
= MIRBuilder
.buildUnmerge(GCDTy
, SrcReg
);
2746 const int NumUnmerge
= Unmerge
->getNumOperands() - 1;
2747 const int PartsPerUnmerge
= NumDst
/ NumUnmerge
;
2749 for (int I
= 0; I
!= NumUnmerge
; ++I
) {
2750 auto MIB
= MIRBuilder
.buildInstr(TargetOpcode::G_UNMERGE_VALUES
);
2752 for (int J
= 0; J
!= PartsPerUnmerge
; ++J
)
2753 MIB
.addDef(MI
.getOperand(I
* PartsPerUnmerge
+ J
).getReg());
2754 MIB
.addUse(Unmerge
.getReg(I
));
2757 MI
.eraseFromParent();
// NOTE(review): lossy, line-mangled extraction of LLVM's GlobalISel
// LegalizerHelper.cpp; code left byte-identical, comments only added.
//
// Narrows a G_LOAD/G_STORE into a sequence of NarrowTy-sized accesses
// (plus optional leftover-typed accesses), each through its own GEP'd
// address and a sub-ranged MachineMemOperand. Atomic accesses are
// rejected because splitting them would be invalid.
2761 LegalizerHelper::LegalizeResult
2762 LegalizerHelper::reduceLoadStoreWidth(MachineInstr
&MI
, unsigned TypeIdx
,
2764 // FIXME: Don't know how to handle secondary types yet.
2766 return UnableToLegalize
;
2768 MachineMemOperand
*MMO
= *MI
.memoperands_begin();
2770 // This implementation doesn't work for atomics. Give up instead of doing
2771 // something invalid.
2772 if (MMO
->getOrdering() != AtomicOrdering::NotAtomic
||
2773 MMO
->getFailureOrdering() != AtomicOrdering::NotAtomic
)
2774 return UnableToLegalize
;
2776 bool IsLoad
= MI
.getOpcode() == TargetOpcode::G_LOAD
;
2777 Register ValReg
= MI
.getOperand(0).getReg();
2778 Register AddrReg
= MI
.getOperand(1).getReg();
2779 LLT ValTy
= MRI
.getType(ValReg
);
2782 int NumLeftover
= -1;
2784 SmallVector
<Register
, 8> NarrowRegs
, NarrowLeftoverRegs
;
// Loads compute the breakdown; stores split the value being stored.
2786 std::tie(NumParts
, NumLeftover
) = getNarrowTypeBreakDown(ValTy
, NarrowTy
, LeftoverTy
);
2788 if (extractParts(ValReg
, ValTy
, NarrowTy
, LeftoverTy
, NarrowRegs
,
2789 NarrowLeftoverRegs
)) {
2790 NumParts
= NarrowRegs
.size();
2791 NumLeftover
= NarrowLeftoverRegs
.size();
2796 return UnableToLegalize
;
2798 const LLT OffsetTy
= LLT::scalar(MRI
.getType(AddrReg
).getScalarSizeInBits());
2800 unsigned TotalSize
= ValTy
.getSizeInBits();
2802 // Split the load/store into PartTy sized pieces starting at Offset. If this
2803 // is a load, return the new registers in ValRegs. For a store, each elements
2804 // of ValRegs should be PartTy. Returns the next offset that needs to be
2806 auto splitTypePieces
= [=](LLT PartTy
, SmallVectorImpl
<Register
> &ValRegs
,
2807 unsigned Offset
) -> unsigned {
2808 MachineFunction
&MF
= MIRBuilder
.getMF();
2809 unsigned PartSize
= PartTy
.getSizeInBits();
2810 for (unsigned Idx
= 0, E
= NumParts
; Idx
!= E
&& Offset
< TotalSize
;
2811 Offset
+= PartSize
, ++Idx
) {
2812 unsigned ByteSize
= PartSize
/ 8;
2813 unsigned ByteOffset
= Offset
/ 8;
2814 Register NewAddrReg
;
2816 MIRBuilder
.materializeGEP(NewAddrReg
, AddrReg
, OffsetTy
, ByteOffset
);
2818 MachineMemOperand
*NewMMO
=
2819 MF
.getMachineMemOperand(MMO
, ByteOffset
, ByteSize
);
2822 Register Dst
= MRI
.createGenericVirtualRegister(PartTy
);
2823 ValRegs
.push_back(Dst
);
2824 MIRBuilder
.buildLoad(Dst
, NewAddrReg
, *NewMMO
);
2826 MIRBuilder
.buildStore(ValRegs
[Idx
], NewAddrReg
, *NewMMO
);
// Emit the evenly-sized pieces first, then any leftover-typed tail.
2833 unsigned HandledOffset
= splitTypePieces(NarrowTy
, NarrowRegs
, 0);
2835 // Handle the rest of the register if this isn't an even type breakdown.
2836 if (LeftoverTy
.isValid())
2837 splitTypePieces(LeftoverTy
, NarrowLeftoverRegs
, HandledOffset
);
2840 insertParts(ValReg
, ValTy
, NarrowTy
, NarrowRegs
,
2841 LeftoverTy
, NarrowLeftoverRegs
);
2844 MI
.eraseFromParent();
// NOTE(review): lossy, line-mangled extraction of LLVM's GlobalISel
// LegalizerHelper.cpp; code left byte-identical, comments only added.
//
// Top-level FewerElements dispatcher: routes each opcode family to the
// specialized splitting helper defined above.
2848 LegalizerHelper::LegalizeResult
2849 LegalizerHelper::fewerElementsVector(MachineInstr
&MI
, unsigned TypeIdx
,
2851 using namespace TargetOpcode
;
2853 MIRBuilder
.setInstr(MI
);
2854 switch (MI
.getOpcode()) {
2855 case G_IMPLICIT_DEF
:
2856 return fewerElementsVectorImplicitDef(MI
, TypeIdx
, NarrowTy
);
2870 case G_FCANONICALIZE
:
2885 case G_INTRINSIC_ROUND
:
2886 case G_INTRINSIC_TRUNC
:
2899 case G_FMINNUM_IEEE
:
2900 case G_FMAXNUM_IEEE
:
2903 return fewerElementsVectorBasic(MI
, TypeIdx
, NarrowTy
);
2908 case G_CTLZ_ZERO_UNDEF
:
2910 case G_CTTZ_ZERO_UNDEF
:
2913 return fewerElementsVectorMultiEltType(MI
, TypeIdx
, NarrowTy
);
2925 case G_ADDRSPACE_CAST
:
2926 return fewerElementsVectorCasts(MI
, TypeIdx
, NarrowTy
);
2929 return fewerElementsVectorCmp(MI
, TypeIdx
, NarrowTy
);
2931 return fewerElementsVectorSelect(MI
, TypeIdx
, NarrowTy
);
2933 return fewerElementsVectorPhi(MI
, TypeIdx
, NarrowTy
);
2934 case G_UNMERGE_VALUES
:
2935 return fewerElementsVectorUnmergeValues(MI
, TypeIdx
, NarrowTy
);
2938 return reduceLoadStoreWidth(MI
, TypeIdx
, NarrowTy
);
2940 return UnableToLegalize
;
2944 LegalizerHelper::LegalizeResult
2945 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr
&MI
, const APInt
&Amt
,
2946 const LLT HalfTy
, const LLT AmtTy
) {
2948 Register InL
= MRI
.createGenericVirtualRegister(HalfTy
);
2949 Register InH
= MRI
.createGenericVirtualRegister(HalfTy
);
2950 MIRBuilder
.buildUnmerge({InL
, InH
}, MI
.getOperand(1).getReg());
2952 if (Amt
.isNullValue()) {
2953 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {InL
, InH
});
2954 MI
.eraseFromParent();
2959 unsigned NVTBits
= HalfTy
.getSizeInBits();
2960 unsigned VTBits
= 2 * NVTBits
;
2962 SrcOp
Lo(Register(0)), Hi(Register(0));
2963 if (MI
.getOpcode() == TargetOpcode::G_SHL
) {
2964 if (Amt
.ugt(VTBits
)) {
2965 Lo
= Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2966 } else if (Amt
.ugt(NVTBits
)) {
2967 Lo
= MIRBuilder
.buildConstant(NVT
, 0);
2968 Hi
= MIRBuilder
.buildShl(NVT
, InL
,
2969 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
2970 } else if (Amt
== NVTBits
) {
2971 Lo
= MIRBuilder
.buildConstant(NVT
, 0);
2974 Lo
= MIRBuilder
.buildShl(NVT
, InL
, MIRBuilder
.buildConstant(AmtTy
, Amt
));
2976 MIRBuilder
.buildShl(NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, Amt
));
2977 auto OrRHS
= MIRBuilder
.buildLShr(
2978 NVT
, InL
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
2979 Hi
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
2981 } else if (MI
.getOpcode() == TargetOpcode::G_LSHR
) {
2982 if (Amt
.ugt(VTBits
)) {
2983 Lo
= Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2984 } else if (Amt
.ugt(NVTBits
)) {
2985 Lo
= MIRBuilder
.buildLShr(NVT
, InH
,
2986 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
2987 Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2988 } else if (Amt
== NVTBits
) {
2990 Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2992 auto ShiftAmtConst
= MIRBuilder
.buildConstant(AmtTy
, Amt
);
2994 auto OrLHS
= MIRBuilder
.buildLShr(NVT
, InL
, ShiftAmtConst
);
2995 auto OrRHS
= MIRBuilder
.buildShl(
2996 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
2998 Lo
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
2999 Hi
= MIRBuilder
.buildLShr(NVT
, InH
, ShiftAmtConst
);
3002 if (Amt
.ugt(VTBits
)) {
3003 Hi
= Lo
= MIRBuilder
.buildAShr(
3004 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
3005 } else if (Amt
.ugt(NVTBits
)) {
3006 Lo
= MIRBuilder
.buildAShr(NVT
, InH
,
3007 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
3008 Hi
= MIRBuilder
.buildAShr(NVT
, InH
,
3009 MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
3010 } else if (Amt
== NVTBits
) {
3012 Hi
= MIRBuilder
.buildAShr(NVT
, InH
,
3013 MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
3015 auto ShiftAmtConst
= MIRBuilder
.buildConstant(AmtTy
, Amt
);
3017 auto OrLHS
= MIRBuilder
.buildLShr(NVT
, InL
, ShiftAmtConst
);
3018 auto OrRHS
= MIRBuilder
.buildShl(
3019 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
3021 Lo
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
3022 Hi
= MIRBuilder
.buildAShr(NVT
, InH
, ShiftAmtConst
);
3026 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {Lo
.getReg(), Hi
.getReg()});
3027 MI
.eraseFromParent();
3032 // TODO: Optimize if constant shift amount.
3033 LegalizerHelper::LegalizeResult
3034 LegalizerHelper::narrowScalarShift(MachineInstr
&MI
, unsigned TypeIdx
,
3037 Observer
.changingInstr(MI
);
3038 narrowScalarSrc(MI
, RequestedTy
, 2);
3039 Observer
.changedInstr(MI
);
3043 Register DstReg
= MI
.getOperand(0).getReg();
3044 LLT DstTy
= MRI
.getType(DstReg
);
3045 if (DstTy
.isVector())
3046 return UnableToLegalize
;
3048 Register Amt
= MI
.getOperand(2).getReg();
3049 LLT ShiftAmtTy
= MRI
.getType(Amt
);
3050 const unsigned DstEltSize
= DstTy
.getScalarSizeInBits();
3051 if (DstEltSize
% 2 != 0)
3052 return UnableToLegalize
;
3054 // Ignore the input type. We can only go to exactly half the size of the
3055 // input. If that isn't small enough, the resulting pieces will be further
3057 const unsigned NewBitSize
= DstEltSize
/ 2;
3058 const LLT HalfTy
= LLT::scalar(NewBitSize
);
3059 const LLT CondTy
= LLT::scalar(1);
3061 if (const MachineInstr
*KShiftAmt
=
3062 getOpcodeDef(TargetOpcode::G_CONSTANT
, Amt
, MRI
)) {
3063 return narrowScalarShiftByConstant(
3064 MI
, KShiftAmt
->getOperand(1).getCImm()->getValue(), HalfTy
, ShiftAmtTy
);
3067 // TODO: Expand with known bits.
3069 // Handle the fully general expansion by an unknown amount.
3070 auto NewBits
= MIRBuilder
.buildConstant(ShiftAmtTy
, NewBitSize
);
3072 Register InL
= MRI
.createGenericVirtualRegister(HalfTy
);
3073 Register InH
= MRI
.createGenericVirtualRegister(HalfTy
);
3074 MIRBuilder
.buildUnmerge({InL
, InH
}, MI
.getOperand(1).getReg());
3076 auto AmtExcess
= MIRBuilder
.buildSub(ShiftAmtTy
, Amt
, NewBits
);
3077 auto AmtLack
= MIRBuilder
.buildSub(ShiftAmtTy
, NewBits
, Amt
);
3079 auto Zero
= MIRBuilder
.buildConstant(ShiftAmtTy
, 0);
3080 auto IsShort
= MIRBuilder
.buildICmp(ICmpInst::ICMP_ULT
, CondTy
, Amt
, NewBits
);
3081 auto IsZero
= MIRBuilder
.buildICmp(ICmpInst::ICMP_EQ
, CondTy
, Amt
, Zero
);
3083 Register ResultRegs
[2];
3084 switch (MI
.getOpcode()) {
3085 case TargetOpcode::G_SHL
: {
3086 // Short: ShAmt < NewBitSize
3087 auto LoS
= MIRBuilder
.buildShl(HalfTy
, InL
, Amt
);
3089 auto LoOr
= MIRBuilder
.buildLShr(HalfTy
, InL
, AmtLack
);
3090 auto HiOr
= MIRBuilder
.buildShl(HalfTy
, InH
, Amt
);
3091 auto HiS
= MIRBuilder
.buildOr(HalfTy
, LoOr
, HiOr
);
3093 // Long: ShAmt >= NewBitSize
3094 auto LoL
= MIRBuilder
.buildConstant(HalfTy
, 0); // Lo part is zero.
3095 auto HiL
= MIRBuilder
.buildShl(HalfTy
, InL
, AmtExcess
); // Hi from Lo part.
3097 auto Lo
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
);
3098 auto Hi
= MIRBuilder
.buildSelect(
3099 HalfTy
, IsZero
, InH
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
));
3101 ResultRegs
[0] = Lo
.getReg(0);
3102 ResultRegs
[1] = Hi
.getReg(0);
3105 case TargetOpcode::G_LSHR
:
3106 case TargetOpcode::G_ASHR
: {
3107 // Short: ShAmt < NewBitSize
3108 auto HiS
= MIRBuilder
.buildInstr(MI
.getOpcode(), {HalfTy
}, {InH
, Amt
});
3110 auto LoOr
= MIRBuilder
.buildLShr(HalfTy
, InL
, Amt
);
3111 auto HiOr
= MIRBuilder
.buildShl(HalfTy
, InH
, AmtLack
);
3112 auto LoS
= MIRBuilder
.buildOr(HalfTy
, LoOr
, HiOr
);
3114 // Long: ShAmt >= NewBitSize
3115 MachineInstrBuilder HiL
;
3116 if (MI
.getOpcode() == TargetOpcode::G_LSHR
) {
3117 HiL
= MIRBuilder
.buildConstant(HalfTy
, 0); // Hi part is zero.
3119 auto ShiftAmt
= MIRBuilder
.buildConstant(ShiftAmtTy
, NewBitSize
- 1);
3120 HiL
= MIRBuilder
.buildAShr(HalfTy
, InH
, ShiftAmt
); // Sign of Hi part.
3122 auto LoL
= MIRBuilder
.buildInstr(MI
.getOpcode(), {HalfTy
},
3123 {InH
, AmtExcess
}); // Lo from Hi part.
3125 auto Lo
= MIRBuilder
.buildSelect(
3126 HalfTy
, IsZero
, InL
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
));
3128 auto Hi
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
);
3130 ResultRegs
[0] = Lo
.getReg(0);
3131 ResultRegs
[1] = Hi
.getReg(0);
3135 llvm_unreachable("not a shift");
3138 MIRBuilder
.buildMerge(DstReg
, ResultRegs
);
3139 MI
.eraseFromParent();
3143 LegalizerHelper::LegalizeResult
3144 LegalizerHelper::moreElementsVectorPhi(MachineInstr
&MI
, unsigned TypeIdx
,
3146 assert(TypeIdx
== 0 && "Expecting only Idx 0");
3148 Observer
.changingInstr(MI
);
3149 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; I
+= 2) {
3150 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
3151 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
3152 moreElementsVectorSrc(MI
, MoreTy
, I
);
3155 MachineBasicBlock
&MBB
= *MI
.getParent();
3156 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
3157 moreElementsVectorDst(MI
, MoreTy
, 0);
3158 Observer
.changedInstr(MI
);
3162 LegalizerHelper::LegalizeResult
3163 LegalizerHelper::moreElementsVector(MachineInstr
&MI
, unsigned TypeIdx
,
3165 MIRBuilder
.setInstr(MI
);
3166 unsigned Opc
= MI
.getOpcode();
3168 case TargetOpcode::G_IMPLICIT_DEF
:
3169 case TargetOpcode::G_LOAD
: {
3171 return UnableToLegalize
;
3172 Observer
.changingInstr(MI
);
3173 moreElementsVectorDst(MI
, MoreTy
, 0);
3174 Observer
.changedInstr(MI
);
3177 case TargetOpcode::G_STORE
:
3179 return UnableToLegalize
;
3180 Observer
.changingInstr(MI
);
3181 moreElementsVectorSrc(MI
, MoreTy
, 0);
3182 Observer
.changedInstr(MI
);
3184 case TargetOpcode::G_AND
:
3185 case TargetOpcode::G_OR
:
3186 case TargetOpcode::G_XOR
:
3187 case TargetOpcode::G_SMIN
:
3188 case TargetOpcode::G_SMAX
:
3189 case TargetOpcode::G_UMIN
:
3190 case TargetOpcode::G_UMAX
: {
3191 Observer
.changingInstr(MI
);
3192 moreElementsVectorSrc(MI
, MoreTy
, 1);
3193 moreElementsVectorSrc(MI
, MoreTy
, 2);
3194 moreElementsVectorDst(MI
, MoreTy
, 0);
3195 Observer
.changedInstr(MI
);
3198 case TargetOpcode::G_EXTRACT
:
3200 return UnableToLegalize
;
3201 Observer
.changingInstr(MI
);
3202 moreElementsVectorSrc(MI
, MoreTy
, 1);
3203 Observer
.changedInstr(MI
);
3205 case TargetOpcode::G_INSERT
:
3207 return UnableToLegalize
;
3208 Observer
.changingInstr(MI
);
3209 moreElementsVectorSrc(MI
, MoreTy
, 1);
3210 moreElementsVectorDst(MI
, MoreTy
, 0);
3211 Observer
.changedInstr(MI
);
3213 case TargetOpcode::G_SELECT
:
3215 return UnableToLegalize
;
3216 if (MRI
.getType(MI
.getOperand(1).getReg()).isVector())
3217 return UnableToLegalize
;
3219 Observer
.changingInstr(MI
);
3220 moreElementsVectorSrc(MI
, MoreTy
, 2);
3221 moreElementsVectorSrc(MI
, MoreTy
, 3);
3222 moreElementsVectorDst(MI
, MoreTy
, 0);
3223 Observer
.changedInstr(MI
);
3225 case TargetOpcode::G_UNMERGE_VALUES
: {
3227 return UnableToLegalize
;
3229 LLT DstTy
= MRI
.getType(MI
.getOperand(0).getReg());
3230 int NumDst
= MI
.getNumOperands() - 1;
3231 moreElementsVectorSrc(MI
, MoreTy
, NumDst
);
3233 auto MIB
= MIRBuilder
.buildInstr(TargetOpcode::G_UNMERGE_VALUES
);
3234 for (int I
= 0; I
!= NumDst
; ++I
)
3235 MIB
.addDef(MI
.getOperand(I
).getReg());
3237 int NewNumDst
= MoreTy
.getSizeInBits() / DstTy
.getSizeInBits();
3238 for (int I
= NumDst
; I
!= NewNumDst
; ++I
)
3239 MIB
.addDef(MRI
.createGenericVirtualRegister(DstTy
));
3241 MIB
.addUse(MI
.getOperand(NumDst
).getReg());
3242 MI
.eraseFromParent();
3245 case TargetOpcode::G_PHI
:
3246 return moreElementsVectorPhi(MI
, TypeIdx
, MoreTy
);
3248 return UnableToLegalize
;
3252 void LegalizerHelper::multiplyRegisters(SmallVectorImpl
<Register
> &DstRegs
,
3253 ArrayRef
<Register
> Src1Regs
,
3254 ArrayRef
<Register
> Src2Regs
,
3256 MachineIRBuilder
&B
= MIRBuilder
;
3257 unsigned SrcParts
= Src1Regs
.size();
3258 unsigned DstParts
= DstRegs
.size();
3260 unsigned DstIdx
= 0; // Low bits of the result.
3261 Register FactorSum
=
3262 B
.buildMul(NarrowTy
, Src1Regs
[DstIdx
], Src2Regs
[DstIdx
]).getReg(0);
3263 DstRegs
[DstIdx
] = FactorSum
;
3265 unsigned CarrySumPrevDstIdx
;
3266 SmallVector
<Register
, 4> Factors
;
3268 for (DstIdx
= 1; DstIdx
< DstParts
; DstIdx
++) {
3269 // Collect low parts of muls for DstIdx.
3270 for (unsigned i
= DstIdx
+ 1 < SrcParts
? 0 : DstIdx
- SrcParts
+ 1;
3271 i
<= std::min(DstIdx
, SrcParts
- 1); ++i
) {
3272 MachineInstrBuilder Mul
=
3273 B
.buildMul(NarrowTy
, Src1Regs
[DstIdx
- i
], Src2Regs
[i
]);
3274 Factors
.push_back(Mul
.getReg(0));
3276 // Collect high parts of muls from previous DstIdx.
3277 for (unsigned i
= DstIdx
< SrcParts
? 0 : DstIdx
- SrcParts
;
3278 i
<= std::min(DstIdx
- 1, SrcParts
- 1); ++i
) {
3279 MachineInstrBuilder Umulh
=
3280 B
.buildUMulH(NarrowTy
, Src1Regs
[DstIdx
- 1 - i
], Src2Regs
[i
]);
3281 Factors
.push_back(Umulh
.getReg(0));
3283 // Add CarrySum from additons calculated for previous DstIdx.
3285 Factors
.push_back(CarrySumPrevDstIdx
);
3289 // Add all factors and accumulate all carries into CarrySum.
3290 if (DstIdx
!= DstParts
- 1) {
3291 MachineInstrBuilder Uaddo
=
3292 B
.buildUAddo(NarrowTy
, LLT::scalar(1), Factors
[0], Factors
[1]);
3293 FactorSum
= Uaddo
.getReg(0);
3294 CarrySum
= B
.buildZExt(NarrowTy
, Uaddo
.getReg(1)).getReg(0);
3295 for (unsigned i
= 2; i
< Factors
.size(); ++i
) {
3296 MachineInstrBuilder Uaddo
=
3297 B
.buildUAddo(NarrowTy
, LLT::scalar(1), FactorSum
, Factors
[i
]);
3298 FactorSum
= Uaddo
.getReg(0);
3299 MachineInstrBuilder Carry
= B
.buildZExt(NarrowTy
, Uaddo
.getReg(1));
3300 CarrySum
= B
.buildAdd(NarrowTy
, CarrySum
, Carry
).getReg(0);
3303 // Since value for the next index is not calculated, neither is CarrySum.
3304 FactorSum
= B
.buildAdd(NarrowTy
, Factors
[0], Factors
[1]).getReg(0);
3305 for (unsigned i
= 2; i
< Factors
.size(); ++i
)
3306 FactorSum
= B
.buildAdd(NarrowTy
, FactorSum
, Factors
[i
]).getReg(0);
3309 CarrySumPrevDstIdx
= CarrySum
;
3310 DstRegs
[DstIdx
] = FactorSum
;
3315 LegalizerHelper::LegalizeResult
3316 LegalizerHelper::narrowScalarMul(MachineInstr
&MI
, LLT NarrowTy
) {
3317 Register DstReg
= MI
.getOperand(0).getReg();
3318 Register Src1
= MI
.getOperand(1).getReg();
3319 Register Src2
= MI
.getOperand(2).getReg();
3321 LLT Ty
= MRI
.getType(DstReg
);
3323 return UnableToLegalize
;
3325 unsigned SrcSize
= MRI
.getType(Src1
).getSizeInBits();
3326 unsigned DstSize
= Ty
.getSizeInBits();
3327 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
3328 if (DstSize
% NarrowSize
!= 0 || SrcSize
% NarrowSize
!= 0)
3329 return UnableToLegalize
;
3331 unsigned NumDstParts
= DstSize
/ NarrowSize
;
3332 unsigned NumSrcParts
= SrcSize
/ NarrowSize
;
3333 bool IsMulHigh
= MI
.getOpcode() == TargetOpcode::G_UMULH
;
3334 unsigned DstTmpParts
= NumDstParts
* (IsMulHigh
? 2 : 1);
3336 SmallVector
<Register
, 2> Src1Parts
, Src2Parts
, DstTmpRegs
;
3337 extractParts(Src1
, NarrowTy
, NumSrcParts
, Src1Parts
);
3338 extractParts(Src2
, NarrowTy
, NumSrcParts
, Src2Parts
);
3339 DstTmpRegs
.resize(DstTmpParts
);
3340 multiplyRegisters(DstTmpRegs
, Src1Parts
, Src2Parts
, NarrowTy
);
3342 // Take only high half of registers if this is high mul.
3343 ArrayRef
<Register
> DstRegs(
3344 IsMulHigh
? &DstTmpRegs
[DstTmpParts
/ 2] : &DstTmpRegs
[0], NumDstParts
);
3345 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
3346 MI
.eraseFromParent();
3350 LegalizerHelper::LegalizeResult
3351 LegalizerHelper::narrowScalarExtract(MachineInstr
&MI
, unsigned TypeIdx
,
3354 return UnableToLegalize
;
3356 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
3358 int64_t SizeOp1
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
3359 // FIXME: add support for when SizeOp1 isn't an exact multiple of
3361 if (SizeOp1
% NarrowSize
!= 0)
3362 return UnableToLegalize
;
3363 int NumParts
= SizeOp1
/ NarrowSize
;
3365 SmallVector
<Register
, 2> SrcRegs
, DstRegs
;
3366 SmallVector
<uint64_t, 2> Indexes
;
3367 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
);
3369 Register OpReg
= MI
.getOperand(0).getReg();
3370 uint64_t OpStart
= MI
.getOperand(2).getImm();
3371 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
3372 for (int i
= 0; i
< NumParts
; ++i
) {
3373 unsigned SrcStart
= i
* NarrowSize
;
3375 if (SrcStart
+ NarrowSize
<= OpStart
|| SrcStart
>= OpStart
+ OpSize
) {
3376 // No part of the extract uses this subregister, ignore it.
3378 } else if (SrcStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
3379 // The entire subregister is extracted, forward the value.
3380 DstRegs
.push_back(SrcRegs
[i
]);
3384 // OpSegStart is where this destination segment would start in OpReg if it
3385 // extended infinitely in both directions.
3386 int64_t ExtractOffset
;
3388 if (OpStart
< SrcStart
) {
3390 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- SrcStart
);
3392 ExtractOffset
= OpStart
- SrcStart
;
3393 SegSize
= std::min(SrcStart
+ NarrowSize
- OpStart
, OpSize
);
3396 Register SegReg
= SrcRegs
[i
];
3397 if (ExtractOffset
!= 0 || SegSize
!= NarrowSize
) {
3398 // A genuine extract is needed.
3399 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
3400 MIRBuilder
.buildExtract(SegReg
, SrcRegs
[i
], ExtractOffset
);
3403 DstRegs
.push_back(SegReg
);
3406 Register DstReg
= MI
.getOperand(0).getReg();
3407 if(MRI
.getType(DstReg
).isVector())
3408 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
3410 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
3411 MI
.eraseFromParent();
3415 LegalizerHelper::LegalizeResult
3416 LegalizerHelper::narrowScalarInsert(MachineInstr
&MI
, unsigned TypeIdx
,
3418 // FIXME: Don't know how to handle secondary types yet.
3420 return UnableToLegalize
;
3422 uint64_t SizeOp0
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
3423 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
3425 // FIXME: add support for when SizeOp0 isn't an exact multiple of
3427 if (SizeOp0
% NarrowSize
!= 0)
3428 return UnableToLegalize
;
3430 int NumParts
= SizeOp0
/ NarrowSize
;
3432 SmallVector
<Register
, 2> SrcRegs
, DstRegs
;
3433 SmallVector
<uint64_t, 2> Indexes
;
3434 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
);
3436 Register OpReg
= MI
.getOperand(2).getReg();
3437 uint64_t OpStart
= MI
.getOperand(3).getImm();
3438 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
3439 for (int i
= 0; i
< NumParts
; ++i
) {
3440 unsigned DstStart
= i
* NarrowSize
;
3442 if (DstStart
+ NarrowSize
<= OpStart
|| DstStart
>= OpStart
+ OpSize
) {
3443 // No part of the insert affects this subregister, forward the original.
3444 DstRegs
.push_back(SrcRegs
[i
]);
3446 } else if (DstStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
3447 // The entire subregister is defined by this insert, forward the new
3449 DstRegs
.push_back(OpReg
);
3453 // OpSegStart is where this destination segment would start in OpReg if it
3454 // extended infinitely in both directions.
3455 int64_t ExtractOffset
, InsertOffset
;
3457 if (OpStart
< DstStart
) {
3459 ExtractOffset
= DstStart
- OpStart
;
3460 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- DstStart
);
3462 InsertOffset
= OpStart
- DstStart
;
3465 std::min(NarrowSize
- InsertOffset
, OpStart
+ OpSize
- DstStart
);
3468 Register SegReg
= OpReg
;
3469 if (ExtractOffset
!= 0 || SegSize
!= OpSize
) {
3470 // A genuine extract is needed.
3471 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
3472 MIRBuilder
.buildExtract(SegReg
, OpReg
, ExtractOffset
);
3475 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
3476 MIRBuilder
.buildInsert(DstReg
, SrcRegs
[i
], SegReg
, InsertOffset
);
3477 DstRegs
.push_back(DstReg
);
3480 assert(DstRegs
.size() == (unsigned)NumParts
&& "not all parts covered");
3481 Register DstReg
= MI
.getOperand(0).getReg();
3482 if(MRI
.getType(DstReg
).isVector())
3483 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
3485 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
3486 MI
.eraseFromParent();
3490 LegalizerHelper::LegalizeResult
3491 LegalizerHelper::narrowScalarBasic(MachineInstr
&MI
, unsigned TypeIdx
,
3493 Register DstReg
= MI
.getOperand(0).getReg();
3494 LLT DstTy
= MRI
.getType(DstReg
);
3496 assert(MI
.getNumOperands() == 3 && TypeIdx
== 0);
3498 SmallVector
<Register
, 4> DstRegs
, DstLeftoverRegs
;
3499 SmallVector
<Register
, 4> Src0Regs
, Src0LeftoverRegs
;
3500 SmallVector
<Register
, 4> Src1Regs
, Src1LeftoverRegs
;
3502 if (!extractParts(MI
.getOperand(1).getReg(), DstTy
, NarrowTy
, LeftoverTy
,
3503 Src0Regs
, Src0LeftoverRegs
))
3504 return UnableToLegalize
;
3507 if (!extractParts(MI
.getOperand(2).getReg(), DstTy
, NarrowTy
, Unused
,
3508 Src1Regs
, Src1LeftoverRegs
))
3509 llvm_unreachable("inconsistent extractParts result");
3511 for (unsigned I
= 0, E
= Src1Regs
.size(); I
!= E
; ++I
) {
3512 auto Inst
= MIRBuilder
.buildInstr(MI
.getOpcode(), {NarrowTy
},
3513 {Src0Regs
[I
], Src1Regs
[I
]});
3514 DstRegs
.push_back(Inst
->getOperand(0).getReg());
3517 for (unsigned I
= 0, E
= Src1LeftoverRegs
.size(); I
!= E
; ++I
) {
3518 auto Inst
= MIRBuilder
.buildInstr(
3520 {LeftoverTy
}, {Src0LeftoverRegs
[I
], Src1LeftoverRegs
[I
]});
3521 DstLeftoverRegs
.push_back(Inst
->getOperand(0).getReg());
3524 insertParts(DstReg
, DstTy
, NarrowTy
, DstRegs
,
3525 LeftoverTy
, DstLeftoverRegs
);
3527 MI
.eraseFromParent();
3531 LegalizerHelper::LegalizeResult
3532 LegalizerHelper::narrowScalarSelect(MachineInstr
&MI
, unsigned TypeIdx
,
3535 return UnableToLegalize
;
3537 Register CondReg
= MI
.getOperand(1).getReg();
3538 LLT CondTy
= MRI
.getType(CondReg
);
3539 if (CondTy
.isVector()) // TODO: Handle vselect
3540 return UnableToLegalize
;
3542 Register DstReg
= MI
.getOperand(0).getReg();
3543 LLT DstTy
= MRI
.getType(DstReg
);
3545 SmallVector
<Register
, 4> DstRegs
, DstLeftoverRegs
;
3546 SmallVector
<Register
, 4> Src1Regs
, Src1LeftoverRegs
;
3547 SmallVector
<Register
, 4> Src2Regs
, Src2LeftoverRegs
;
3549 if (!extractParts(MI
.getOperand(2).getReg(), DstTy
, NarrowTy
, LeftoverTy
,
3550 Src1Regs
, Src1LeftoverRegs
))
3551 return UnableToLegalize
;
3554 if (!extractParts(MI
.getOperand(3).getReg(), DstTy
, NarrowTy
, Unused
,
3555 Src2Regs
, Src2LeftoverRegs
))
3556 llvm_unreachable("inconsistent extractParts result");
3558 for (unsigned I
= 0, E
= Src1Regs
.size(); I
!= E
; ++I
) {
3559 auto Select
= MIRBuilder
.buildSelect(NarrowTy
,
3560 CondReg
, Src1Regs
[I
], Src2Regs
[I
]);
3561 DstRegs
.push_back(Select
->getOperand(0).getReg());
3564 for (unsigned I
= 0, E
= Src1LeftoverRegs
.size(); I
!= E
; ++I
) {
3565 auto Select
= MIRBuilder
.buildSelect(
3566 LeftoverTy
, CondReg
, Src1LeftoverRegs
[I
], Src2LeftoverRegs
[I
]);
3567 DstLeftoverRegs
.push_back(Select
->getOperand(0).getReg());
3570 insertParts(DstReg
, DstTy
, NarrowTy
, DstRegs
,
3571 LeftoverTy
, DstLeftoverRegs
);
3573 MI
.eraseFromParent();
3577 LegalizerHelper::LegalizeResult
3578 LegalizerHelper::lowerBitCount(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3579 unsigned Opc
= MI
.getOpcode();
3580 auto &TII
= *MI
.getMF()->getSubtarget().getInstrInfo();
3581 auto isSupported
= [this](const LegalityQuery
&Q
) {
3582 auto QAction
= LI
.getAction(Q
).Action
;
3583 return QAction
== Legal
|| QAction
== Libcall
|| QAction
== Custom
;
3587 return UnableToLegalize
;
3588 case TargetOpcode::G_CTLZ_ZERO_UNDEF
: {
3589 // This trivially expands to CTLZ.
3590 Observer
.changingInstr(MI
);
3591 MI
.setDesc(TII
.get(TargetOpcode::G_CTLZ
));
3592 Observer
.changedInstr(MI
);
3595 case TargetOpcode::G_CTLZ
: {
3596 Register SrcReg
= MI
.getOperand(1).getReg();
3597 unsigned Len
= Ty
.getSizeInBits();
3598 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF
, {Ty
, Ty
}})) {
3599 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
3600 auto MIBCtlzZU
= MIRBuilder
.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF
,
3602 auto MIBZero
= MIRBuilder
.buildConstant(Ty
, 0);
3603 auto MIBLen
= MIRBuilder
.buildConstant(Ty
, Len
);
3604 auto MIBICmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LLT::scalar(1),
3606 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), MIBICmp
, MIBLen
,
3608 MI
.eraseFromParent();
3611 // for now, we do this:
3612 // NewLen = NextPowerOf2(Len);
3613 // x = x | (x >> 1);
3614 // x = x | (x >> 2);
3616 // x = x | (x >>16);
3617 // x = x | (x >>32); // for 64-bit input
3619 // return Len - popcount(x);
3621 // Ref: "Hacker's Delight" by Henry Warren
3622 Register Op
= SrcReg
;
3623 unsigned NewLen
= PowerOf2Ceil(Len
);
3624 for (unsigned i
= 0; (1U << i
) <= (NewLen
/ 2); ++i
) {
3625 auto MIBShiftAmt
= MIRBuilder
.buildConstant(Ty
, 1ULL << i
);
3626 auto MIBOp
= MIRBuilder
.buildInstr(
3627 TargetOpcode::G_OR
, {Ty
},
3628 {Op
, MIRBuilder
.buildInstr(TargetOpcode::G_LSHR
, {Ty
},
3629 {Op
, MIBShiftAmt
})});
3630 Op
= MIBOp
->getOperand(0).getReg();
3632 auto MIBPop
= MIRBuilder
.buildInstr(TargetOpcode::G_CTPOP
, {Ty
}, {Op
});
3633 MIRBuilder
.buildInstr(TargetOpcode::G_SUB
, {MI
.getOperand(0).getReg()},
3634 {MIRBuilder
.buildConstant(Ty
, Len
), MIBPop
});
3635 MI
.eraseFromParent();
3638 case TargetOpcode::G_CTTZ_ZERO_UNDEF
: {
3639 // This trivially expands to CTTZ.
3640 Observer
.changingInstr(MI
);
3641 MI
.setDesc(TII
.get(TargetOpcode::G_CTTZ
));
3642 Observer
.changedInstr(MI
);
3645 case TargetOpcode::G_CTTZ
: {
3646 Register SrcReg
= MI
.getOperand(1).getReg();
3647 unsigned Len
= Ty
.getSizeInBits();
3648 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF
, {Ty
, Ty
}})) {
3649 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
3651 auto MIBCttzZU
= MIRBuilder
.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF
,
3653 auto MIBZero
= MIRBuilder
.buildConstant(Ty
, 0);
3654 auto MIBLen
= MIRBuilder
.buildConstant(Ty
, Len
);
3655 auto MIBICmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LLT::scalar(1),
3657 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), MIBICmp
, MIBLen
,
3659 MI
.eraseFromParent();
3662 // for now, we use: { return popcount(~x & (x - 1)); }
3663 // unless the target has ctlz but not ctpop, in which case we use:
3664 // { return 32 - nlz(~x & (x-1)); }
3665 // Ref: "Hacker's Delight" by Henry Warren
3666 auto MIBCstNeg1
= MIRBuilder
.buildConstant(Ty
, -1);
3668 MIRBuilder
.buildInstr(TargetOpcode::G_XOR
, {Ty
}, {SrcReg
, MIBCstNeg1
});
3669 auto MIBTmp
= MIRBuilder
.buildInstr(
3670 TargetOpcode::G_AND
, {Ty
},
3671 {MIBNot
, MIRBuilder
.buildInstr(TargetOpcode::G_ADD
, {Ty
},
3672 {SrcReg
, MIBCstNeg1
})});
3673 if (!isSupported({TargetOpcode::G_CTPOP
, {Ty
, Ty
}}) &&
3674 isSupported({TargetOpcode::G_CTLZ
, {Ty
, Ty
}})) {
3675 auto MIBCstLen
= MIRBuilder
.buildConstant(Ty
, Len
);
3676 MIRBuilder
.buildInstr(
3677 TargetOpcode::G_SUB
, {MI
.getOperand(0).getReg()},
3679 MIRBuilder
.buildInstr(TargetOpcode::G_CTLZ
, {Ty
}, {MIBTmp
})});
3680 MI
.eraseFromParent();
3683 MI
.setDesc(TII
.get(TargetOpcode::G_CTPOP
));
3684 MI
.getOperand(1).setReg(MIBTmp
->getOperand(0).getReg());
3690 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3692 LegalizerHelper::LegalizeResult
3693 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr
&MI
) {
3694 Register Dst
= MI
.getOperand(0).getReg();
3695 Register Src
= MI
.getOperand(1).getReg();
3696 const LLT S64
= LLT::scalar(64);
3697 const LLT S32
= LLT::scalar(32);
3698 const LLT S1
= LLT::scalar(1);
3700 assert(MRI
.getType(Src
) == S64
&& MRI
.getType(Dst
) == S32
);
3702 // unsigned cul2f(ulong u) {
3703 // uint lz = clz(u);
3704 // uint e = (u != 0) ? 127U + 63U - lz : 0;
3705 // u = (u << lz) & 0x7fffffffffffffffUL;
3706 // ulong t = u & 0xffffffffffUL;
3707 // uint v = (e << 23) | (uint)(u >> 40);
3708 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3709 // return as_float(v + r);
3712 auto Zero32
= MIRBuilder
.buildConstant(S32
, 0);
3713 auto Zero64
= MIRBuilder
.buildConstant(S64
, 0);
3715 auto LZ
= MIRBuilder
.buildCTLZ_ZERO_UNDEF(S32
, Src
);
3717 auto K
= MIRBuilder
.buildConstant(S32
, 127U + 63U);
3718 auto Sub
= MIRBuilder
.buildSub(S32
, K
, LZ
);
3720 auto NotZero
= MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, S1
, Src
, Zero64
);
3721 auto E
= MIRBuilder
.buildSelect(S32
, NotZero
, Sub
, Zero32
);
3723 auto Mask0
= MIRBuilder
.buildConstant(S64
, (-1ULL) >> 1);
3724 auto ShlLZ
= MIRBuilder
.buildShl(S64
, Src
, LZ
);
3726 auto U
= MIRBuilder
.buildAnd(S64
, ShlLZ
, Mask0
);
3728 auto Mask1
= MIRBuilder
.buildConstant(S64
, 0xffffffffffULL
);
3729 auto T
= MIRBuilder
.buildAnd(S64
, U
, Mask1
);
3731 auto UShl
= MIRBuilder
.buildLShr(S64
, U
, MIRBuilder
.buildConstant(S64
, 40));
3732 auto ShlE
= MIRBuilder
.buildShl(S32
, E
, MIRBuilder
.buildConstant(S32
, 23));
3733 auto V
= MIRBuilder
.buildOr(S32
, ShlE
, MIRBuilder
.buildTrunc(S32
, UShl
));
3735 auto C
= MIRBuilder
.buildConstant(S64
, 0x8000000000ULL
);
3736 auto RCmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_UGT
, S1
, T
, C
);
3737 auto TCmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, S1
, T
, C
);
3738 auto One
= MIRBuilder
.buildConstant(S32
, 1);
3740 auto VTrunc1
= MIRBuilder
.buildAnd(S32
, V
, One
);
3741 auto Select0
= MIRBuilder
.buildSelect(S32
, TCmp
, VTrunc1
, Zero32
);
3742 auto R
= MIRBuilder
.buildSelect(S32
, RCmp
, One
, Select0
);
3743 MIRBuilder
.buildAdd(Dst
, V
, R
);
3748 LegalizerHelper::LegalizeResult
3749 LegalizerHelper::lowerUITOFP(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3750 Register Dst
= MI
.getOperand(0).getReg();
3751 Register Src
= MI
.getOperand(1).getReg();
3752 LLT DstTy
= MRI
.getType(Dst
);
3753 LLT SrcTy
= MRI
.getType(Src
);
3755 if (SrcTy
!= LLT::scalar(64))
3756 return UnableToLegalize
;
3758 if (DstTy
== LLT::scalar(32)) {
3759 // TODO: SelectionDAG has several alternative expansions to port which may
3760 // be more reasonble depending on the available instructions. If a target
3761 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3762 // intermediate type, this is probably worse.
3763 return lowerU64ToF32BitOps(MI
);
3766 return UnableToLegalize
;
3769 LegalizerHelper::LegalizeResult
3770 LegalizerHelper::lowerSITOFP(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3771 Register Dst
= MI
.getOperand(0).getReg();
3772 Register Src
= MI
.getOperand(1).getReg();
3773 LLT DstTy
= MRI
.getType(Dst
);
3774 LLT SrcTy
= MRI
.getType(Src
);
3776 const LLT S64
= LLT::scalar(64);
3777 const LLT S32
= LLT::scalar(32);
3778 const LLT S1
= LLT::scalar(1);
3781 return UnableToLegalize
;
3784 // signed cl2f(long l) {
3785 // long s = l >> 63;
3786 // float r = cul2f((l + s) ^ s);
3787 // return s ? -r : r;
3790 auto SignBit
= MIRBuilder
.buildConstant(S64
, 63);
3791 auto S
= MIRBuilder
.buildAShr(S64
, L
, SignBit
);
3793 auto LPlusS
= MIRBuilder
.buildAdd(S64
, L
, S
);
3794 auto Xor
= MIRBuilder
.buildXor(S64
, LPlusS
, S
);
3795 auto R
= MIRBuilder
.buildUITOFP(S32
, Xor
);
3797 auto RNeg
= MIRBuilder
.buildFNeg(S32
, R
);
3798 auto SignNotZero
= MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, S1
, S
,
3799 MIRBuilder
.buildConstant(S64
, 0));
3800 MIRBuilder
.buildSelect(Dst
, SignNotZero
, RNeg
, R
);
3804 return UnableToLegalize
;
3807 LegalizerHelper::LegalizeResult
3808 LegalizerHelper::lowerFPTOUI(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3809 Register Dst
= MI
.getOperand(0).getReg();
3810 Register Src
= MI
.getOperand(1).getReg();
3811 LLT DstTy
= MRI
.getType(Dst
);
3812 LLT SrcTy
= MRI
.getType(Src
);
3813 const LLT S64
= LLT::scalar(64);
3814 const LLT S32
= LLT::scalar(32);
3816 if (SrcTy
!= S64
&& SrcTy
!= S32
)
3817 return UnableToLegalize
;
3818 if (DstTy
!= S32
&& DstTy
!= S64
)
3819 return UnableToLegalize
;
3821 // FPTOSI gives same result as FPTOUI for positive signed integers.
3822 // FPTOUI needs to deal with fp values that convert to unsigned integers
3823 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
3825 APInt TwoPExpInt
= APInt::getSignMask(DstTy
.getSizeInBits());
3826 APFloat
TwoPExpFP(SrcTy
.getSizeInBits() == 32 ? APFloat::IEEEsingle()
3827 : APFloat::IEEEdouble(),
3828 APInt::getNullValue(SrcTy
.getSizeInBits()));
3829 TwoPExpFP
.convertFromAPInt(TwoPExpInt
, false, APFloat::rmNearestTiesToEven
);
3831 MachineInstrBuilder FPTOSI
= MIRBuilder
.buildFPTOSI(DstTy
, Src
);
3833 MachineInstrBuilder Threshold
= MIRBuilder
.buildFConstant(SrcTy
, TwoPExpFP
);
3834 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
3835 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
3836 MachineInstrBuilder FSub
= MIRBuilder
.buildFSub(SrcTy
, Src
, Threshold
);
3837 MachineInstrBuilder ResLowBits
= MIRBuilder
.buildFPTOSI(DstTy
, FSub
);
3838 MachineInstrBuilder ResHighBit
= MIRBuilder
.buildConstant(DstTy
, TwoPExpInt
);
3839 MachineInstrBuilder Res
= MIRBuilder
.buildXor(DstTy
, ResLowBits
, ResHighBit
);
3841 MachineInstrBuilder FCMP
=
3842 MIRBuilder
.buildFCmp(CmpInst::FCMP_ULT
, DstTy
, Src
, Threshold
);
3843 MIRBuilder
.buildSelect(Dst
, FCMP
, FPTOSI
, Res
);
3845 MI
.eraseFromParent();
3849 static CmpInst::Predicate
minMaxToCompare(unsigned Opc
) {
3851 case TargetOpcode::G_SMIN
:
3852 return CmpInst::ICMP_SLT
;
3853 case TargetOpcode::G_SMAX
:
3854 return CmpInst::ICMP_SGT
;
3855 case TargetOpcode::G_UMIN
:
3856 return CmpInst::ICMP_ULT
;
3857 case TargetOpcode::G_UMAX
:
3858 return CmpInst::ICMP_UGT
;
3860 llvm_unreachable("not in integer min/max");
3864 LegalizerHelper::LegalizeResult
3865 LegalizerHelper::lowerMinMax(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3866 Register Dst
= MI
.getOperand(0).getReg();
3867 Register Src0
= MI
.getOperand(1).getReg();
3868 Register Src1
= MI
.getOperand(2).getReg();
3870 const CmpInst::Predicate Pred
= minMaxToCompare(MI
.getOpcode());
3871 LLT CmpType
= MRI
.getType(Dst
).changeElementSize(1);
3873 auto Cmp
= MIRBuilder
.buildICmp(Pred
, CmpType
, Src0
, Src1
);
3874 MIRBuilder
.buildSelect(Dst
, Cmp
, Src0
, Src1
);
3876 MI
.eraseFromParent();
3880 LegalizerHelper::LegalizeResult
3881 LegalizerHelper::lowerFCopySign(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3882 Register Dst
= MI
.getOperand(0).getReg();
3883 Register Src0
= MI
.getOperand(1).getReg();
3884 Register Src1
= MI
.getOperand(2).getReg();
3886 const LLT Src0Ty
= MRI
.getType(Src0
);
3887 const LLT Src1Ty
= MRI
.getType(Src1
);
3889 const int Src0Size
= Src0Ty
.getScalarSizeInBits();
3890 const int Src1Size
= Src1Ty
.getScalarSizeInBits();
3892 auto SignBitMask
= MIRBuilder
.buildConstant(
3893 Src0Ty
, APInt::getSignMask(Src0Size
));
3895 auto NotSignBitMask
= MIRBuilder
.buildConstant(
3896 Src0Ty
, APInt::getLowBitsSet(Src0Size
, Src0Size
- 1));
3898 auto And0
= MIRBuilder
.buildAnd(Src0Ty
, Src0
, NotSignBitMask
);
3901 if (Src0Ty
== Src1Ty
) {
3902 auto And1
= MIRBuilder
.buildAnd(Src1Ty
, Src0
, SignBitMask
);
3903 Or
= MIRBuilder
.buildOr(Dst
, And0
, And1
);
3904 } else if (Src0Size
> Src1Size
) {
3905 auto ShiftAmt
= MIRBuilder
.buildConstant(Src0Ty
, Src0Size
- Src1Size
);
3906 auto Zext
= MIRBuilder
.buildZExt(Src0Ty
, Src1
);
3907 auto Shift
= MIRBuilder
.buildShl(Src0Ty
, Zext
, ShiftAmt
);
3908 auto And1
= MIRBuilder
.buildAnd(Src0Ty
, Shift
, SignBitMask
);
3909 Or
= MIRBuilder
.buildOr(Dst
, And0
, And1
);
3911 auto ShiftAmt
= MIRBuilder
.buildConstant(Src1Ty
, Src1Size
- Src0Size
);
3912 auto Shift
= MIRBuilder
.buildLShr(Src1Ty
, Src1
, ShiftAmt
);
3913 auto Trunc
= MIRBuilder
.buildTrunc(Src0Ty
, Shift
);
3914 auto And1
= MIRBuilder
.buildAnd(Src0Ty
, Trunc
, SignBitMask
);
3915 Or
= MIRBuilder
.buildOr(Dst
, And0
, And1
);
3918 // Be careful about setting nsz/nnan/ninf on every instruction, since the
3919 // constants are a nan and -0.0, but the final result should preserve
3921 if (unsigned Flags
= MI
.getFlags())
3922 Or
->setFlags(Flags
);
3924 MI
.eraseFromParent();
3928 LegalizerHelper::LegalizeResult
3929 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr
&MI
) {
3930 unsigned NewOp
= MI
.getOpcode() == TargetOpcode::G_FMINNUM
?
3931 TargetOpcode::G_FMINNUM_IEEE
: TargetOpcode::G_FMAXNUM_IEEE
;
3933 Register Dst
= MI
.getOperand(0).getReg();
3934 Register Src0
= MI
.getOperand(1).getReg();
3935 Register Src1
= MI
.getOperand(2).getReg();
3936 LLT Ty
= MRI
.getType(Dst
);
3938 if (!MI
.getFlag(MachineInstr::FmNoNans
)) {
3939 // Insert canonicalizes if it's possible we need to quiet to get correct
3942 // Note this must be done here, and not as an optimization combine in the
3943 // absence of a dedicate quiet-snan instruction as we're using an
3944 // omni-purpose G_FCANONICALIZE.
3945 if (!isKnownNeverSNaN(Src0
, MRI
))
3946 Src0
= MIRBuilder
.buildFCanonicalize(Ty
, Src0
, MI
.getFlags()).getReg(0);
3948 if (!isKnownNeverSNaN(Src1
, MRI
))
3949 Src1
= MIRBuilder
.buildFCanonicalize(Ty
, Src1
, MI
.getFlags()).getReg(0);
3952 // If there are no nans, it's safe to simply replace this with the non-IEEE
3954 MIRBuilder
.buildInstr(NewOp
, {Dst
}, {Src0
, Src1
}, MI
.getFlags());
3955 MI
.eraseFromParent();
3959 LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMad(MachineInstr
&MI
) {
3960 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
3961 Register DstReg
= MI
.getOperand(0).getReg();
3962 LLT Ty
= MRI
.getType(DstReg
);
3963 unsigned Flags
= MI
.getFlags();
3965 auto Mul
= MIRBuilder
.buildFMul(Ty
, MI
.getOperand(1), MI
.getOperand(2),
3967 MIRBuilder
.buildFAdd(DstReg
, Mul
, MI
.getOperand(3), Flags
);
3968 MI
.eraseFromParent();
3972 LegalizerHelper::LegalizeResult
3973 LegalizerHelper::lowerUnmergeValues(MachineInstr
&MI
) {
3974 const unsigned NumDst
= MI
.getNumOperands() - 1;
3975 const Register SrcReg
= MI
.getOperand(NumDst
).getReg();
3976 LLT SrcTy
= MRI
.getType(SrcReg
);
3978 Register Dst0Reg
= MI
.getOperand(0).getReg();
3979 LLT DstTy
= MRI
.getType(Dst0Reg
);
3982 // Expand scalarizing unmerge as bitcast to integer and shift.
3983 if (!DstTy
.isVector() && SrcTy
.isVector() &&
3984 SrcTy
.getElementType() == DstTy
) {
3985 LLT IntTy
= LLT::scalar(SrcTy
.getSizeInBits());
3986 Register Cast
= MIRBuilder
.buildBitcast(IntTy
, SrcReg
).getReg(0);
3988 MIRBuilder
.buildTrunc(Dst0Reg
, Cast
);
3990 const unsigned DstSize
= DstTy
.getSizeInBits();
3991 unsigned Offset
= DstSize
;
3992 for (unsigned I
= 1; I
!= NumDst
; ++I
, Offset
+= DstSize
) {
3993 auto ShiftAmt
= MIRBuilder
.buildConstant(IntTy
, Offset
);
3994 auto Shift
= MIRBuilder
.buildLShr(IntTy
, Cast
, ShiftAmt
);
3995 MIRBuilder
.buildTrunc(MI
.getOperand(I
), Shift
);
3998 MI
.eraseFromParent();
4002 return UnableToLegalize
;
4005 LegalizerHelper::LegalizeResult
4006 LegalizerHelper::lowerShuffleVector(MachineInstr
&MI
) {
4007 Register DstReg
= MI
.getOperand(0).getReg();
4008 Register Src0Reg
= MI
.getOperand(1).getReg();
4009 Register Src1Reg
= MI
.getOperand(2).getReg();
4010 LLT Src0Ty
= MRI
.getType(Src0Reg
);
4011 LLT DstTy
= MRI
.getType(DstReg
);
4012 LLT IdxTy
= LLT::scalar(32);
4014 const Constant
*ShufMask
= MI
.getOperand(3).getShuffleMask();
4016 SmallVector
<int, 32> Mask
;
4017 ShuffleVectorInst::getShuffleMask(ShufMask
, Mask
);
4019 if (DstTy
.isScalar()) {
4020 if (Src0Ty
.isVector())
4021 return UnableToLegalize
;
4023 // This is just a SELECT.
4024 assert(Mask
.size() == 1 && "Expected a single mask element");
4026 if (Mask
[0] < 0 || Mask
[0] > 1)
4027 Val
= MIRBuilder
.buildUndef(DstTy
).getReg(0);
4029 Val
= Mask
[0] == 0 ? Src0Reg
: Src1Reg
;
4030 MIRBuilder
.buildCopy(DstReg
, Val
);
4031 MI
.eraseFromParent();
4036 SmallVector
<Register
, 32> BuildVec
;
4037 LLT EltTy
= DstTy
.getElementType();
4039 for (int Idx
: Mask
) {
4041 if (!Undef
.isValid())
4042 Undef
= MIRBuilder
.buildUndef(EltTy
).getReg(0);
4043 BuildVec
.push_back(Undef
);
4047 if (Src0Ty
.isScalar()) {
4048 BuildVec
.push_back(Idx
== 0 ? Src0Reg
: Src1Reg
);
4050 int NumElts
= Src0Ty
.getNumElements();
4051 Register SrcVec
= Idx
< NumElts
? Src0Reg
: Src1Reg
;
4052 int ExtractIdx
= Idx
< NumElts
? Idx
: Idx
- NumElts
;
4053 auto IdxK
= MIRBuilder
.buildConstant(IdxTy
, ExtractIdx
);
4054 auto Extract
= MIRBuilder
.buildExtractVectorElement(EltTy
, SrcVec
, IdxK
);
4055 BuildVec
.push_back(Extract
.getReg(0));
4059 MIRBuilder
.buildBuildVector(DstReg
, BuildVec
);
4060 MI
.eraseFromParent();
4064 LegalizerHelper::LegalizeResult
4065 LegalizerHelper::lowerDynStackAlloc(MachineInstr
&MI
) {
4066 Register Dst
= MI
.getOperand(0).getReg();
4067 Register AllocSize
= MI
.getOperand(1).getReg();
4068 unsigned Align
= MI
.getOperand(2).getImm();
4070 const auto &MF
= *MI
.getMF();
4071 const auto &TLI
= *MF
.getSubtarget().getTargetLowering();
4073 LLT PtrTy
= MRI
.getType(Dst
);
4074 LLT IntPtrTy
= LLT::scalar(PtrTy
.getSizeInBits());
4076 Register SPReg
= TLI
.getStackPointerRegisterToSaveRestore();
4077 auto SPTmp
= MIRBuilder
.buildCopy(PtrTy
, SPReg
);
4078 SPTmp
= MIRBuilder
.buildCast(IntPtrTy
, SPTmp
);
4080 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
4081 // have to generate an extra instruction to negate the alloc and then use
4082 // G_GEP to add the negative offset.
4083 auto Alloc
= MIRBuilder
.buildSub(IntPtrTy
, SPTmp
, AllocSize
);
4085 APInt
AlignMask(IntPtrTy
.getSizeInBits(), Align
, true);
4087 auto AlignCst
= MIRBuilder
.buildConstant(IntPtrTy
, AlignMask
);
4088 Alloc
= MIRBuilder
.buildAnd(IntPtrTy
, Alloc
, AlignCst
);
4091 SPTmp
= MIRBuilder
.buildCast(PtrTy
, Alloc
);
4092 MIRBuilder
.buildCopy(SPReg
, SPTmp
);
4093 MIRBuilder
.buildCopy(Dst
, SPTmp
);
4095 MI
.eraseFromParent();