1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetFrameLowering.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
28 #define DEBUG_TYPE "legalizer"
31 using namespace LegalizeActions
;
33 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
35 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
36 /// with any leftover piece as type \p LeftoverTy
38 /// Returns -1 in the first element of the pair if the breakdown is not
40 static std::pair
<int, int>
41 getNarrowTypeBreakDown(LLT OrigTy
, LLT NarrowTy
, LLT
&LeftoverTy
) {
42 assert(!LeftoverTy
.isValid() && "this is an out argument");
44 unsigned Size
= OrigTy
.getSizeInBits();
45 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
46 unsigned NumParts
= Size
/ NarrowSize
;
47 unsigned LeftoverSize
= Size
- NumParts
* NarrowSize
;
48 assert(Size
> NarrowSize
);
50 if (LeftoverSize
== 0)
53 if (NarrowTy
.isVector()) {
54 unsigned EltSize
= OrigTy
.getScalarSizeInBits();
55 if (LeftoverSize
% EltSize
!= 0)
57 LeftoverTy
= LLT::scalarOrVector(LeftoverSize
/ EltSize
, EltSize
);
59 LeftoverTy
= LLT::scalar(LeftoverSize
);
62 int NumLeftover
= LeftoverSize
/ LeftoverTy
.getSizeInBits();
63 return std::make_pair(NumParts
, NumLeftover
);
66 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
,
67 GISelChangeObserver
&Observer
,
68 MachineIRBuilder
&Builder
)
69 : MIRBuilder(Builder
), MRI(MF
.getRegInfo()),
70 LI(*MF
.getSubtarget().getLegalizerInfo()), Observer(Observer
) {
72 MIRBuilder
.setChangeObserver(Observer
);
75 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
, const LegalizerInfo
&LI
,
76 GISelChangeObserver
&Observer
,
78 : MIRBuilder(B
), MRI(MF
.getRegInfo()), LI(LI
), Observer(Observer
) {
80 MIRBuilder
.setChangeObserver(Observer
);
82 LegalizerHelper::LegalizeResult
83 LegalizerHelper::legalizeInstrStep(MachineInstr
&MI
) {
84 LLVM_DEBUG(dbgs() << "Legalizing: "; MI
.print(dbgs()));
86 if (MI
.getOpcode() == TargetOpcode::G_INTRINSIC
||
87 MI
.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
)
88 return LI
.legalizeIntrinsic(MI
, MRI
, MIRBuilder
) ? Legalized
90 auto Step
= LI
.getAction(MI
, MRI
);
91 switch (Step
.Action
) {
93 LLVM_DEBUG(dbgs() << ".. Already legal\n");
96 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
99 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
100 return narrowScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
102 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
103 return widenScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
105 LLVM_DEBUG(dbgs() << ".. Lower\n");
106 return lower(MI
, Step
.TypeIdx
, Step
.NewType
);
108 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
109 return fewerElementsVector(MI
, Step
.TypeIdx
, Step
.NewType
);
111 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
112 return moreElementsVector(MI
, Step
.TypeIdx
, Step
.NewType
);
114 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
115 return LI
.legalizeCustom(MI
, MRI
, MIRBuilder
, Observer
) ? Legalized
118 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
119 return UnableToLegalize
;
123 void LegalizerHelper::extractParts(Register Reg
, LLT Ty
, int NumParts
,
124 SmallVectorImpl
<Register
> &VRegs
) {
125 for (int i
= 0; i
< NumParts
; ++i
)
126 VRegs
.push_back(MRI
.createGenericVirtualRegister(Ty
));
127 MIRBuilder
.buildUnmerge(VRegs
, Reg
);
130 bool LegalizerHelper::extractParts(Register Reg
, LLT RegTy
,
131 LLT MainTy
, LLT
&LeftoverTy
,
132 SmallVectorImpl
<Register
> &VRegs
,
133 SmallVectorImpl
<Register
> &LeftoverRegs
) {
134 assert(!LeftoverTy
.isValid() && "this is an out argument");
136 unsigned RegSize
= RegTy
.getSizeInBits();
137 unsigned MainSize
= MainTy
.getSizeInBits();
138 unsigned NumParts
= RegSize
/ MainSize
;
139 unsigned LeftoverSize
= RegSize
- NumParts
* MainSize
;
141 // Use an unmerge when possible.
142 if (LeftoverSize
== 0) {
143 for (unsigned I
= 0; I
< NumParts
; ++I
)
144 VRegs
.push_back(MRI
.createGenericVirtualRegister(MainTy
));
145 MIRBuilder
.buildUnmerge(VRegs
, Reg
);
149 if (MainTy
.isVector()) {
150 unsigned EltSize
= MainTy
.getScalarSizeInBits();
151 if (LeftoverSize
% EltSize
!= 0)
153 LeftoverTy
= LLT::scalarOrVector(LeftoverSize
/ EltSize
, EltSize
);
155 LeftoverTy
= LLT::scalar(LeftoverSize
);
158 // For irregular sizes, extract the individual parts.
159 for (unsigned I
= 0; I
!= NumParts
; ++I
) {
160 Register NewReg
= MRI
.createGenericVirtualRegister(MainTy
);
161 VRegs
.push_back(NewReg
);
162 MIRBuilder
.buildExtract(NewReg
, Reg
, MainSize
* I
);
165 for (unsigned Offset
= MainSize
* NumParts
; Offset
< RegSize
;
166 Offset
+= LeftoverSize
) {
167 Register NewReg
= MRI
.createGenericVirtualRegister(LeftoverTy
);
168 LeftoverRegs
.push_back(NewReg
);
169 MIRBuilder
.buildExtract(NewReg
, Reg
, Offset
);
175 static LLT
getGCDType(LLT OrigTy
, LLT TargetTy
) {
176 if (OrigTy
.isVector() && TargetTy
.isVector()) {
177 assert(OrigTy
.getElementType() == TargetTy
.getElementType());
178 int GCD
= greatestCommonDivisor(OrigTy
.getNumElements(),
179 TargetTy
.getNumElements());
180 return LLT::scalarOrVector(GCD
, OrigTy
.getElementType());
183 if (OrigTy
.isVector() && !TargetTy
.isVector()) {
184 assert(OrigTy
.getElementType() == TargetTy
);
188 assert(!OrigTy
.isVector() && !TargetTy
.isVector());
190 int GCD
= greatestCommonDivisor(OrigTy
.getSizeInBits(),
191 TargetTy
.getSizeInBits());
192 return LLT::scalar(GCD
);
195 void LegalizerHelper::insertParts(Register DstReg
,
196 LLT ResultTy
, LLT PartTy
,
197 ArrayRef
<Register
> PartRegs
,
199 ArrayRef
<Register
> LeftoverRegs
) {
200 if (!LeftoverTy
.isValid()) {
201 assert(LeftoverRegs
.empty());
203 if (!ResultTy
.isVector()) {
204 MIRBuilder
.buildMerge(DstReg
, PartRegs
);
208 if (PartTy
.isVector())
209 MIRBuilder
.buildConcatVectors(DstReg
, PartRegs
);
211 MIRBuilder
.buildBuildVector(DstReg
, PartRegs
);
215 unsigned PartSize
= PartTy
.getSizeInBits();
216 unsigned LeftoverPartSize
= LeftoverTy
.getSizeInBits();
218 Register CurResultReg
= MRI
.createGenericVirtualRegister(ResultTy
);
219 MIRBuilder
.buildUndef(CurResultReg
);
222 for (Register PartReg
: PartRegs
) {
223 Register NewResultReg
= MRI
.createGenericVirtualRegister(ResultTy
);
224 MIRBuilder
.buildInsert(NewResultReg
, CurResultReg
, PartReg
, Offset
);
225 CurResultReg
= NewResultReg
;
229 for (unsigned I
= 0, E
= LeftoverRegs
.size(); I
!= E
; ++I
) {
230 // Use the original output register for the final insert to avoid a copy.
231 Register NewResultReg
= (I
+ 1 == E
) ?
232 DstReg
: MRI
.createGenericVirtualRegister(ResultTy
);
234 MIRBuilder
.buildInsert(NewResultReg
, CurResultReg
, LeftoverRegs
[I
], Offset
);
235 CurResultReg
= NewResultReg
;
236 Offset
+= LeftoverPartSize
;
240 static RTLIB::Libcall
getRTLibDesc(unsigned Opcode
, unsigned Size
) {
242 case TargetOpcode::G_SDIV
:
243 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
246 return RTLIB::SDIV_I32
;
248 return RTLIB::SDIV_I64
;
250 return RTLIB::SDIV_I128
;
252 llvm_unreachable("unexpected size");
254 case TargetOpcode::G_UDIV
:
255 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
258 return RTLIB::UDIV_I32
;
260 return RTLIB::UDIV_I64
;
262 return RTLIB::UDIV_I128
;
264 llvm_unreachable("unexpected size");
266 case TargetOpcode::G_SREM
:
267 assert((Size
== 32 || Size
== 64) && "Unsupported size");
268 return Size
== 64 ? RTLIB::SREM_I64
: RTLIB::SREM_I32
;
269 case TargetOpcode::G_UREM
:
270 assert((Size
== 32 || Size
== 64) && "Unsupported size");
271 return Size
== 64 ? RTLIB::UREM_I64
: RTLIB::UREM_I32
;
272 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
273 assert(Size
== 32 && "Unsupported size");
274 return RTLIB::CTLZ_I32
;
275 case TargetOpcode::G_FADD
:
276 assert((Size
== 32 || Size
== 64) && "Unsupported size");
277 return Size
== 64 ? RTLIB::ADD_F64
: RTLIB::ADD_F32
;
278 case TargetOpcode::G_FSUB
:
279 assert((Size
== 32 || Size
== 64) && "Unsupported size");
280 return Size
== 64 ? RTLIB::SUB_F64
: RTLIB::SUB_F32
;
281 case TargetOpcode::G_FMUL
:
282 assert((Size
== 32 || Size
== 64) && "Unsupported size");
283 return Size
== 64 ? RTLIB::MUL_F64
: RTLIB::MUL_F32
;
284 case TargetOpcode::G_FDIV
:
285 assert((Size
== 32 || Size
== 64) && "Unsupported size");
286 return Size
== 64 ? RTLIB::DIV_F64
: RTLIB::DIV_F32
;
287 case TargetOpcode::G_FEXP
:
288 assert((Size
== 32 || Size
== 64) && "Unsupported size");
289 return Size
== 64 ? RTLIB::EXP_F64
: RTLIB::EXP_F32
;
290 case TargetOpcode::G_FEXP2
:
291 assert((Size
== 32 || Size
== 64) && "Unsupported size");
292 return Size
== 64 ? RTLIB::EXP2_F64
: RTLIB::EXP2_F32
;
293 case TargetOpcode::G_FREM
:
294 return Size
== 64 ? RTLIB::REM_F64
: RTLIB::REM_F32
;
295 case TargetOpcode::G_FPOW
:
296 return Size
== 64 ? RTLIB::POW_F64
: RTLIB::POW_F32
;
297 case TargetOpcode::G_FMA
:
298 assert((Size
== 32 || Size
== 64) && "Unsupported size");
299 return Size
== 64 ? RTLIB::FMA_F64
: RTLIB::FMA_F32
;
300 case TargetOpcode::G_FSIN
:
301 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
302 return Size
== 128 ? RTLIB::SIN_F128
303 : Size
== 64 ? RTLIB::SIN_F64
: RTLIB::SIN_F32
;
304 case TargetOpcode::G_FCOS
:
305 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
306 return Size
== 128 ? RTLIB::COS_F128
307 : Size
== 64 ? RTLIB::COS_F64
: RTLIB::COS_F32
;
308 case TargetOpcode::G_FLOG10
:
309 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
310 return Size
== 128 ? RTLIB::LOG10_F128
311 : Size
== 64 ? RTLIB::LOG10_F64
: RTLIB::LOG10_F32
;
312 case TargetOpcode::G_FLOG
:
313 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
314 return Size
== 128 ? RTLIB::LOG_F128
315 : Size
== 64 ? RTLIB::LOG_F64
: RTLIB::LOG_F32
;
316 case TargetOpcode::G_FLOG2
:
317 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
318 return Size
== 128 ? RTLIB::LOG2_F128
319 : Size
== 64 ? RTLIB::LOG2_F64
: RTLIB::LOG2_F32
;
320 case TargetOpcode::G_FCEIL
:
321 assert((Size
== 32 || Size
== 64) && "Unsupported size");
322 return Size
== 64 ? RTLIB::CEIL_F64
: RTLIB::CEIL_F32
;
323 case TargetOpcode::G_FFLOOR
:
324 assert((Size
== 32 || Size
== 64) && "Unsupported size");
325 return Size
== 64 ? RTLIB::FLOOR_F64
: RTLIB::FLOOR_F32
;
327 llvm_unreachable("Unknown libcall function");
330 /// True if an instruction is in tail position in its caller. Intended for
331 /// legalizing libcalls as tail calls when possible.
332 static bool isLibCallInTailPosition(MachineInstr
&MI
) {
333 const Function
&F
= MI
.getParent()->getParent()->getFunction();
335 // Conservatively require the attributes of the call to match those of
336 // the return. Ignore NoAlias and NonNull because they don't affect the
338 AttributeList CallerAttrs
= F
.getAttributes();
339 if (AttrBuilder(CallerAttrs
, AttributeList::ReturnIndex
)
340 .removeAttribute(Attribute::NoAlias
)
341 .removeAttribute(Attribute::NonNull
)
345 // It's not safe to eliminate the sign / zero extension of the return value.
346 if (CallerAttrs
.hasAttribute(AttributeList::ReturnIndex
, Attribute::ZExt
) ||
347 CallerAttrs
.hasAttribute(AttributeList::ReturnIndex
, Attribute::SExt
))
350 // Only tail call if the following instruction is a standard return.
351 auto &TII
= *MI
.getMF()->getSubtarget().getInstrInfo();
352 MachineInstr
*Next
= MI
.getNextNode();
353 if (!Next
|| TII
.isTailCall(*Next
) || !Next
->isReturn())
359 LegalizerHelper::LegalizeResult
360 llvm::createLibcall(MachineIRBuilder
&MIRBuilder
, RTLIB::Libcall Libcall
,
361 const CallLowering::ArgInfo
&Result
,
362 ArrayRef
<CallLowering::ArgInfo
> Args
) {
363 auto &CLI
= *MIRBuilder
.getMF().getSubtarget().getCallLowering();
364 auto &TLI
= *MIRBuilder
.getMF().getSubtarget().getTargetLowering();
365 const char *Name
= TLI
.getLibcallName(Libcall
);
367 CallLowering::CallLoweringInfo Info
;
368 Info
.CallConv
= TLI
.getLibcallCallingConv(Libcall
);
369 Info
.Callee
= MachineOperand::CreateES(Name
);
370 Info
.OrigRet
= Result
;
371 std::copy(Args
.begin(), Args
.end(), std::back_inserter(Info
.OrigArgs
));
372 if (!CLI
.lowerCall(MIRBuilder
, Info
))
373 return LegalizerHelper::UnableToLegalize
;
375 return LegalizerHelper::Legalized
;
378 // Useful for libcalls where all operands have the same type.
379 static LegalizerHelper::LegalizeResult
380 simpleLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, unsigned Size
,
382 auto Libcall
= getRTLibDesc(MI
.getOpcode(), Size
);
384 SmallVector
<CallLowering::ArgInfo
, 3> Args
;
385 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
++)
386 Args
.push_back({MI
.getOperand(i
).getReg(), OpType
});
387 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), OpType
},
391 LegalizerHelper::LegalizeResult
392 llvm::createMemLibcall(MachineIRBuilder
&MIRBuilder
, MachineRegisterInfo
&MRI
,
394 assert(MI
.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
);
395 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
397 SmallVector
<CallLowering::ArgInfo
, 3> Args
;
398 // Add all the args, except for the last which is an imm denoting 'tail'.
399 for (unsigned i
= 1; i
< MI
.getNumOperands() - 1; i
++) {
400 Register Reg
= MI
.getOperand(i
).getReg();
402 // Need derive an IR type for call lowering.
403 LLT OpLLT
= MRI
.getType(Reg
);
404 Type
*OpTy
= nullptr;
405 if (OpLLT
.isPointer())
406 OpTy
= Type::getInt8PtrTy(Ctx
, OpLLT
.getAddressSpace());
408 OpTy
= IntegerType::get(Ctx
, OpLLT
.getSizeInBits());
409 Args
.push_back({Reg
, OpTy
});
412 auto &CLI
= *MIRBuilder
.getMF().getSubtarget().getCallLowering();
413 auto &TLI
= *MIRBuilder
.getMF().getSubtarget().getTargetLowering();
414 Intrinsic::ID ID
= MI
.getOperand(0).getIntrinsicID();
415 RTLIB::Libcall RTLibcall
;
417 case Intrinsic::memcpy
:
418 RTLibcall
= RTLIB::MEMCPY
;
420 case Intrinsic::memset
:
421 RTLibcall
= RTLIB::MEMSET
;
423 case Intrinsic::memmove
:
424 RTLibcall
= RTLIB::MEMMOVE
;
427 return LegalizerHelper::UnableToLegalize
;
429 const char *Name
= TLI
.getLibcallName(RTLibcall
);
431 MIRBuilder
.setInstr(MI
);
433 CallLowering::CallLoweringInfo Info
;
434 Info
.CallConv
= TLI
.getLibcallCallingConv(RTLibcall
);
435 Info
.Callee
= MachineOperand::CreateES(Name
);
436 Info
.OrigRet
= CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx
));
437 Info
.IsTailCall
= MI
.getOperand(MI
.getNumOperands() - 1).getImm() == 1 &&
438 isLibCallInTailPosition(MI
);
440 std::copy(Args
.begin(), Args
.end(), std::back_inserter(Info
.OrigArgs
));
441 if (!CLI
.lowerCall(MIRBuilder
, Info
))
442 return LegalizerHelper::UnableToLegalize
;
444 if (Info
.LoweredTailCall
) {
445 assert(Info
.IsTailCall
&& "Lowered tail call when it wasn't a tail call?");
446 // We must have a return following the call to get past
447 // isLibCallInTailPosition.
448 assert(MI
.getNextNode() && MI
.getNextNode()->isReturn() &&
449 "Expected instr following MI to be a return?");
451 // We lowered a tail call, so the call is now the return from the block.
452 // Delete the old return.
453 MI
.getNextNode()->eraseFromParent();
456 return LegalizerHelper::Legalized
;
459 static RTLIB::Libcall
getConvRTLibDesc(unsigned Opcode
, Type
*ToType
,
461 auto ToMVT
= MVT::getVT(ToType
);
462 auto FromMVT
= MVT::getVT(FromType
);
465 case TargetOpcode::G_FPEXT
:
466 return RTLIB::getFPEXT(FromMVT
, ToMVT
);
467 case TargetOpcode::G_FPTRUNC
:
468 return RTLIB::getFPROUND(FromMVT
, ToMVT
);
469 case TargetOpcode::G_FPTOSI
:
470 return RTLIB::getFPTOSINT(FromMVT
, ToMVT
);
471 case TargetOpcode::G_FPTOUI
:
472 return RTLIB::getFPTOUINT(FromMVT
, ToMVT
);
473 case TargetOpcode::G_SITOFP
:
474 return RTLIB::getSINTTOFP(FromMVT
, ToMVT
);
475 case TargetOpcode::G_UITOFP
:
476 return RTLIB::getUINTTOFP(FromMVT
, ToMVT
);
478 llvm_unreachable("Unsupported libcall function");
481 static LegalizerHelper::LegalizeResult
482 conversionLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, Type
*ToType
,
484 RTLIB::Libcall Libcall
= getConvRTLibDesc(MI
.getOpcode(), ToType
, FromType
);
485 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), ToType
},
486 {{MI
.getOperand(1).getReg(), FromType
}});
489 LegalizerHelper::LegalizeResult
490 LegalizerHelper::libcall(MachineInstr
&MI
) {
491 LLT LLTy
= MRI
.getType(MI
.getOperand(0).getReg());
492 unsigned Size
= LLTy
.getSizeInBits();
493 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
495 MIRBuilder
.setInstr(MI
);
497 switch (MI
.getOpcode()) {
499 return UnableToLegalize
;
500 case TargetOpcode::G_SDIV
:
501 case TargetOpcode::G_UDIV
:
502 case TargetOpcode::G_SREM
:
503 case TargetOpcode::G_UREM
:
504 case TargetOpcode::G_CTLZ_ZERO_UNDEF
: {
505 Type
*HLTy
= IntegerType::get(Ctx
, Size
);
506 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
507 if (Status
!= Legalized
)
511 case TargetOpcode::G_FADD
:
512 case TargetOpcode::G_FSUB
:
513 case TargetOpcode::G_FMUL
:
514 case TargetOpcode::G_FDIV
:
515 case TargetOpcode::G_FMA
:
516 case TargetOpcode::G_FPOW
:
517 case TargetOpcode::G_FREM
:
518 case TargetOpcode::G_FCOS
:
519 case TargetOpcode::G_FSIN
:
520 case TargetOpcode::G_FLOG10
:
521 case TargetOpcode::G_FLOG
:
522 case TargetOpcode::G_FLOG2
:
523 case TargetOpcode::G_FEXP
:
524 case TargetOpcode::G_FEXP2
:
525 case TargetOpcode::G_FCEIL
:
526 case TargetOpcode::G_FFLOOR
: {
528 LLVM_DEBUG(dbgs() << "Size " << Size
<< " too large to legalize.\n");
529 return UnableToLegalize
;
531 Type
*HLTy
= Size
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
);
532 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
533 if (Status
!= Legalized
)
537 case TargetOpcode::G_FPEXT
: {
538 // FIXME: Support other floating point types (half, fp128 etc)
539 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
540 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
541 if (ToSize
!= 64 || FromSize
!= 32)
542 return UnableToLegalize
;
543 LegalizeResult Status
= conversionLibcall(
544 MI
, MIRBuilder
, Type::getDoubleTy(Ctx
), Type::getFloatTy(Ctx
));
545 if (Status
!= Legalized
)
549 case TargetOpcode::G_FPTRUNC
: {
550 // FIXME: Support other floating point types (half, fp128 etc)
551 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
552 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
553 if (ToSize
!= 32 || FromSize
!= 64)
554 return UnableToLegalize
;
555 LegalizeResult Status
= conversionLibcall(
556 MI
, MIRBuilder
, Type::getFloatTy(Ctx
), Type::getDoubleTy(Ctx
));
557 if (Status
!= Legalized
)
561 case TargetOpcode::G_FPTOSI
:
562 case TargetOpcode::G_FPTOUI
: {
563 // FIXME: Support other types
564 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
565 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
566 if ((ToSize
!= 32 && ToSize
!= 64) || (FromSize
!= 32 && FromSize
!= 64))
567 return UnableToLegalize
;
568 LegalizeResult Status
= conversionLibcall(
570 ToSize
== 32 ? Type::getInt32Ty(Ctx
) : Type::getInt64Ty(Ctx
),
571 FromSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
));
572 if (Status
!= Legalized
)
576 case TargetOpcode::G_SITOFP
:
577 case TargetOpcode::G_UITOFP
: {
578 // FIXME: Support other types
579 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
580 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
581 if ((FromSize
!= 32 && FromSize
!= 64) || (ToSize
!= 32 && ToSize
!= 64))
582 return UnableToLegalize
;
583 LegalizeResult Status
= conversionLibcall(
585 ToSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
),
586 FromSize
== 32 ? Type::getInt32Ty(Ctx
) : Type::getInt64Ty(Ctx
));
587 if (Status
!= Legalized
)
593 MI
.eraseFromParent();
597 LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalar(MachineInstr
&MI
,
600 MIRBuilder
.setInstr(MI
);
602 uint64_t SizeOp0
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
603 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
605 switch (MI
.getOpcode()) {
607 return UnableToLegalize
;
608 case TargetOpcode::G_IMPLICIT_DEF
: {
609 // FIXME: add support for when SizeOp0 isn't an exact multiple of
611 if (SizeOp0
% NarrowSize
!= 0)
612 return UnableToLegalize
;
613 int NumParts
= SizeOp0
/ NarrowSize
;
615 SmallVector
<Register
, 2> DstRegs
;
616 for (int i
= 0; i
< NumParts
; ++i
)
618 MIRBuilder
.buildUndef(NarrowTy
)->getOperand(0).getReg());
620 Register DstReg
= MI
.getOperand(0).getReg();
621 if(MRI
.getType(DstReg
).isVector())
622 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
624 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
625 MI
.eraseFromParent();
628 case TargetOpcode::G_CONSTANT
: {
629 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
630 const APInt
&Val
= MI
.getOperand(1).getCImm()->getValue();
631 unsigned TotalSize
= Ty
.getSizeInBits();
632 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
633 int NumParts
= TotalSize
/ NarrowSize
;
635 SmallVector
<Register
, 4> PartRegs
;
636 for (int I
= 0; I
!= NumParts
; ++I
) {
637 unsigned Offset
= I
* NarrowSize
;
638 auto K
= MIRBuilder
.buildConstant(NarrowTy
,
639 Val
.lshr(Offset
).trunc(NarrowSize
));
640 PartRegs
.push_back(K
.getReg(0));
644 unsigned LeftoverBits
= TotalSize
- NumParts
* NarrowSize
;
645 SmallVector
<Register
, 1> LeftoverRegs
;
646 if (LeftoverBits
!= 0) {
647 LeftoverTy
= LLT::scalar(LeftoverBits
);
648 auto K
= MIRBuilder
.buildConstant(
650 Val
.lshr(NumParts
* NarrowSize
).trunc(LeftoverBits
));
651 LeftoverRegs
.push_back(K
.getReg(0));
654 insertParts(MI
.getOperand(0).getReg(),
655 Ty
, NarrowTy
, PartRegs
, LeftoverTy
, LeftoverRegs
);
657 MI
.eraseFromParent();
660 case TargetOpcode::G_SEXT
: {
662 return UnableToLegalize
;
664 Register SrcReg
= MI
.getOperand(1).getReg();
665 LLT SrcTy
= MRI
.getType(SrcReg
);
667 // FIXME: support the general case where the requested NarrowTy may not be
668 // the same as the source type. E.g. s128 = sext(s32)
669 if ((SrcTy
.getSizeInBits() != SizeOp0
/ 2) ||
670 SrcTy
.getSizeInBits() != NarrowTy
.getSizeInBits()) {
671 LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy
<< "\n");
672 return UnableToLegalize
;
675 // Shift the sign bit of the low register through the high register.
677 MIRBuilder
.buildConstant(LLT::scalar(64), NarrowTy
.getSizeInBits() - 1);
678 auto Shift
= MIRBuilder
.buildAShr(NarrowTy
, SrcReg
, ShiftAmt
);
679 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {SrcReg
, Shift
.getReg(0)});
680 MI
.eraseFromParent();
683 case TargetOpcode::G_ZEXT
: {
685 return UnableToLegalize
;
687 LLT SrcTy
= MRI
.getType(MI
.getOperand(1).getReg());
688 uint64_t SizeOp1
= SrcTy
.getSizeInBits();
689 if (SizeOp0
% SizeOp1
!= 0)
690 return UnableToLegalize
;
692 // Generate a merge where the bottom bits are taken from the source, and
693 // zero everything else.
694 Register ZeroReg
= MIRBuilder
.buildConstant(SrcTy
, 0).getReg(0);
695 unsigned NumParts
= SizeOp0
/ SizeOp1
;
696 SmallVector
<Register
, 4> Srcs
= {MI
.getOperand(1).getReg()};
697 for (unsigned Part
= 1; Part
< NumParts
; ++Part
)
698 Srcs
.push_back(ZeroReg
);
699 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), Srcs
);
700 MI
.eraseFromParent();
703 case TargetOpcode::G_TRUNC
: {
705 return UnableToLegalize
;
707 uint64_t SizeOp1
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
708 if (NarrowTy
.getSizeInBits() * 2 != SizeOp1
) {
709 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy
<< "\n");
710 return UnableToLegalize
;
713 auto Unmerge
= MIRBuilder
.buildUnmerge(NarrowTy
, MI
.getOperand(1).getReg());
714 MIRBuilder
.buildCopy(MI
.getOperand(0).getReg(), Unmerge
.getReg(0));
715 MI
.eraseFromParent();
719 case TargetOpcode::G_ADD
: {
720 // FIXME: add support for when SizeOp0 isn't an exact multiple of
722 if (SizeOp0
% NarrowSize
!= 0)
723 return UnableToLegalize
;
724 // Expand in terms of carry-setting/consuming G_ADDE instructions.
725 int NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
727 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, DstRegs
;
728 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
729 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
732 for (int i
= 0; i
< NumParts
; ++i
) {
733 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
734 Register CarryOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
737 MIRBuilder
.buildUAddo(DstReg
, CarryOut
, Src1Regs
[i
], Src2Regs
[i
]);
739 MIRBuilder
.buildUAdde(DstReg
, CarryOut
, Src1Regs
[i
],
740 Src2Regs
[i
], CarryIn
);
743 DstRegs
.push_back(DstReg
);
746 Register DstReg
= MI
.getOperand(0).getReg();
747 if(MRI
.getType(DstReg
).isVector())
748 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
750 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
751 MI
.eraseFromParent();
754 case TargetOpcode::G_SUB
: {
755 // FIXME: add support for when SizeOp0 isn't an exact multiple of
757 if (SizeOp0
% NarrowSize
!= 0)
758 return UnableToLegalize
;
760 int NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
762 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, DstRegs
;
763 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
764 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
766 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
767 Register BorrowOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
768 MIRBuilder
.buildInstr(TargetOpcode::G_USUBO
, {DstReg
, BorrowOut
},
769 {Src1Regs
[0], Src2Regs
[0]});
770 DstRegs
.push_back(DstReg
);
771 Register BorrowIn
= BorrowOut
;
772 for (int i
= 1; i
< NumParts
; ++i
) {
773 DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
774 BorrowOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
776 MIRBuilder
.buildInstr(TargetOpcode::G_USUBE
, {DstReg
, BorrowOut
},
777 {Src1Regs
[i
], Src2Regs
[i
], BorrowIn
});
779 DstRegs
.push_back(DstReg
);
780 BorrowIn
= BorrowOut
;
782 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
783 MI
.eraseFromParent();
786 case TargetOpcode::G_MUL
:
787 case TargetOpcode::G_UMULH
:
788 return narrowScalarMul(MI
, NarrowTy
);
789 case TargetOpcode::G_EXTRACT
:
790 return narrowScalarExtract(MI
, TypeIdx
, NarrowTy
);
791 case TargetOpcode::G_INSERT
:
792 return narrowScalarInsert(MI
, TypeIdx
, NarrowTy
);
793 case TargetOpcode::G_LOAD
: {
794 const auto &MMO
= **MI
.memoperands_begin();
795 Register DstReg
= MI
.getOperand(0).getReg();
796 LLT DstTy
= MRI
.getType(DstReg
);
797 if (DstTy
.isVector())
798 return UnableToLegalize
;
800 if (8 * MMO
.getSize() != DstTy
.getSizeInBits()) {
801 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
802 auto &MMO
= **MI
.memoperands_begin();
803 MIRBuilder
.buildLoad(TmpReg
, MI
.getOperand(1).getReg(), MMO
);
804 MIRBuilder
.buildAnyExt(DstReg
, TmpReg
);
805 MI
.eraseFromParent();
809 return reduceLoadStoreWidth(MI
, TypeIdx
, NarrowTy
);
811 case TargetOpcode::G_ZEXTLOAD
:
812 case TargetOpcode::G_SEXTLOAD
: {
813 bool ZExt
= MI
.getOpcode() == TargetOpcode::G_ZEXTLOAD
;
814 Register DstReg
= MI
.getOperand(0).getReg();
815 Register PtrReg
= MI
.getOperand(1).getReg();
817 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
818 auto &MMO
= **MI
.memoperands_begin();
819 if (MMO
.getSizeInBits() == NarrowSize
) {
820 MIRBuilder
.buildLoad(TmpReg
, PtrReg
, MMO
);
822 unsigned ExtLoad
= ZExt
? TargetOpcode::G_ZEXTLOAD
823 : TargetOpcode::G_SEXTLOAD
;
824 MIRBuilder
.buildInstr(ExtLoad
)
827 .addMemOperand(&MMO
);
831 MIRBuilder
.buildZExt(DstReg
, TmpReg
);
833 MIRBuilder
.buildSExt(DstReg
, TmpReg
);
835 MI
.eraseFromParent();
838 case TargetOpcode::G_STORE
: {
839 const auto &MMO
= **MI
.memoperands_begin();
841 Register SrcReg
= MI
.getOperand(0).getReg();
842 LLT SrcTy
= MRI
.getType(SrcReg
);
843 if (SrcTy
.isVector())
844 return UnableToLegalize
;
846 int NumParts
= SizeOp0
/ NarrowSize
;
847 unsigned HandledSize
= NumParts
* NarrowTy
.getSizeInBits();
848 unsigned LeftoverBits
= SrcTy
.getSizeInBits() - HandledSize
;
849 if (SrcTy
.isVector() && LeftoverBits
!= 0)
850 return UnableToLegalize
;
852 if (8 * MMO
.getSize() != SrcTy
.getSizeInBits()) {
853 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
854 auto &MMO
= **MI
.memoperands_begin();
855 MIRBuilder
.buildTrunc(TmpReg
, SrcReg
);
856 MIRBuilder
.buildStore(TmpReg
, MI
.getOperand(1).getReg(), MMO
);
857 MI
.eraseFromParent();
861 return reduceLoadStoreWidth(MI
, 0, NarrowTy
);
863 case TargetOpcode::G_SELECT
:
864 return narrowScalarSelect(MI
, TypeIdx
, NarrowTy
);
865 case TargetOpcode::G_AND
:
866 case TargetOpcode::G_OR
:
867 case TargetOpcode::G_XOR
: {
868 // Legalize bitwise operation:
869 // A = BinOp<Ty> B, C
871 // B1, ..., BN = G_UNMERGE_VALUES B
872 // C1, ..., CN = G_UNMERGE_VALUES C
873 // A1 = BinOp<Ty/N> B1, C2
875 // AN = BinOp<Ty/N> BN, CN
876 // A = G_MERGE_VALUES A1, ..., AN
877 return narrowScalarBasic(MI
, TypeIdx
, NarrowTy
);
879 case TargetOpcode::G_SHL
:
880 case TargetOpcode::G_LSHR
:
881 case TargetOpcode::G_ASHR
:
882 return narrowScalarShift(MI
, TypeIdx
, NarrowTy
);
883 case TargetOpcode::G_CTLZ
:
884 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
885 case TargetOpcode::G_CTTZ
:
886 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
887 case TargetOpcode::G_CTPOP
:
889 return UnableToLegalize
; // TODO
891 Observer
.changingInstr(MI
);
892 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_ZEXT
);
893 Observer
.changedInstr(MI
);
895 case TargetOpcode::G_INTTOPTR
:
897 return UnableToLegalize
;
899 Observer
.changingInstr(MI
);
900 narrowScalarSrc(MI
, NarrowTy
, 1);
901 Observer
.changedInstr(MI
);
903 case TargetOpcode::G_PTRTOINT
:
905 return UnableToLegalize
;
907 Observer
.changingInstr(MI
);
908 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_ZEXT
);
909 Observer
.changedInstr(MI
);
911 case TargetOpcode::G_PHI
: {
912 unsigned NumParts
= SizeOp0
/ NarrowSize
;
913 SmallVector
<Register
, 2> DstRegs
;
914 SmallVector
<SmallVector
<Register
, 2>, 2> SrcRegs
;
915 DstRegs
.resize(NumParts
);
916 SrcRegs
.resize(MI
.getNumOperands() / 2);
917 Observer
.changingInstr(MI
);
918 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
+= 2) {
919 MachineBasicBlock
&OpMBB
= *MI
.getOperand(i
+ 1).getMBB();
920 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
921 extractParts(MI
.getOperand(i
).getReg(), NarrowTy
, NumParts
,
924 MachineBasicBlock
&MBB
= *MI
.getParent();
925 MIRBuilder
.setInsertPt(MBB
, MI
);
926 for (unsigned i
= 0; i
< NumParts
; ++i
) {
927 DstRegs
[i
] = MRI
.createGenericVirtualRegister(NarrowTy
);
928 MachineInstrBuilder MIB
=
929 MIRBuilder
.buildInstr(TargetOpcode::G_PHI
).addDef(DstRegs
[i
]);
930 for (unsigned j
= 1; j
< MI
.getNumOperands(); j
+= 2)
931 MIB
.addUse(SrcRegs
[j
/ 2][i
]).add(MI
.getOperand(j
+ 1));
933 MIRBuilder
.setInsertPt(MBB
, MBB
.getFirstNonPHI());
934 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
935 Observer
.changedInstr(MI
);
936 MI
.eraseFromParent();
939 case TargetOpcode::G_EXTRACT_VECTOR_ELT
:
940 case TargetOpcode::G_INSERT_VECTOR_ELT
: {
942 return UnableToLegalize
;
944 int OpIdx
= MI
.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT
? 2 : 3;
945 Observer
.changingInstr(MI
);
946 narrowScalarSrc(MI
, NarrowTy
, OpIdx
);
947 Observer
.changedInstr(MI
);
950 case TargetOpcode::G_ICMP
: {
951 uint64_t SrcSize
= MRI
.getType(MI
.getOperand(2).getReg()).getSizeInBits();
952 if (NarrowSize
* 2 != SrcSize
)
953 return UnableToLegalize
;
955 Observer
.changingInstr(MI
);
956 Register LHSL
= MRI
.createGenericVirtualRegister(NarrowTy
);
957 Register LHSH
= MRI
.createGenericVirtualRegister(NarrowTy
);
958 MIRBuilder
.buildUnmerge({LHSL
, LHSH
}, MI
.getOperand(2).getReg());
960 Register RHSL
= MRI
.createGenericVirtualRegister(NarrowTy
);
961 Register RHSH
= MRI
.createGenericVirtualRegister(NarrowTy
);
962 MIRBuilder
.buildUnmerge({RHSL
, RHSH
}, MI
.getOperand(3).getReg());
964 CmpInst::Predicate Pred
=
965 static_cast<CmpInst::Predicate
>(MI
.getOperand(1).getPredicate());
966 LLT ResTy
= MRI
.getType(MI
.getOperand(0).getReg());
968 if (Pred
== CmpInst::ICMP_EQ
|| Pred
== CmpInst::ICMP_NE
) {
969 MachineInstrBuilder XorL
= MIRBuilder
.buildXor(NarrowTy
, LHSL
, RHSL
);
970 MachineInstrBuilder XorH
= MIRBuilder
.buildXor(NarrowTy
, LHSH
, RHSH
);
971 MachineInstrBuilder Or
= MIRBuilder
.buildOr(NarrowTy
, XorL
, XorH
);
972 MachineInstrBuilder Zero
= MIRBuilder
.buildConstant(NarrowTy
, 0);
973 MIRBuilder
.buildICmp(Pred
, MI
.getOperand(0).getReg(), Or
, Zero
);
975 MachineInstrBuilder CmpH
= MIRBuilder
.buildICmp(Pred
, ResTy
, LHSH
, RHSH
);
976 MachineInstrBuilder CmpHEQ
=
977 MIRBuilder
.buildICmp(CmpInst::Predicate::ICMP_EQ
, ResTy
, LHSH
, RHSH
);
978 MachineInstrBuilder CmpLU
= MIRBuilder
.buildICmp(
979 ICmpInst::getUnsignedPredicate(Pred
), ResTy
, LHSL
, RHSL
);
980 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), CmpHEQ
, CmpLU
, CmpH
);
982 Observer
.changedInstr(MI
);
983 MI
.eraseFromParent();
986 case TargetOpcode::G_SEXT_INREG
: {
988 return UnableToLegalize
;
990 if (!MI
.getOperand(2).isImm())
991 return UnableToLegalize
;
992 int64_t SizeInBits
= MI
.getOperand(2).getImm();
994 // So long as the new type has more bits than the bits we're extending we
995 // don't need to break it apart.
996 if (NarrowTy
.getScalarSizeInBits() >= SizeInBits
) {
997 Observer
.changingInstr(MI
);
998 // We don't lose any non-extension bits by truncating the src and
999 // sign-extending the dst.
1000 MachineOperand
&MO1
= MI
.getOperand(1);
1001 auto TruncMIB
= MIRBuilder
.buildTrunc(NarrowTy
, MO1
.getReg());
1002 MO1
.setReg(TruncMIB
->getOperand(0).getReg());
1004 MachineOperand
&MO2
= MI
.getOperand(0);
1005 Register DstExt
= MRI
.createGenericVirtualRegister(NarrowTy
);
1006 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1007 MIRBuilder
.buildInstr(TargetOpcode::G_SEXT
, {MO2
.getReg()}, {DstExt
});
1009 Observer
.changedInstr(MI
);
1013 // Break it apart. Components below the extension point are unmodified. The
1014 // component containing the extension point becomes a narrower SEXT_INREG.
1015 // Components above it are ashr'd from the component containing the
1016 // extension point.
1017 if (SizeOp0
% NarrowSize
!= 0)
1018 return UnableToLegalize
;
1019 int NumParts
= SizeOp0
/ NarrowSize
;
1021 // List the registers where the destination will be scattered.
1022 SmallVector
<Register
, 2> DstRegs
;
1023 // List the registers where the source will be split.
1024 SmallVector
<Register
, 2> SrcRegs
;
1026 // Create all the temporary registers.
1027 for (int i
= 0; i
< NumParts
; ++i
) {
1028 Register SrcReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
1030 SrcRegs
.push_back(SrcReg
);
1033 // Explode the big arguments into smaller chunks.
1034 MIRBuilder
.buildUnmerge(SrcRegs
, MI
.getOperand(1).getReg());
1036 Register AshrCstReg
=
1037 MIRBuilder
.buildConstant(NarrowTy
, NarrowTy
.getScalarSizeInBits() - 1)
1040 Register FullExtensionReg
= 0;
1041 Register PartialExtensionReg
= 0;
1043 // Do the operation on each small part.
1044 for (int i
= 0; i
< NumParts
; ++i
) {
1045 if ((i
+ 1) * NarrowTy
.getScalarSizeInBits() < SizeInBits
)
1046 DstRegs
.push_back(SrcRegs
[i
]);
1047 else if (i
* NarrowTy
.getScalarSizeInBits() > SizeInBits
) {
1048 assert(PartialExtensionReg
&&
1049 "Expected to visit partial extension before full");
1050 if (FullExtensionReg
) {
1051 DstRegs
.push_back(FullExtensionReg
);
1054 DstRegs
.push_back(MIRBuilder
1055 .buildInstr(TargetOpcode::G_ASHR
, {NarrowTy
},
1056 {PartialExtensionReg
, AshrCstReg
})
1059 FullExtensionReg
= DstRegs
.back();
1064 TargetOpcode::G_SEXT_INREG
, {NarrowTy
},
1065 {SrcRegs
[i
], SizeInBits
% NarrowTy
.getScalarSizeInBits()})
1068 PartialExtensionReg
= DstRegs
.back();
1072 // Gather the destination registers into the final destination.
1073 Register DstReg
= MI
.getOperand(0).getReg();
1074 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
1075 MI
.eraseFromParent();
1081 void LegalizerHelper::widenScalarSrc(MachineInstr
&MI
, LLT WideTy
,
1082 unsigned OpIdx
, unsigned ExtOpcode
) {
1083 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1084 auto ExtB
= MIRBuilder
.buildInstr(ExtOpcode
, {WideTy
}, {MO
.getReg()});
1085 MO
.setReg(ExtB
->getOperand(0).getReg());
1088 void LegalizerHelper::narrowScalarSrc(MachineInstr
&MI
, LLT NarrowTy
,
1090 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1091 auto ExtB
= MIRBuilder
.buildInstr(TargetOpcode::G_TRUNC
, {NarrowTy
},
1093 MO
.setReg(ExtB
->getOperand(0).getReg());
1096 void LegalizerHelper::widenScalarDst(MachineInstr
&MI
, LLT WideTy
,
1097 unsigned OpIdx
, unsigned TruncOpcode
) {
1098 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1099 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1100 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1101 MIRBuilder
.buildInstr(TruncOpcode
, {MO
.getReg()}, {DstExt
});
1105 void LegalizerHelper::narrowScalarDst(MachineInstr
&MI
, LLT NarrowTy
,
1106 unsigned OpIdx
, unsigned ExtOpcode
) {
1107 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1108 Register DstTrunc
= MRI
.createGenericVirtualRegister(NarrowTy
);
1109 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1110 MIRBuilder
.buildInstr(ExtOpcode
, {MO
.getReg()}, {DstTrunc
});
1111 MO
.setReg(DstTrunc
);
1114 void LegalizerHelper::moreElementsVectorDst(MachineInstr
&MI
, LLT WideTy
,
1116 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1117 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1118 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1119 MIRBuilder
.buildExtract(MO
.getReg(), DstExt
, 0);
1123 void LegalizerHelper::moreElementsVectorSrc(MachineInstr
&MI
, LLT MoreTy
,
1125 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1127 LLT OldTy
= MRI
.getType(MO
.getReg());
1128 unsigned OldElts
= OldTy
.getNumElements();
1129 unsigned NewElts
= MoreTy
.getNumElements();
1131 unsigned NumParts
= NewElts
/ OldElts
;
1133 // Use concat_vectors if the result is a multiple of the number of elements.
1134 if (NumParts
* OldElts
== NewElts
) {
1135 SmallVector
<Register
, 8> Parts
;
1136 Parts
.push_back(MO
.getReg());
1138 Register ImpDef
= MIRBuilder
.buildUndef(OldTy
).getReg(0);
1139 for (unsigned I
= 1; I
!= NumParts
; ++I
)
1140 Parts
.push_back(ImpDef
);
1142 auto Concat
= MIRBuilder
.buildConcatVectors(MoreTy
, Parts
);
1143 MO
.setReg(Concat
.getReg(0));
1147 Register MoreReg
= MRI
.createGenericVirtualRegister(MoreTy
);
1148 Register ImpDef
= MIRBuilder
.buildUndef(MoreTy
).getReg(0);
1149 MIRBuilder
.buildInsert(MoreReg
, ImpDef
, MO
.getReg(), 0);
1153 LegalizerHelper::LegalizeResult
1154 LegalizerHelper::widenScalarMergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
1157 return UnableToLegalize
;
1159 Register DstReg
= MI
.getOperand(0).getReg();
1160 LLT DstTy
= MRI
.getType(DstReg
);
1161 if (DstTy
.isVector())
1162 return UnableToLegalize
;
1164 Register Src1
= MI
.getOperand(1).getReg();
1165 LLT SrcTy
= MRI
.getType(Src1
);
1166 const int DstSize
= DstTy
.getSizeInBits();
1167 const int SrcSize
= SrcTy
.getSizeInBits();
1168 const int WideSize
= WideTy
.getSizeInBits();
1169 const int NumMerge
= (DstSize
+ WideSize
- 1) / WideSize
;
1171 unsigned NumOps
= MI
.getNumOperands();
1172 unsigned NumSrc
= MI
.getNumOperands() - 1;
1173 unsigned PartSize
= DstTy
.getSizeInBits() / NumSrc
;
1175 if (WideSize
>= DstSize
) {
1176 // Directly pack the bits in the target type.
1177 Register ResultReg
= MIRBuilder
.buildZExt(WideTy
, Src1
).getReg(0);
1179 for (unsigned I
= 2; I
!= NumOps
; ++I
) {
1180 const unsigned Offset
= (I
- 1) * PartSize
;
1182 Register SrcReg
= MI
.getOperand(I
).getReg();
1183 assert(MRI
.getType(SrcReg
) == LLT::scalar(PartSize
));
1185 auto ZextInput
= MIRBuilder
.buildZExt(WideTy
, SrcReg
);
1187 Register NextResult
= I
+ 1 == NumOps
&& WideTy
== DstTy
? DstReg
:
1188 MRI
.createGenericVirtualRegister(WideTy
);
1190 auto ShiftAmt
= MIRBuilder
.buildConstant(WideTy
, Offset
);
1191 auto Shl
= MIRBuilder
.buildShl(WideTy
, ZextInput
, ShiftAmt
);
1192 MIRBuilder
.buildOr(NextResult
, ResultReg
, Shl
);
1193 ResultReg
= NextResult
;
1196 if (WideSize
> DstSize
)
1197 MIRBuilder
.buildTrunc(DstReg
, ResultReg
);
1198 else if (DstTy
.isPointer())
1199 MIRBuilder
.buildIntToPtr(DstReg
, ResultReg
);
1201 MI
.eraseFromParent();
1205 // Unmerge the original values to the GCD type, and recombine to the next
1206 // multiple greater than the original type.
1208 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1209 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1210 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1211 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1212 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1213 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1214 // %12:_(s12) = G_MERGE_VALUES %10, %11
1216 // Padding with undef if necessary:
1218 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1219 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1220 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1221 // %7:_(s2) = G_IMPLICIT_DEF
1222 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1223 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1224 // %10:_(s12) = G_MERGE_VALUES %8, %9
1226 const int GCD
= greatestCommonDivisor(SrcSize
, WideSize
);
1227 LLT GCDTy
= LLT::scalar(GCD
);
1229 SmallVector
<Register
, 8> Parts
;
1230 SmallVector
<Register
, 8> NewMergeRegs
;
1231 SmallVector
<Register
, 8> Unmerges
;
1232 LLT WideDstTy
= LLT::scalar(NumMerge
* WideSize
);
1234 // Decompose the original operands if they don't evenly divide.
1235 for (int I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
1236 Register SrcReg
= MI
.getOperand(I
).getReg();
1237 if (GCD
== SrcSize
) {
1238 Unmerges
.push_back(SrcReg
);
1240 auto Unmerge
= MIRBuilder
.buildUnmerge(GCDTy
, SrcReg
);
1241 for (int J
= 0, JE
= Unmerge
->getNumOperands() - 1; J
!= JE
; ++J
)
1242 Unmerges
.push_back(Unmerge
.getReg(J
));
1246 // Pad with undef to the next size that is a multiple of the requested size.
1247 if (static_cast<int>(Unmerges
.size()) != NumMerge
* WideSize
) {
1248 Register UndefReg
= MIRBuilder
.buildUndef(GCDTy
).getReg(0);
1249 for (int I
= Unmerges
.size(); I
!= NumMerge
* WideSize
; ++I
)
1250 Unmerges
.push_back(UndefReg
);
1253 const int PartsPerGCD
= WideSize
/ GCD
;
1255 // Build merges of each piece.
1256 ArrayRef
<Register
> Slicer(Unmerges
);
1257 for (int I
= 0; I
!= NumMerge
; ++I
, Slicer
= Slicer
.drop_front(PartsPerGCD
)) {
1258 auto Merge
= MIRBuilder
.buildMerge(WideTy
, Slicer
.take_front(PartsPerGCD
));
1259 NewMergeRegs
.push_back(Merge
.getReg(0));
1262 // A truncate may be necessary if the requested type doesn't evenly divide the
1263 // original result type.
1264 if (DstTy
.getSizeInBits() == WideDstTy
.getSizeInBits()) {
1265 MIRBuilder
.buildMerge(DstReg
, NewMergeRegs
);
1267 auto FinalMerge
= MIRBuilder
.buildMerge(WideDstTy
, NewMergeRegs
);
1268 MIRBuilder
.buildTrunc(DstReg
, FinalMerge
.getReg(0));
1271 MI
.eraseFromParent();
1275 LegalizerHelper::LegalizeResult
1276 LegalizerHelper::widenScalarUnmergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
1279 return UnableToLegalize
;
1281 unsigned NumDst
= MI
.getNumOperands() - 1;
1282 Register SrcReg
= MI
.getOperand(NumDst
).getReg();
1283 LLT SrcTy
= MRI
.getType(SrcReg
);
1284 if (!SrcTy
.isScalar())
1285 return UnableToLegalize
;
1287 Register Dst0Reg
= MI
.getOperand(0).getReg();
1288 LLT DstTy
= MRI
.getType(Dst0Reg
);
1289 if (!DstTy
.isScalar())
1290 return UnableToLegalize
;
1292 unsigned NewSrcSize
= NumDst
* WideTy
.getSizeInBits();
1293 LLT NewSrcTy
= LLT::scalar(NewSrcSize
);
1294 unsigned SizeDiff
= WideTy
.getSizeInBits() - DstTy
.getSizeInBits();
1296 auto WideSrc
= MIRBuilder
.buildZExt(NewSrcTy
, SrcReg
);
1298 for (unsigned I
= 1; I
!= NumDst
; ++I
) {
1299 auto ShiftAmt
= MIRBuilder
.buildConstant(NewSrcTy
, SizeDiff
* I
);
1300 auto Shl
= MIRBuilder
.buildShl(NewSrcTy
, WideSrc
, ShiftAmt
);
1301 WideSrc
= MIRBuilder
.buildOr(NewSrcTy
, WideSrc
, Shl
);
1304 Observer
.changingInstr(MI
);
1306 MI
.getOperand(NumDst
).setReg(WideSrc
->getOperand(0).getReg());
1307 for (unsigned I
= 0; I
!= NumDst
; ++I
)
1308 widenScalarDst(MI
, WideTy
, I
);
1310 Observer
.changedInstr(MI
);
1315 LegalizerHelper::LegalizeResult
1316 LegalizerHelper::widenScalarExtract(MachineInstr
&MI
, unsigned TypeIdx
,
1318 Register DstReg
= MI
.getOperand(0).getReg();
1319 Register SrcReg
= MI
.getOperand(1).getReg();
1320 LLT SrcTy
= MRI
.getType(SrcReg
);
1322 LLT DstTy
= MRI
.getType(DstReg
);
1323 unsigned Offset
= MI
.getOperand(2).getImm();
1326 if (SrcTy
.isVector() || DstTy
.isVector())
1327 return UnableToLegalize
;
1330 if (SrcTy
.isPointer()) {
1331 // Extracts from pointers can be handled only if they are really just
1333 const DataLayout
&DL
= MIRBuilder
.getDataLayout();
1334 if (DL
.isNonIntegralAddressSpace(SrcTy
.getAddressSpace()))
1335 return UnableToLegalize
;
1337 LLT SrcAsIntTy
= LLT::scalar(SrcTy
.getSizeInBits());
1338 Src
= MIRBuilder
.buildPtrToInt(SrcAsIntTy
, Src
);
1342 if (DstTy
.isPointer())
1343 return UnableToLegalize
;
1346 // Avoid a shift in the degenerate case.
1347 MIRBuilder
.buildTrunc(DstReg
,
1348 MIRBuilder
.buildAnyExtOrTrunc(WideTy
, Src
));
1349 MI
.eraseFromParent();
1353 // Do a shift in the source type.
1354 LLT ShiftTy
= SrcTy
;
1355 if (WideTy
.getSizeInBits() > SrcTy
.getSizeInBits()) {
1356 Src
= MIRBuilder
.buildAnyExt(WideTy
, Src
);
1358 } else if (WideTy
.getSizeInBits() > SrcTy
.getSizeInBits())
1359 return UnableToLegalize
;
1361 auto LShr
= MIRBuilder
.buildLShr(
1362 ShiftTy
, Src
, MIRBuilder
.buildConstant(ShiftTy
, Offset
));
1363 MIRBuilder
.buildTrunc(DstReg
, LShr
);
1364 MI
.eraseFromParent();
1368 if (SrcTy
.isScalar()) {
1369 Observer
.changingInstr(MI
);
1370 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1371 Observer
.changedInstr(MI
);
1375 if (!SrcTy
.isVector())
1376 return UnableToLegalize
;
1378 if (DstTy
!= SrcTy
.getElementType())
1379 return UnableToLegalize
;
1381 if (Offset
% SrcTy
.getScalarSizeInBits() != 0)
1382 return UnableToLegalize
;
1384 Observer
.changingInstr(MI
);
1385 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1387 MI
.getOperand(2).setImm((WideTy
.getSizeInBits() / SrcTy
.getSizeInBits()) *
1389 widenScalarDst(MI
, WideTy
.getScalarType(), 0);
1390 Observer
.changedInstr(MI
);
1394 LegalizerHelper::LegalizeResult
1395 LegalizerHelper::widenScalarInsert(MachineInstr
&MI
, unsigned TypeIdx
,
1398 return UnableToLegalize
;
1399 Observer
.changingInstr(MI
);
1400 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1401 widenScalarDst(MI
, WideTy
);
1402 Observer
.changedInstr(MI
);
1406 LegalizerHelper::LegalizeResult
1407 LegalizerHelper::widenScalar(MachineInstr
&MI
, unsigned TypeIdx
, LLT WideTy
) {
1408 MIRBuilder
.setInstr(MI
);
1410 switch (MI
.getOpcode()) {
1412 return UnableToLegalize
;
1413 case TargetOpcode::G_EXTRACT
:
1414 return widenScalarExtract(MI
, TypeIdx
, WideTy
);
1415 case TargetOpcode::G_INSERT
:
1416 return widenScalarInsert(MI
, TypeIdx
, WideTy
);
1417 case TargetOpcode::G_MERGE_VALUES
:
1418 return widenScalarMergeValues(MI
, TypeIdx
, WideTy
);
1419 case TargetOpcode::G_UNMERGE_VALUES
:
1420 return widenScalarUnmergeValues(MI
, TypeIdx
, WideTy
);
1421 case TargetOpcode::G_UADDO
:
1422 case TargetOpcode::G_USUBO
: {
1424 return UnableToLegalize
; // TODO
1425 auto LHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, {WideTy
},
1426 {MI
.getOperand(2).getReg()});
1427 auto RHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, {WideTy
},
1428 {MI
.getOperand(3).getReg()});
1429 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_UADDO
1430 ? TargetOpcode::G_ADD
1431 : TargetOpcode::G_SUB
;
1432 // Do the arithmetic in the larger type.
1433 auto NewOp
= MIRBuilder
.buildInstr(Opcode
, {WideTy
}, {LHSZext
, RHSZext
});
1434 LLT OrigTy
= MRI
.getType(MI
.getOperand(0).getReg());
1435 APInt Mask
= APInt::getAllOnesValue(OrigTy
.getSizeInBits());
1436 auto AndOp
= MIRBuilder
.buildInstr(
1437 TargetOpcode::G_AND
, {WideTy
},
1438 {NewOp
, MIRBuilder
.buildConstant(WideTy
, Mask
.getZExtValue())});
1439 // There is no overflow if the AndOp is the same as NewOp.
1440 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, MI
.getOperand(1).getReg(), NewOp
,
1442 // Now trunc the NewOp to the original result.
1443 MIRBuilder
.buildTrunc(MI
.getOperand(0).getReg(), NewOp
);
1444 MI
.eraseFromParent();
1447 case TargetOpcode::G_CTTZ
:
1448 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
1449 case TargetOpcode::G_CTLZ
:
1450 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
1451 case TargetOpcode::G_CTPOP
: {
1453 Observer
.changingInstr(MI
);
1454 widenScalarDst(MI
, WideTy
, 0);
1455 Observer
.changedInstr(MI
);
1459 Register SrcReg
= MI
.getOperand(1).getReg();
1461 // First ZEXT the input.
1462 auto MIBSrc
= MIRBuilder
.buildZExt(WideTy
, SrcReg
);
1463 LLT CurTy
= MRI
.getType(SrcReg
);
1464 if (MI
.getOpcode() == TargetOpcode::G_CTTZ
) {
1465 // The count is the same in the larger type except if the original
1466 // value was zero. This can be handled by setting the bit just off
1467 // the top of the original type.
1469 APInt::getOneBitSet(WideTy
.getSizeInBits(), CurTy
.getSizeInBits());
1470 MIBSrc
= MIRBuilder
.buildOr(
1471 WideTy
, MIBSrc
, MIRBuilder
.buildConstant(WideTy
, TopBit
));
1474 // Perform the operation at the larger size.
1475 auto MIBNewOp
= MIRBuilder
.buildInstr(MI
.getOpcode(), {WideTy
}, {MIBSrc
});
1476 // This is already the correct result for CTPOP and CTTZs
1477 if (MI
.getOpcode() == TargetOpcode::G_CTLZ
||
1478 MI
.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF
) {
1479 // The correct result is NewOp - (Difference in widety and current ty).
1480 unsigned SizeDiff
= WideTy
.getSizeInBits() - CurTy
.getSizeInBits();
1481 MIBNewOp
= MIRBuilder
.buildInstr(
1482 TargetOpcode::G_SUB
, {WideTy
},
1483 {MIBNewOp
, MIRBuilder
.buildConstant(WideTy
, SizeDiff
)});
1486 MIRBuilder
.buildZExtOrTrunc(MI
.getOperand(0), MIBNewOp
);
1487 MI
.eraseFromParent();
1490 case TargetOpcode::G_BSWAP
: {
1491 Observer
.changingInstr(MI
);
1492 Register DstReg
= MI
.getOperand(0).getReg();
1494 Register ShrReg
= MRI
.createGenericVirtualRegister(WideTy
);
1495 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1496 Register ShiftAmtReg
= MRI
.createGenericVirtualRegister(WideTy
);
1497 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1499 MI
.getOperand(0).setReg(DstExt
);
1501 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1503 LLT Ty
= MRI
.getType(DstReg
);
1504 unsigned DiffBits
= WideTy
.getScalarSizeInBits() - Ty
.getScalarSizeInBits();
1505 MIRBuilder
.buildConstant(ShiftAmtReg
, DiffBits
);
1506 MIRBuilder
.buildInstr(TargetOpcode::G_LSHR
)
1509 .addUse(ShiftAmtReg
);
1511 MIRBuilder
.buildTrunc(DstReg
, ShrReg
);
1512 Observer
.changedInstr(MI
);
1515 case TargetOpcode::G_BITREVERSE
: {
1516 Observer
.changingInstr(MI
);
1518 Register DstReg
= MI
.getOperand(0).getReg();
1519 LLT Ty
= MRI
.getType(DstReg
);
1520 unsigned DiffBits
= WideTy
.getScalarSizeInBits() - Ty
.getScalarSizeInBits();
1522 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1523 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1524 MI
.getOperand(0).setReg(DstExt
);
1525 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1527 auto ShiftAmt
= MIRBuilder
.buildConstant(WideTy
, DiffBits
);
1528 auto Shift
= MIRBuilder
.buildLShr(WideTy
, DstExt
, ShiftAmt
);
1529 MIRBuilder
.buildTrunc(DstReg
, Shift
);
1530 Observer
.changedInstr(MI
);
1533 case TargetOpcode::G_ADD
:
1534 case TargetOpcode::G_AND
:
1535 case TargetOpcode::G_MUL
:
1536 case TargetOpcode::G_OR
:
1537 case TargetOpcode::G_XOR
:
1538 case TargetOpcode::G_SUB
:
1539 // Perform operation at larger width (any extension is fine here, high bits
1540 // don't affect the result) and then truncate the result back to the
1541 // original type.
1542 Observer
.changingInstr(MI
);
1543 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1544 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
1545 widenScalarDst(MI
, WideTy
);
1546 Observer
.changedInstr(MI
);
1549 case TargetOpcode::G_SHL
:
1550 Observer
.changingInstr(MI
);
1553 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1554 widenScalarDst(MI
, WideTy
);
1556 assert(TypeIdx
== 1);
1557 // The "number of bits to shift" operand must preserve its value as an
1558 // unsigned integer:
1559 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1562 Observer
.changedInstr(MI
);
1565 case TargetOpcode::G_SDIV
:
1566 case TargetOpcode::G_SREM
:
1567 case TargetOpcode::G_SMIN
:
1568 case TargetOpcode::G_SMAX
:
1569 Observer
.changingInstr(MI
);
1570 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
1571 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1572 widenScalarDst(MI
, WideTy
);
1573 Observer
.changedInstr(MI
);
1576 case TargetOpcode::G_ASHR
:
1577 case TargetOpcode::G_LSHR
:
1578 Observer
.changingInstr(MI
);
1581 unsigned CvtOp
= MI
.getOpcode() == TargetOpcode::G_ASHR
?
1582 TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT
;
1584 widenScalarSrc(MI
, WideTy
, 1, CvtOp
);
1585 widenScalarDst(MI
, WideTy
);
1587 assert(TypeIdx
== 1);
1588 // The "number of bits to shift" operand must preserve its value as an
1589 // unsigned integer:
1590 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1593 Observer
.changedInstr(MI
);
1595 case TargetOpcode::G_UDIV
:
1596 case TargetOpcode::G_UREM
:
1597 case TargetOpcode::G_UMIN
:
1598 case TargetOpcode::G_UMAX
:
1599 Observer
.changingInstr(MI
);
1600 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1601 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1602 widenScalarDst(MI
, WideTy
);
1603 Observer
.changedInstr(MI
);
1606 case TargetOpcode::G_SELECT
:
1607 Observer
.changingInstr(MI
);
1609 // Perform operation at larger width (any extension is fine here, high
1610 // bits don't affect the result) and then truncate the result back to the
1611 // original type.
1612 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
1613 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_ANYEXT
);
1614 widenScalarDst(MI
, WideTy
);
1616 bool IsVec
= MRI
.getType(MI
.getOperand(1).getReg()).isVector();
1617 // Explicit extension is required here since high bits affect the result.
1618 widenScalarSrc(MI
, WideTy
, 1, MIRBuilder
.getBoolExtOp(IsVec
, false));
1620 Observer
.changedInstr(MI
);
1623 case TargetOpcode::G_FPTOSI
:
1624 case TargetOpcode::G_FPTOUI
:
1625 Observer
.changingInstr(MI
);
1628 widenScalarDst(MI
, WideTy
);
1630 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_FPEXT
);
1632 Observer
.changedInstr(MI
);
1634 case TargetOpcode::G_SITOFP
:
1636 return UnableToLegalize
;
1637 Observer
.changingInstr(MI
);
1638 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
1639 Observer
.changedInstr(MI
);
1642 case TargetOpcode::G_UITOFP
:
1644 return UnableToLegalize
;
1645 Observer
.changingInstr(MI
);
1646 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1647 Observer
.changedInstr(MI
);
1650 case TargetOpcode::G_LOAD
:
1651 case TargetOpcode::G_SEXTLOAD
:
1652 case TargetOpcode::G_ZEXTLOAD
:
1653 Observer
.changingInstr(MI
);
1654 widenScalarDst(MI
, WideTy
);
1655 Observer
.changedInstr(MI
);
1658 case TargetOpcode::G_STORE
: {
1660 return UnableToLegalize
;
1662 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
1663 if (!isPowerOf2_32(Ty
.getSizeInBits()))
1664 return UnableToLegalize
;
1666 Observer
.changingInstr(MI
);
1668 unsigned ExtType
= Ty
.getScalarSizeInBits() == 1 ?
1669 TargetOpcode::G_ZEXT
: TargetOpcode::G_ANYEXT
;
1670 widenScalarSrc(MI
, WideTy
, 0, ExtType
);
1672 Observer
.changedInstr(MI
);
1675 case TargetOpcode::G_CONSTANT
: {
1676 MachineOperand
&SrcMO
= MI
.getOperand(1);
1677 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1678 const APInt
&Val
= SrcMO
.getCImm()->getValue().sext(WideTy
.getSizeInBits());
1679 Observer
.changingInstr(MI
);
1680 SrcMO
.setCImm(ConstantInt::get(Ctx
, Val
));
1682 widenScalarDst(MI
, WideTy
);
1683 Observer
.changedInstr(MI
);
1686 case TargetOpcode::G_FCONSTANT
: {
1687 MachineOperand
&SrcMO
= MI
.getOperand(1);
1688 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1689 APFloat Val
= SrcMO
.getFPImm()->getValueAPF();
1691 switch (WideTy
.getSizeInBits()) {
1693 Val
.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven
,
1697 Val
.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven
,
1701 return UnableToLegalize
;
1704 assert(!LosesInfo
&& "extend should always be lossless");
1706 Observer
.changingInstr(MI
);
1707 SrcMO
.setFPImm(ConstantFP::get(Ctx
, Val
));
1709 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
1710 Observer
.changedInstr(MI
);
1713 case TargetOpcode::G_IMPLICIT_DEF
: {
1714 Observer
.changingInstr(MI
);
1715 widenScalarDst(MI
, WideTy
);
1716 Observer
.changedInstr(MI
);
1719 case TargetOpcode::G_BRCOND
:
1720 Observer
.changingInstr(MI
);
1721 widenScalarSrc(MI
, WideTy
, 0, MIRBuilder
.getBoolExtOp(false, false));
1722 Observer
.changedInstr(MI
);
1725 case TargetOpcode::G_FCMP
:
1726 Observer
.changingInstr(MI
);
1728 widenScalarDst(MI
, WideTy
);
1730 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_FPEXT
);
1731 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_FPEXT
);
1733 Observer
.changedInstr(MI
);
1736 case TargetOpcode::G_ICMP
:
1737 Observer
.changingInstr(MI
);
1739 widenScalarDst(MI
, WideTy
);
1741 unsigned ExtOpcode
= CmpInst::isSigned(static_cast<CmpInst::Predicate
>(
1742 MI
.getOperand(1).getPredicate()))
1743 ? TargetOpcode::G_SEXT
1744 : TargetOpcode::G_ZEXT
;
1745 widenScalarSrc(MI
, WideTy
, 2, ExtOpcode
);
1746 widenScalarSrc(MI
, WideTy
, 3, ExtOpcode
);
1748 Observer
.changedInstr(MI
);
1751 case TargetOpcode::G_GEP
:
1752 assert(TypeIdx
== 1 && "unable to legalize pointer of GEP");
1753 Observer
.changingInstr(MI
);
1754 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1755 Observer
.changedInstr(MI
);
1758 case TargetOpcode::G_PHI
: {
1759 assert(TypeIdx
== 0 && "Expecting only Idx 0");
1761 Observer
.changingInstr(MI
);
1762 for (unsigned I
= 1; I
< MI
.getNumOperands(); I
+= 2) {
1763 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
1764 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
1765 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_ANYEXT
);
1768 MachineBasicBlock
&MBB
= *MI
.getParent();
1769 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
1770 widenScalarDst(MI
, WideTy
);
1771 Observer
.changedInstr(MI
);
1774 case TargetOpcode::G_EXTRACT_VECTOR_ELT
: {
1776 Register VecReg
= MI
.getOperand(1).getReg();
1777 LLT VecTy
= MRI
.getType(VecReg
);
1778 Observer
.changingInstr(MI
);
1780 widenScalarSrc(MI
, LLT::vector(VecTy
.getNumElements(),
1781 WideTy
.getSizeInBits()),
1782 1, TargetOpcode::G_SEXT
);
1784 widenScalarDst(MI
, WideTy
, 0);
1785 Observer
.changedInstr(MI
);
1790 return UnableToLegalize
;
1791 Observer
.changingInstr(MI
);
1792 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1793 Observer
.changedInstr(MI
);
1796 case TargetOpcode::G_FADD
:
1797 case TargetOpcode::G_FMUL
:
1798 case TargetOpcode::G_FSUB
:
1799 case TargetOpcode::G_FMA
:
1800 case TargetOpcode::G_FMAD
:
1801 case TargetOpcode::G_FNEG
:
1802 case TargetOpcode::G_FABS
:
1803 case TargetOpcode::G_FCANONICALIZE
:
1804 case TargetOpcode::G_FMINNUM
:
1805 case TargetOpcode::G_FMAXNUM
:
1806 case TargetOpcode::G_FMINNUM_IEEE
:
1807 case TargetOpcode::G_FMAXNUM_IEEE
:
1808 case TargetOpcode::G_FMINIMUM
:
1809 case TargetOpcode::G_FMAXIMUM
:
1810 case TargetOpcode::G_FDIV
:
1811 case TargetOpcode::G_FREM
:
1812 case TargetOpcode::G_FCEIL
:
1813 case TargetOpcode::G_FFLOOR
:
1814 case TargetOpcode::G_FCOS
:
1815 case TargetOpcode::G_FSIN
:
1816 case TargetOpcode::G_FLOG10
:
1817 case TargetOpcode::G_FLOG
:
1818 case TargetOpcode::G_FLOG2
:
1819 case TargetOpcode::G_FRINT
:
1820 case TargetOpcode::G_FNEARBYINT
:
1821 case TargetOpcode::G_FSQRT
:
1822 case TargetOpcode::G_FEXP
:
1823 case TargetOpcode::G_FEXP2
:
1824 case TargetOpcode::G_FPOW
:
1825 case TargetOpcode::G_INTRINSIC_TRUNC
:
1826 case TargetOpcode::G_INTRINSIC_ROUND
:
1827 assert(TypeIdx
== 0);
1828 Observer
.changingInstr(MI
);
1830 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
)
1831 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_FPEXT
);
1833 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
1834 Observer
.changedInstr(MI
);
1836 case TargetOpcode::G_INTTOPTR
:
1838 return UnableToLegalize
;
1840 Observer
.changingInstr(MI
);
1841 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1842 Observer
.changedInstr(MI
);
1844 case TargetOpcode::G_PTRTOINT
:
1846 return UnableToLegalize
;
1848 Observer
.changingInstr(MI
);
1849 widenScalarDst(MI
, WideTy
, 0);
1850 Observer
.changedInstr(MI
);
1852 case TargetOpcode::G_BUILD_VECTOR
: {
1853 Observer
.changingInstr(MI
);
1855 const LLT WideEltTy
= TypeIdx
== 1 ? WideTy
: WideTy
.getElementType();
1856 for (int I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
)
1857 widenScalarSrc(MI
, WideEltTy
, I
, TargetOpcode::G_ANYEXT
);
1859 // Avoid changing the result vector type if the source element type was
1860 // requested.
1862 auto &TII
= *MI
.getMF()->getSubtarget().getInstrInfo();
1863 MI
.setDesc(TII
.get(TargetOpcode::G_BUILD_VECTOR_TRUNC
));
1865 widenScalarDst(MI
, WideTy
, 0);
1868 Observer
.changedInstr(MI
);
1871 case TargetOpcode::G_SEXT_INREG
:
1873 return UnableToLegalize
;
1875 Observer
.changingInstr(MI
);
1876 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1877 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_TRUNC
);
1878 Observer
.changedInstr(MI
);
1883 LegalizerHelper::LegalizeResult
1884 LegalizerHelper::lower(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
1885 using namespace TargetOpcode
;
1886 MIRBuilder
.setInstr(MI
);
1888 switch(MI
.getOpcode()) {
1890 return UnableToLegalize
;
1891 case TargetOpcode::G_SREM
:
1892 case TargetOpcode::G_UREM
: {
1893 Register QuotReg
= MRI
.createGenericVirtualRegister(Ty
);
1894 MIRBuilder
.buildInstr(MI
.getOpcode() == G_SREM
? G_SDIV
: G_UDIV
)
1896 .addUse(MI
.getOperand(1).getReg())
1897 .addUse(MI
.getOperand(2).getReg());
1899 Register ProdReg
= MRI
.createGenericVirtualRegister(Ty
);
1900 MIRBuilder
.buildMul(ProdReg
, QuotReg
, MI
.getOperand(2).getReg());
1901 MIRBuilder
.buildSub(MI
.getOperand(0).getReg(), MI
.getOperand(1).getReg(),
1903 MI
.eraseFromParent();
1906 case TargetOpcode::G_SADDO
:
1907 case TargetOpcode::G_SSUBO
:
1908 return lowerSADDO_SSUBO(MI
);
1909 case TargetOpcode::G_SMULO
:
1910 case TargetOpcode::G_UMULO
: {
1911 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1912 // result.
1913 Register Res
= MI
.getOperand(0).getReg();
1914 Register Overflow
= MI
.getOperand(1).getReg();
1915 Register LHS
= MI
.getOperand(2).getReg();
1916 Register RHS
= MI
.getOperand(3).getReg();
1918 MIRBuilder
.buildMul(Res
, LHS
, RHS
);
1920 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_SMULO
1921 ? TargetOpcode::G_SMULH
1922 : TargetOpcode::G_UMULH
;
1924 Register HiPart
= MRI
.createGenericVirtualRegister(Ty
);
1925 MIRBuilder
.buildInstr(Opcode
)
1930 Register Zero
= MRI
.createGenericVirtualRegister(Ty
);
1931 MIRBuilder
.buildConstant(Zero
, 0);
1933 // For *signed* multiply, overflow is detected by checking:
1934 // (hi != (lo >> bitwidth-1))
1935 if (Opcode
== TargetOpcode::G_SMULH
) {
1936 Register Shifted
= MRI
.createGenericVirtualRegister(Ty
);
1937 Register ShiftAmt
= MRI
.createGenericVirtualRegister(Ty
);
1938 MIRBuilder
.buildConstant(ShiftAmt
, Ty
.getSizeInBits() - 1);
1939 MIRBuilder
.buildInstr(TargetOpcode::G_ASHR
)
1943 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Shifted
);
1945 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Zero
);
1947 MI
.eraseFromParent();
1950 case TargetOpcode::G_FNEG
: {
1951 // TODO: Handle vector types once we are able to
1954 return UnableToLegalize
;
1955 Register Res
= MI
.getOperand(0).getReg();
1957 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1958 switch (Ty
.getSizeInBits()) {
1960 ZeroTy
= Type::getHalfTy(Ctx
);
1963 ZeroTy
= Type::getFloatTy(Ctx
);
1966 ZeroTy
= Type::getDoubleTy(Ctx
);
1969 ZeroTy
= Type::getFP128Ty(Ctx
);
1972 llvm_unreachable("unexpected floating-point type");
1974 ConstantFP
&ZeroForNegation
=
1975 *cast
<ConstantFP
>(ConstantFP::getZeroValueForNegation(ZeroTy
));
1976 auto Zero
= MIRBuilder
.buildFConstant(Ty
, ZeroForNegation
);
1977 Register SubByReg
= MI
.getOperand(1).getReg();
1978 Register ZeroReg
= Zero
->getOperand(0).getReg();
1979 MIRBuilder
.buildInstr(TargetOpcode::G_FSUB
, {Res
}, {ZeroReg
, SubByReg
},
1981 MI
.eraseFromParent();
1984 case TargetOpcode::G_FSUB
: {
1985 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1986 // First, check if G_FNEG is marked as Lower. If so, we may
1987 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1988 if (LI
.getAction({G_FNEG
, {Ty
}}).Action
== Lower
)
1989 return UnableToLegalize
;
1990 Register Res
= MI
.getOperand(0).getReg();
1991 Register LHS
= MI
.getOperand(1).getReg();
1992 Register RHS
= MI
.getOperand(2).getReg();
1993 Register Neg
= MRI
.createGenericVirtualRegister(Ty
);
1994 MIRBuilder
.buildInstr(TargetOpcode::G_FNEG
).addDef(Neg
).addUse(RHS
);
1995 MIRBuilder
.buildInstr(TargetOpcode::G_FADD
, {Res
}, {LHS
, Neg
}, MI
.getFlags());
1996 MI
.eraseFromParent();
1999 case TargetOpcode::G_FMAD
:
2000 return lowerFMad(MI
);
2001 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS
: {
2002 Register OldValRes
= MI
.getOperand(0).getReg();
2003 Register SuccessRes
= MI
.getOperand(1).getReg();
2004 Register Addr
= MI
.getOperand(2).getReg();
2005 Register CmpVal
= MI
.getOperand(3).getReg();
2006 Register NewVal
= MI
.getOperand(4).getReg();
2007 MIRBuilder
.buildAtomicCmpXchg(OldValRes
, Addr
, CmpVal
, NewVal
,
2008 **MI
.memoperands_begin());
2009 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, SuccessRes
, OldValRes
, CmpVal
);
2010 MI
.eraseFromParent();
2013 case TargetOpcode::G_LOAD
:
2014 case TargetOpcode::G_SEXTLOAD
:
2015 case TargetOpcode::G_ZEXTLOAD
: {
2016 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2017 Register DstReg
= MI
.getOperand(0).getReg();
2018 Register PtrReg
= MI
.getOperand(1).getReg();
2019 LLT DstTy
= MRI
.getType(DstReg
);
2020 auto &MMO
= **MI
.memoperands_begin();
2022 if (DstTy
.getSizeInBits() == MMO
.getSizeInBits()) {
2023 if (MI
.getOpcode() == TargetOpcode::G_LOAD
) {
2024 // This load needs splitting into power of 2 sized loads.
2025 if (DstTy
.isVector())
2026 return UnableToLegalize
;
2027 if (isPowerOf2_32(DstTy
.getSizeInBits()))
2028 return UnableToLegalize
; // Don't know what we're being asked to do.
2030 // Our strategy here is to generate anyextending loads for the smaller
2031 // types up to next power-2 result type, and then combine the two larger
2032 // result values together, before truncating back down to the non-pow-2
2034 // E.g. v1 = i24 load =>
2035 // v2 = i32 load (2 byte)
2036 // v3 = i32 load (1 byte)
2037 // v4 = i32 shl v3, 16
2038 // v5 = i32 or v4, v2
2039 // v1 = i24 trunc v5
2040 // By doing this we generate the correct truncate which should get
2041 // combined away as an artifact with a matching extend.
2042 uint64_t LargeSplitSize
= PowerOf2Floor(DstTy
.getSizeInBits());
2043 uint64_t SmallSplitSize
= DstTy
.getSizeInBits() - LargeSplitSize
;
2045 MachineFunction
&MF
= MIRBuilder
.getMF();
2046 MachineMemOperand
*LargeMMO
=
2047 MF
.getMachineMemOperand(&MMO
, 0, LargeSplitSize
/ 8);
2048 MachineMemOperand
*SmallMMO
= MF
.getMachineMemOperand(
2049 &MMO
, LargeSplitSize
/ 8, SmallSplitSize
/ 8);
2051 LLT PtrTy
= MRI
.getType(PtrReg
);
2052 unsigned AnyExtSize
= NextPowerOf2(DstTy
.getSizeInBits());
2053 LLT AnyExtTy
= LLT::scalar(AnyExtSize
);
2054 Register LargeLdReg
= MRI
.createGenericVirtualRegister(AnyExtTy
);
2055 Register SmallLdReg
= MRI
.createGenericVirtualRegister(AnyExtTy
);
2057 MIRBuilder
.buildLoad(LargeLdReg
, PtrReg
, *LargeMMO
);
2060 MIRBuilder
.buildConstant(LLT::scalar(64), LargeSplitSize
/ 8);
2061 Register GEPReg
= MRI
.createGenericVirtualRegister(PtrTy
);
2062 auto SmallPtr
= MIRBuilder
.buildGEP(GEPReg
, PtrReg
, OffsetCst
.getReg(0));
2063 auto SmallLoad
= MIRBuilder
.buildLoad(SmallLdReg
, SmallPtr
.getReg(0),
2066 auto ShiftAmt
= MIRBuilder
.buildConstant(AnyExtTy
, LargeSplitSize
);
2067 auto Shift
= MIRBuilder
.buildShl(AnyExtTy
, SmallLoad
, ShiftAmt
);
2068 auto Or
= MIRBuilder
.buildOr(AnyExtTy
, Shift
, LargeLoad
);
2069 MIRBuilder
.buildTrunc(DstReg
, {Or
.getReg(0)});
2070 MI
.eraseFromParent();
2073 MIRBuilder
.buildLoad(DstReg
, PtrReg
, MMO
);
2074 MI
.eraseFromParent();
2078 if (DstTy
.isScalar()) {
2080 MRI
.createGenericVirtualRegister(LLT::scalar(MMO
.getSizeInBits()));
2081 MIRBuilder
.buildLoad(TmpReg
, PtrReg
, MMO
);
2082 switch (MI
.getOpcode()) {
2084 llvm_unreachable("Unexpected opcode");
2085 case TargetOpcode::G_LOAD
:
2086 MIRBuilder
.buildAnyExt(DstReg
, TmpReg
);
2088 case TargetOpcode::G_SEXTLOAD
:
2089 MIRBuilder
.buildSExt(DstReg
, TmpReg
);
2091 case TargetOpcode::G_ZEXTLOAD
:
2092 MIRBuilder
.buildZExt(DstReg
, TmpReg
);
2095 MI
.eraseFromParent();
2099 return UnableToLegalize
;
2101 case TargetOpcode::G_STORE
: {
2102 // Lower a non-power of 2 store into multiple pow-2 stores.
2103 // E.g. split an i24 store into an i16 store + i8 store.
2104 // We do this by first extending the stored value to the next largest power
2105 // of 2 type, and then using truncating stores to store the components.
2106 // By doing this, likewise with G_LOAD, generate an extend that can be
2107 // artifact-combined away instead of leaving behind extracts.
2108 Register SrcReg
= MI
.getOperand(0).getReg();
2109 Register PtrReg
= MI
.getOperand(1).getReg();
2110 LLT SrcTy
= MRI
.getType(SrcReg
);
2111 MachineMemOperand
&MMO
= **MI
.memoperands_begin();
2112 if (SrcTy
.getSizeInBits() != MMO
.getSizeInBits())
2113 return UnableToLegalize
;
2114 if (SrcTy
.isVector())
2115 return UnableToLegalize
;
2116 if (isPowerOf2_32(SrcTy
.getSizeInBits()))
2117 return UnableToLegalize
; // Don't know what we're being asked to do.
2119 // Extend to the next pow-2.
2120 const LLT ExtendTy
= LLT::scalar(NextPowerOf2(SrcTy
.getSizeInBits()));
2121 auto ExtVal
= MIRBuilder
.buildAnyExt(ExtendTy
, SrcReg
);
2123 // Obtain the smaller value by shifting away the larger value.
2124 uint64_t LargeSplitSize
= PowerOf2Floor(SrcTy
.getSizeInBits());
2125 uint64_t SmallSplitSize
= SrcTy
.getSizeInBits() - LargeSplitSize
;
2126 auto ShiftAmt
= MIRBuilder
.buildConstant(ExtendTy
, LargeSplitSize
);
2127 auto SmallVal
= MIRBuilder
.buildLShr(ExtendTy
, ExtVal
, ShiftAmt
);
2129 // Generate the GEP and truncating stores.
2130 LLT PtrTy
= MRI
.getType(PtrReg
);
2132 MIRBuilder
.buildConstant(LLT::scalar(64), LargeSplitSize
/ 8);
2133 Register GEPReg
= MRI
.createGenericVirtualRegister(PtrTy
);
2134 auto SmallPtr
= MIRBuilder
.buildGEP(GEPReg
, PtrReg
, OffsetCst
.getReg(0));
2136 MachineFunction
&MF
= MIRBuilder
.getMF();
2137 MachineMemOperand
*LargeMMO
=
2138 MF
.getMachineMemOperand(&MMO
, 0, LargeSplitSize
/ 8);
2139 MachineMemOperand
*SmallMMO
=
2140 MF
.getMachineMemOperand(&MMO
, LargeSplitSize
/ 8, SmallSplitSize
/ 8);
2141 MIRBuilder
.buildStore(ExtVal
.getReg(0), PtrReg
, *LargeMMO
);
2142 MIRBuilder
.buildStore(SmallVal
.getReg(0), SmallPtr
.getReg(0), *SmallMMO
);
2143 MI
.eraseFromParent();
2146 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
2147 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
2148 case TargetOpcode::G_CTLZ
:
2149 case TargetOpcode::G_CTTZ
:
2150 case TargetOpcode::G_CTPOP
:
2151 return lowerBitCount(MI
, TypeIdx
, Ty
);
2153 Register Res
= MI
.getOperand(0).getReg();
2154 Register CarryOut
= MI
.getOperand(1).getReg();
2155 Register LHS
= MI
.getOperand(2).getReg();
2156 Register RHS
= MI
.getOperand(3).getReg();
2158 MIRBuilder
.buildAdd(Res
, LHS
, RHS
);
2159 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, CarryOut
, Res
, RHS
);
2161 MI
.eraseFromParent();
2165 Register Res
= MI
.getOperand(0).getReg();
2166 Register CarryOut
= MI
.getOperand(1).getReg();
2167 Register LHS
= MI
.getOperand(2).getReg();
2168 Register RHS
= MI
.getOperand(3).getReg();
2169 Register CarryIn
= MI
.getOperand(4).getReg();
2171 Register TmpRes
= MRI
.createGenericVirtualRegister(Ty
);
2172 Register ZExtCarryIn
= MRI
.createGenericVirtualRegister(Ty
);
2174 MIRBuilder
.buildAdd(TmpRes
, LHS
, RHS
);
2175 MIRBuilder
.buildZExt(ZExtCarryIn
, CarryIn
);
2176 MIRBuilder
.buildAdd(Res
, TmpRes
, ZExtCarryIn
);
2177 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, CarryOut
, Res
, LHS
);
2179 MI
.eraseFromParent();
2183 Register Res
= MI
.getOperand(0).getReg();
2184 Register BorrowOut
= MI
.getOperand(1).getReg();
2185 Register LHS
= MI
.getOperand(2).getReg();
2186 Register RHS
= MI
.getOperand(3).getReg();
2188 MIRBuilder
.buildSub(Res
, LHS
, RHS
);
2189 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, BorrowOut
, LHS
, RHS
);
2191 MI
.eraseFromParent();
2195 Register Res
= MI
.getOperand(0).getReg();
2196 Register BorrowOut
= MI
.getOperand(1).getReg();
2197 Register LHS
= MI
.getOperand(2).getReg();
2198 Register RHS
= MI
.getOperand(3).getReg();
2199 Register BorrowIn
= MI
.getOperand(4).getReg();
2201 Register TmpRes
= MRI
.createGenericVirtualRegister(Ty
);
2202 Register ZExtBorrowIn
= MRI
.createGenericVirtualRegister(Ty
);
2203 Register LHS_EQ_RHS
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
2204 Register LHS_ULT_RHS
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
2206 MIRBuilder
.buildSub(TmpRes
, LHS
, RHS
);
2207 MIRBuilder
.buildZExt(ZExtBorrowIn
, BorrowIn
);
2208 MIRBuilder
.buildSub(Res
, TmpRes
, ZExtBorrowIn
);
2209 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LHS_EQ_RHS
, LHS
, RHS
);
2210 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, LHS_ULT_RHS
, LHS
, RHS
);
2211 MIRBuilder
.buildSelect(BorrowOut
, LHS_EQ_RHS
, BorrowIn
, LHS_ULT_RHS
);
2213 MI
.eraseFromParent();
2217 return lowerUITOFP(MI
, TypeIdx
, Ty
);
2219 return lowerSITOFP(MI
, TypeIdx
, Ty
);
2221 return lowerFPTOUI(MI
, TypeIdx
, Ty
);
2226 return lowerMinMax(MI
, TypeIdx
, Ty
);
2228 return lowerFCopySign(MI
, TypeIdx
, Ty
);
2231 return lowerFMinNumMaxNum(MI
);
2232 case G_UNMERGE_VALUES
:
2233 return lowerUnmergeValues(MI
);
2234 case TargetOpcode::G_SEXT_INREG
: {
2235 assert(MI
.getOperand(2).isImm() && "Expected immediate");
2236 int64_t SizeInBits
= MI
.getOperand(2).getImm();
2238 Register DstReg
= MI
.getOperand(0).getReg();
2239 Register SrcReg
= MI
.getOperand(1).getReg();
2240 LLT DstTy
= MRI
.getType(DstReg
);
2241 Register TmpRes
= MRI
.createGenericVirtualRegister(DstTy
);
2243 auto MIBSz
= MIRBuilder
.buildConstant(DstTy
, DstTy
.getScalarSizeInBits() - SizeInBits
);
2244 MIRBuilder
.buildInstr(TargetOpcode::G_SHL
, {TmpRes
}, {SrcReg
, MIBSz
->getOperand(0).getReg()});
2245 MIRBuilder
.buildInstr(TargetOpcode::G_ASHR
, {DstReg
}, {TmpRes
, MIBSz
->getOperand(0).getReg()});
2246 MI
.eraseFromParent();
2249 case G_SHUFFLE_VECTOR
:
2250 return lowerShuffleVector(MI
);
2251 case G_DYN_STACKALLOC
:
2252 return lowerDynStackAlloc(MI
);
2254 return lowerExtract(MI
);
2256 return lowerInsert(MI
);
2260 LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorImplicitDef(
2261 MachineInstr
&MI
, unsigned TypeIdx
, LLT NarrowTy
) {
2262 SmallVector
<Register
, 2> DstRegs
;
2264 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
2265 Register DstReg
= MI
.getOperand(0).getReg();
2266 unsigned Size
= MRI
.getType(DstReg
).getSizeInBits();
2267 int NumParts
= Size
/ NarrowSize
;
2268 // FIXME: Don't know how to handle the situation where the small vectors
2269 // aren't all the same size yet.
2270 if (Size
% NarrowSize
!= 0)
2271 return UnableToLegalize
;
2273 for (int i
= 0; i
< NumParts
; ++i
) {
2274 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2275 MIRBuilder
.buildUndef(TmpReg
);
2276 DstRegs
.push_back(TmpReg
);
2279 if (NarrowTy
.isVector())
2280 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2282 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2284 MI
.eraseFromParent();
2288 LegalizerHelper::LegalizeResult
2289 LegalizerHelper::fewerElementsVectorBasic(MachineInstr
&MI
, unsigned TypeIdx
,
2291 const unsigned Opc
= MI
.getOpcode();
2292 const unsigned NumOps
= MI
.getNumOperands() - 1;
2293 const unsigned NarrowSize
= NarrowTy
.getSizeInBits();
2294 const Register DstReg
= MI
.getOperand(0).getReg();
2295 const unsigned Flags
= MI
.getFlags();
2296 const LLT DstTy
= MRI
.getType(DstReg
);
2297 const unsigned Size
= DstTy
.getSizeInBits();
2298 const int NumParts
= Size
/ NarrowSize
;
2299 const LLT EltTy
= DstTy
.getElementType();
2300 const unsigned EltSize
= EltTy
.getSizeInBits();
2301 const unsigned BitsForNumParts
= NarrowSize
* NumParts
;
2303 // Check if we have any leftovers. If we do, then only handle the case where
2304 // the leftover is one element.
2305 if (BitsForNumParts
!= Size
&& BitsForNumParts
+ EltSize
!= Size
)
2306 return UnableToLegalize
;
2308 if (BitsForNumParts
!= Size
) {
2309 Register AccumDstReg
= MRI
.createGenericVirtualRegister(DstTy
);
2310 MIRBuilder
.buildUndef(AccumDstReg
);
2312 // Handle the pieces which evenly divide into the requested type with
2313 // extract/op/insert sequence.
2314 for (unsigned Offset
= 0; Offset
< BitsForNumParts
; Offset
+= NarrowSize
) {
2315 SmallVector
<SrcOp
, 4> SrcOps
;
2316 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
2317 Register PartOpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2318 MIRBuilder
.buildExtract(PartOpReg
, MI
.getOperand(I
).getReg(), Offset
);
2319 SrcOps
.push_back(PartOpReg
);
2322 Register PartDstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2323 MIRBuilder
.buildInstr(Opc
, {PartDstReg
}, SrcOps
, Flags
);
2325 Register PartInsertReg
= MRI
.createGenericVirtualRegister(DstTy
);
2326 MIRBuilder
.buildInsert(PartInsertReg
, AccumDstReg
, PartDstReg
, Offset
);
2327 AccumDstReg
= PartInsertReg
;
2330 // Handle the remaining element sized leftover piece.
2331 SmallVector
<SrcOp
, 4> SrcOps
;
2332 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
2333 Register PartOpReg
= MRI
.createGenericVirtualRegister(EltTy
);
2334 MIRBuilder
.buildExtract(PartOpReg
, MI
.getOperand(I
).getReg(),
2336 SrcOps
.push_back(PartOpReg
);
2339 Register PartDstReg
= MRI
.createGenericVirtualRegister(EltTy
);
2340 MIRBuilder
.buildInstr(Opc
, {PartDstReg
}, SrcOps
, Flags
);
2341 MIRBuilder
.buildInsert(DstReg
, AccumDstReg
, PartDstReg
, BitsForNumParts
);
2342 MI
.eraseFromParent();
2347 SmallVector
<Register
, 2> DstRegs
, Src0Regs
, Src1Regs
, Src2Regs
;
2349 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src0Regs
);
2352 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src1Regs
);
2355 extractParts(MI
.getOperand(3).getReg(), NarrowTy
, NumParts
, Src2Regs
);
2357 for (int i
= 0; i
< NumParts
; ++i
) {
2358 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2361 MIRBuilder
.buildInstr(Opc
, {DstReg
}, {Src0Regs
[i
]}, Flags
);
2362 else if (NumOps
== 2) {
2363 MIRBuilder
.buildInstr(Opc
, {DstReg
}, {Src0Regs
[i
], Src1Regs
[i
]}, Flags
);
2364 } else if (NumOps
== 3) {
2365 MIRBuilder
.buildInstr(Opc
, {DstReg
},
2366 {Src0Regs
[i
], Src1Regs
[i
], Src2Regs
[i
]}, Flags
);
2369 DstRegs
.push_back(DstReg
);
2372 if (NarrowTy
.isVector())
2373 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2375 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2377 MI
.eraseFromParent();
2381 // Handle splitting vector operations which need to have the same number of
2382 // elements in each type index, but each type index may have a different element
2385 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2386 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2387 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2389 // Also handles some irregular breakdown cases, e.g.
2390 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2391 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2392 // s64 = G_SHL s64, s32
2393 LegalizerHelper::LegalizeResult
2394 LegalizerHelper::fewerElementsVectorMultiEltType(
2395 MachineInstr
&MI
, unsigned TypeIdx
, LLT NarrowTyArg
) {
2397 return UnableToLegalize
;
2399 const LLT NarrowTy0
= NarrowTyArg
;
2400 const unsigned NewNumElts
=
2401 NarrowTy0
.isVector() ? NarrowTy0
.getNumElements() : 1;
2403 const Register DstReg
= MI
.getOperand(0).getReg();
2404 LLT DstTy
= MRI
.getType(DstReg
);
2407 // All of the operands need to have the same number of elements, so if we can
2408 // determine a type breakdown for the result type, we can for all of the
2410 int NumParts
= getNarrowTypeBreakDown(DstTy
, NarrowTy0
, LeftoverTy0
).first
;
2412 return UnableToLegalize
;
2414 SmallVector
<MachineInstrBuilder
, 4> NewInsts
;
2416 SmallVector
<Register
, 4> DstRegs
, LeftoverDstRegs
;
2417 SmallVector
<Register
, 4> PartRegs
, LeftoverRegs
;
2419 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
2421 Register SrcReg
= MI
.getOperand(I
).getReg();
2422 LLT SrcTyI
= MRI
.getType(SrcReg
);
2423 LLT NarrowTyI
= LLT::scalarOrVector(NewNumElts
, SrcTyI
.getScalarType());
2426 // Split this operand into the requested typed registers, and any leftover
2427 // required to reproduce the original type.
2428 if (!extractParts(SrcReg
, SrcTyI
, NarrowTyI
, LeftoverTyI
, PartRegs
,
2430 return UnableToLegalize
;
2433 // For the first operand, create an instruction for each part and setup
2435 for (Register PartReg
: PartRegs
) {
2436 Register PartDstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2437 NewInsts
.push_back(MIRBuilder
.buildInstrNoInsert(MI
.getOpcode())
2440 DstRegs
.push_back(PartDstReg
);
2443 for (Register LeftoverReg
: LeftoverRegs
) {
2444 Register PartDstReg
= MRI
.createGenericVirtualRegister(LeftoverTy0
);
2445 NewInsts
.push_back(MIRBuilder
.buildInstrNoInsert(MI
.getOpcode())
2447 .addUse(LeftoverReg
));
2448 LeftoverDstRegs
.push_back(PartDstReg
);
2451 assert(NewInsts
.size() == PartRegs
.size() + LeftoverRegs
.size());
2453 // Add the newly created operand splits to the existing instructions. The
2454 // odd-sized pieces are ordered after the requested NarrowTyArg sized
2456 unsigned InstCount
= 0;
2457 for (unsigned J
= 0, JE
= PartRegs
.size(); J
!= JE
; ++J
)
2458 NewInsts
[InstCount
++].addUse(PartRegs
[J
]);
2459 for (unsigned J
= 0, JE
= LeftoverRegs
.size(); J
!= JE
; ++J
)
2460 NewInsts
[InstCount
++].addUse(LeftoverRegs
[J
]);
2464 LeftoverRegs
.clear();
2467 // Insert the newly built operations and rebuild the result register.
2468 for (auto &MIB
: NewInsts
)
2469 MIRBuilder
.insertInstr(MIB
);
2471 insertParts(DstReg
, DstTy
, NarrowTy0
, DstRegs
, LeftoverTy0
, LeftoverDstRegs
);
2473 MI
.eraseFromParent();
2477 LegalizerHelper::LegalizeResult
2478 LegalizerHelper::fewerElementsVectorCasts(MachineInstr
&MI
, unsigned TypeIdx
,
2481 return UnableToLegalize
;
2483 Register DstReg
= MI
.getOperand(0).getReg();
2484 Register SrcReg
= MI
.getOperand(1).getReg();
2485 LLT DstTy
= MRI
.getType(DstReg
);
2486 LLT SrcTy
= MRI
.getType(SrcReg
);
2488 LLT NarrowTy0
= NarrowTy
;
2492 if (NarrowTy
.isVector()) {
2493 // Uneven breakdown not handled.
2494 NumParts
= DstTy
.getNumElements() / NarrowTy
.getNumElements();
2495 if (NumParts
* NarrowTy
.getNumElements() != DstTy
.getNumElements())
2496 return UnableToLegalize
;
2498 NarrowTy1
= LLT::vector(NumParts
, SrcTy
.getElementType().getSizeInBits());
2500 NumParts
= DstTy
.getNumElements();
2501 NarrowTy1
= SrcTy
.getElementType();
2504 SmallVector
<Register
, 4> SrcRegs
, DstRegs
;
2505 extractParts(SrcReg
, NarrowTy1
, NumParts
, SrcRegs
);
2507 for (unsigned I
= 0; I
< NumParts
; ++I
) {
2508 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2509 MachineInstr
*NewInst
= MIRBuilder
.buildInstr(MI
.getOpcode())
2511 .addUse(SrcRegs
[I
]);
2513 NewInst
->setFlags(MI
.getFlags());
2514 DstRegs
.push_back(DstReg
);
2517 if (NarrowTy
.isVector())
2518 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2520 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2522 MI
.eraseFromParent();
2526 LegalizerHelper::LegalizeResult
2527 LegalizerHelper::fewerElementsVectorCmp(MachineInstr
&MI
, unsigned TypeIdx
,
2529 Register DstReg
= MI
.getOperand(0).getReg();
2530 Register Src0Reg
= MI
.getOperand(2).getReg();
2531 LLT DstTy
= MRI
.getType(DstReg
);
2532 LLT SrcTy
= MRI
.getType(Src0Reg
);
2535 LLT NarrowTy0
, NarrowTy1
;
2538 unsigned NewElts
= NarrowTy
.isVector() ? NarrowTy
.getNumElements() : 1;
2539 unsigned OldElts
= DstTy
.getNumElements();
2541 NarrowTy0
= NarrowTy
;
2542 NumParts
= NarrowTy
.isVector() ? (OldElts
/ NewElts
) : DstTy
.getNumElements();
2543 NarrowTy1
= NarrowTy
.isVector() ?
2544 LLT::vector(NarrowTy
.getNumElements(), SrcTy
.getScalarSizeInBits()) :
2545 SrcTy
.getElementType();
2548 unsigned NewElts
= NarrowTy
.isVector() ? NarrowTy
.getNumElements() : 1;
2549 unsigned OldElts
= SrcTy
.getNumElements();
2551 NumParts
= NarrowTy
.isVector() ? (OldElts
/ NewElts
) :
2552 NarrowTy
.getNumElements();
2553 NarrowTy0
= LLT::vector(NarrowTy
.getNumElements(),
2554 DstTy
.getScalarSizeInBits());
2555 NarrowTy1
= NarrowTy
;
2558 // FIXME: Don't know how to handle the situation where the small vectors
2559 // aren't all the same size yet.
2560 if (NarrowTy1
.isVector() &&
2561 NarrowTy1
.getNumElements() * NumParts
!= DstTy
.getNumElements())
2562 return UnableToLegalize
;
2564 CmpInst::Predicate Pred
2565 = static_cast<CmpInst::Predicate
>(MI
.getOperand(1).getPredicate());
2567 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, DstRegs
;
2568 extractParts(MI
.getOperand(2).getReg(), NarrowTy1
, NumParts
, Src1Regs
);
2569 extractParts(MI
.getOperand(3).getReg(), NarrowTy1
, NumParts
, Src2Regs
);
2571 for (unsigned I
= 0; I
< NumParts
; ++I
) {
2572 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2573 DstRegs
.push_back(DstReg
);
2575 if (MI
.getOpcode() == TargetOpcode::G_ICMP
)
2576 MIRBuilder
.buildICmp(Pred
, DstReg
, Src1Regs
[I
], Src2Regs
[I
]);
2578 MachineInstr
*NewCmp
2579 = MIRBuilder
.buildFCmp(Pred
, DstReg
, Src1Regs
[I
], Src2Regs
[I
]);
2580 NewCmp
->setFlags(MI
.getFlags());
2584 if (NarrowTy1
.isVector())
2585 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2587 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2589 MI
.eraseFromParent();
2593 LegalizerHelper::LegalizeResult
2594 LegalizerHelper::fewerElementsVectorSelect(MachineInstr
&MI
, unsigned TypeIdx
,
2596 Register DstReg
= MI
.getOperand(0).getReg();
2597 Register CondReg
= MI
.getOperand(1).getReg();
2599 unsigned NumParts
= 0;
2600 LLT NarrowTy0
, NarrowTy1
;
2602 LLT DstTy
= MRI
.getType(DstReg
);
2603 LLT CondTy
= MRI
.getType(CondReg
);
2604 unsigned Size
= DstTy
.getSizeInBits();
2606 assert(TypeIdx
== 0 || CondTy
.isVector());
2609 NarrowTy0
= NarrowTy
;
2612 unsigned NarrowSize
= NarrowTy0
.getSizeInBits();
2613 // FIXME: Don't know how to handle the situation where the small vectors
2614 // aren't all the same size yet.
2615 if (Size
% NarrowSize
!= 0)
2616 return UnableToLegalize
;
2618 NumParts
= Size
/ NarrowSize
;
2620 // Need to break down the condition type
2621 if (CondTy
.isVector()) {
2622 if (CondTy
.getNumElements() == NumParts
)
2623 NarrowTy1
= CondTy
.getElementType();
2625 NarrowTy1
= LLT::vector(CondTy
.getNumElements() / NumParts
,
2626 CondTy
.getScalarSizeInBits());
2629 NumParts
= CondTy
.getNumElements();
2630 if (NarrowTy
.isVector()) {
2631 // TODO: Handle uneven breakdown.
2632 if (NumParts
* NarrowTy
.getNumElements() != CondTy
.getNumElements())
2633 return UnableToLegalize
;
2635 return UnableToLegalize
;
2637 NarrowTy0
= DstTy
.getElementType();
2638 NarrowTy1
= NarrowTy
;
2642 SmallVector
<Register
, 2> DstRegs
, Src0Regs
, Src1Regs
, Src2Regs
;
2643 if (CondTy
.isVector())
2644 extractParts(MI
.getOperand(1).getReg(), NarrowTy1
, NumParts
, Src0Regs
);
2646 extractParts(MI
.getOperand(2).getReg(), NarrowTy0
, NumParts
, Src1Regs
);
2647 extractParts(MI
.getOperand(3).getReg(), NarrowTy0
, NumParts
, Src2Regs
);
2649 for (unsigned i
= 0; i
< NumParts
; ++i
) {
2650 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2651 MIRBuilder
.buildSelect(DstReg
, CondTy
.isVector() ? Src0Regs
[i
] : CondReg
,
2652 Src1Regs
[i
], Src2Regs
[i
]);
2653 DstRegs
.push_back(DstReg
);
2656 if (NarrowTy0
.isVector())
2657 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2659 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2661 MI
.eraseFromParent();
2665 LegalizerHelper::LegalizeResult
2666 LegalizerHelper::fewerElementsVectorPhi(MachineInstr
&MI
, unsigned TypeIdx
,
2668 const Register DstReg
= MI
.getOperand(0).getReg();
2669 LLT PhiTy
= MRI
.getType(DstReg
);
2672 // All of the operands need to have the same number of elements, so if we can
2673 // determine a type breakdown for the result type, we can for all of the
2675 int NumParts
, NumLeftover
;
2676 std::tie(NumParts
, NumLeftover
)
2677 = getNarrowTypeBreakDown(PhiTy
, NarrowTy
, LeftoverTy
);
2679 return UnableToLegalize
;
2681 SmallVector
<Register
, 4> DstRegs
, LeftoverDstRegs
;
2682 SmallVector
<MachineInstrBuilder
, 4> NewInsts
;
2684 const int TotalNumParts
= NumParts
+ NumLeftover
;
2686 // Insert the new phis in the result block first.
2687 for (int I
= 0; I
!= TotalNumParts
; ++I
) {
2688 LLT Ty
= I
< NumParts
? NarrowTy
: LeftoverTy
;
2689 Register PartDstReg
= MRI
.createGenericVirtualRegister(Ty
);
2690 NewInsts
.push_back(MIRBuilder
.buildInstr(TargetOpcode::G_PHI
)
2691 .addDef(PartDstReg
));
2693 DstRegs
.push_back(PartDstReg
);
2695 LeftoverDstRegs
.push_back(PartDstReg
);
2698 MachineBasicBlock
*MBB
= MI
.getParent();
2699 MIRBuilder
.setInsertPt(*MBB
, MBB
->getFirstNonPHI());
2700 insertParts(DstReg
, PhiTy
, NarrowTy
, DstRegs
, LeftoverTy
, LeftoverDstRegs
);
2702 SmallVector
<Register
, 4> PartRegs
, LeftoverRegs
;
2704 // Insert code to extract the incoming values in each predecessor block.
2705 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; I
+= 2) {
2707 LeftoverRegs
.clear();
2709 Register SrcReg
= MI
.getOperand(I
).getReg();
2710 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
2711 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
2714 if (!extractParts(SrcReg
, PhiTy
, NarrowTy
, Unused
, PartRegs
,
2716 return UnableToLegalize
;
2718 // Add the newly created operand splits to the existing instructions. The
2719 // odd-sized pieces are ordered after the requested NarrowTyArg sized
2721 for (int J
= 0; J
!= TotalNumParts
; ++J
) {
2722 MachineInstrBuilder MIB
= NewInsts
[J
];
2723 MIB
.addUse(J
< NumParts
? PartRegs
[J
] : LeftoverRegs
[J
- NumParts
]);
2728 MI
.eraseFromParent();
2732 LegalizerHelper::LegalizeResult
2733 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr
&MI
,
2737 return UnableToLegalize
;
2739 const int NumDst
= MI
.getNumOperands() - 1;
2740 const Register SrcReg
= MI
.getOperand(NumDst
).getReg();
2741 LLT SrcTy
= MRI
.getType(SrcReg
);
2743 LLT DstTy
= MRI
.getType(MI
.getOperand(0).getReg());
2745 // TODO: Create sequence of extracts.
2746 if (DstTy
== NarrowTy
)
2747 return UnableToLegalize
;
2749 LLT GCDTy
= getGCDType(SrcTy
, NarrowTy
);
2750 if (DstTy
== GCDTy
) {
2751 // This would just be a copy of the same unmerge.
2752 // TODO: Create extracts, pad with undef and create intermediate merges.
2753 return UnableToLegalize
;
2756 auto Unmerge
= MIRBuilder
.buildUnmerge(GCDTy
, SrcReg
);
2757 const int NumUnmerge
= Unmerge
->getNumOperands() - 1;
2758 const int PartsPerUnmerge
= NumDst
/ NumUnmerge
;
2760 for (int I
= 0; I
!= NumUnmerge
; ++I
) {
2761 auto MIB
= MIRBuilder
.buildInstr(TargetOpcode::G_UNMERGE_VALUES
);
2763 for (int J
= 0; J
!= PartsPerUnmerge
; ++J
)
2764 MIB
.addDef(MI
.getOperand(I
* PartsPerUnmerge
+ J
).getReg());
2765 MIB
.addUse(Unmerge
.getReg(I
));
2768 MI
.eraseFromParent();
2772 LegalizerHelper::LegalizeResult
2773 LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr
&MI
,
2776 assert(TypeIdx
== 0 && "not a vector type index");
2777 Register DstReg
= MI
.getOperand(0).getReg();
2778 LLT DstTy
= MRI
.getType(DstReg
);
2779 LLT SrcTy
= DstTy
.getElementType();
2781 int DstNumElts
= DstTy
.getNumElements();
2782 int NarrowNumElts
= NarrowTy
.getNumElements();
2783 int NumConcat
= (DstNumElts
+ NarrowNumElts
- 1) / NarrowNumElts
;
2784 LLT WidenedDstTy
= LLT::vector(NarrowNumElts
* NumConcat
, SrcTy
);
2786 SmallVector
<Register
, 8> ConcatOps
;
2787 SmallVector
<Register
, 8> SubBuildVector
;
2790 if (WidenedDstTy
!= DstTy
)
2791 UndefReg
= MIRBuilder
.buildUndef(SrcTy
).getReg(0);
2793 // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
2796 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
2799 // %4:_(s16) = G_IMPLICIT_DEF
2800 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
2801 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
2802 // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
2803 // %3:_(<3 x s16>) = G_EXTRACT %7, 0
2804 for (int I
= 0; I
!= NumConcat
; ++I
) {
2805 for (int J
= 0; J
!= NarrowNumElts
; ++J
) {
2806 int SrcIdx
= NarrowNumElts
* I
+ J
;
2808 if (SrcIdx
< DstNumElts
) {
2809 Register SrcReg
= MI
.getOperand(SrcIdx
+ 1).getReg();
2810 SubBuildVector
.push_back(SrcReg
);
2812 SubBuildVector
.push_back(UndefReg
);
2815 auto BuildVec
= MIRBuilder
.buildBuildVector(NarrowTy
, SubBuildVector
);
2816 ConcatOps
.push_back(BuildVec
.getReg(0));
2817 SubBuildVector
.clear();
2820 if (DstTy
== WidenedDstTy
)
2821 MIRBuilder
.buildConcatVectors(DstReg
, ConcatOps
);
2823 auto Concat
= MIRBuilder
.buildConcatVectors(WidenedDstTy
, ConcatOps
);
2824 MIRBuilder
.buildExtract(DstReg
, Concat
, 0);
2827 MI
.eraseFromParent();
2831 LegalizerHelper::LegalizeResult
2832 LegalizerHelper::reduceLoadStoreWidth(MachineInstr
&MI
, unsigned TypeIdx
,
2834 // FIXME: Don't know how to handle secondary types yet.
2836 return UnableToLegalize
;
2838 MachineMemOperand
*MMO
= *MI
.memoperands_begin();
2840 // This implementation doesn't work for atomics. Give up instead of doing
2841 // something invalid.
2842 if (MMO
->getOrdering() != AtomicOrdering::NotAtomic
||
2843 MMO
->getFailureOrdering() != AtomicOrdering::NotAtomic
)
2844 return UnableToLegalize
;
2846 bool IsLoad
= MI
.getOpcode() == TargetOpcode::G_LOAD
;
2847 Register ValReg
= MI
.getOperand(0).getReg();
2848 Register AddrReg
= MI
.getOperand(1).getReg();
2849 LLT ValTy
= MRI
.getType(ValReg
);
2852 int NumLeftover
= -1;
2854 SmallVector
<Register
, 8> NarrowRegs
, NarrowLeftoverRegs
;
2856 std::tie(NumParts
, NumLeftover
) = getNarrowTypeBreakDown(ValTy
, NarrowTy
, LeftoverTy
);
2858 if (extractParts(ValReg
, ValTy
, NarrowTy
, LeftoverTy
, NarrowRegs
,
2859 NarrowLeftoverRegs
)) {
2860 NumParts
= NarrowRegs
.size();
2861 NumLeftover
= NarrowLeftoverRegs
.size();
2866 return UnableToLegalize
;
2868 const LLT OffsetTy
= LLT::scalar(MRI
.getType(AddrReg
).getScalarSizeInBits());
2870 unsigned TotalSize
= ValTy
.getSizeInBits();
2872 // Split the load/store into PartTy sized pieces starting at Offset. If this
2873 // is a load, return the new registers in ValRegs. For a store, each elements
2874 // of ValRegs should be PartTy. Returns the next offset that needs to be
2876 auto splitTypePieces
= [=](LLT PartTy
, SmallVectorImpl
<Register
> &ValRegs
,
2877 unsigned Offset
) -> unsigned {
2878 MachineFunction
&MF
= MIRBuilder
.getMF();
2879 unsigned PartSize
= PartTy
.getSizeInBits();
2880 for (unsigned Idx
= 0, E
= NumParts
; Idx
!= E
&& Offset
< TotalSize
;
2881 Offset
+= PartSize
, ++Idx
) {
2882 unsigned ByteSize
= PartSize
/ 8;
2883 unsigned ByteOffset
= Offset
/ 8;
2884 Register NewAddrReg
;
2886 MIRBuilder
.materializeGEP(NewAddrReg
, AddrReg
, OffsetTy
, ByteOffset
);
2888 MachineMemOperand
*NewMMO
=
2889 MF
.getMachineMemOperand(MMO
, ByteOffset
, ByteSize
);
2892 Register Dst
= MRI
.createGenericVirtualRegister(PartTy
);
2893 ValRegs
.push_back(Dst
);
2894 MIRBuilder
.buildLoad(Dst
, NewAddrReg
, *NewMMO
);
2896 MIRBuilder
.buildStore(ValRegs
[Idx
], NewAddrReg
, *NewMMO
);
2903 unsigned HandledOffset
= splitTypePieces(NarrowTy
, NarrowRegs
, 0);
2905 // Handle the rest of the register if this isn't an even type breakdown.
2906 if (LeftoverTy
.isValid())
2907 splitTypePieces(LeftoverTy
, NarrowLeftoverRegs
, HandledOffset
);
2910 insertParts(ValReg
, ValTy
, NarrowTy
, NarrowRegs
,
2911 LeftoverTy
, NarrowLeftoverRegs
);
2914 MI
.eraseFromParent();
2918 LegalizerHelper::LegalizeResult
2919 LegalizerHelper::fewerElementsVector(MachineInstr
&MI
, unsigned TypeIdx
,
2921 using namespace TargetOpcode
;
2923 MIRBuilder
.setInstr(MI
);
2924 switch (MI
.getOpcode()) {
2925 case G_IMPLICIT_DEF
:
2926 return fewerElementsVectorImplicitDef(MI
, TypeIdx
, NarrowTy
);
2940 case G_FCANONICALIZE
:
2955 case G_INTRINSIC_ROUND
:
2956 case G_INTRINSIC_TRUNC
:
2969 case G_FMINNUM_IEEE
:
2970 case G_FMAXNUM_IEEE
:
2973 return fewerElementsVectorBasic(MI
, TypeIdx
, NarrowTy
);
2978 case G_CTLZ_ZERO_UNDEF
:
2980 case G_CTTZ_ZERO_UNDEF
:
2983 return fewerElementsVectorMultiEltType(MI
, TypeIdx
, NarrowTy
);
2995 case G_ADDRSPACE_CAST
:
2996 return fewerElementsVectorCasts(MI
, TypeIdx
, NarrowTy
);
2999 return fewerElementsVectorCmp(MI
, TypeIdx
, NarrowTy
);
3001 return fewerElementsVectorSelect(MI
, TypeIdx
, NarrowTy
);
3003 return fewerElementsVectorPhi(MI
, TypeIdx
, NarrowTy
);
3004 case G_UNMERGE_VALUES
:
3005 return fewerElementsVectorUnmergeValues(MI
, TypeIdx
, NarrowTy
);
3006 case G_BUILD_VECTOR
:
3007 return fewerElementsVectorBuildVector(MI
, TypeIdx
, NarrowTy
);
3010 return reduceLoadStoreWidth(MI
, TypeIdx
, NarrowTy
);
3012 return UnableToLegalize
;
3016 LegalizerHelper::LegalizeResult
3017 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr
&MI
, const APInt
&Amt
,
3018 const LLT HalfTy
, const LLT AmtTy
) {
3020 Register InL
= MRI
.createGenericVirtualRegister(HalfTy
);
3021 Register InH
= MRI
.createGenericVirtualRegister(HalfTy
);
3022 MIRBuilder
.buildUnmerge({InL
, InH
}, MI
.getOperand(1).getReg());
3024 if (Amt
.isNullValue()) {
3025 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {InL
, InH
});
3026 MI
.eraseFromParent();
3031 unsigned NVTBits
= HalfTy
.getSizeInBits();
3032 unsigned VTBits
= 2 * NVTBits
;
3034 SrcOp
Lo(Register(0)), Hi(Register(0));
3035 if (MI
.getOpcode() == TargetOpcode::G_SHL
) {
3036 if (Amt
.ugt(VTBits
)) {
3037 Lo
= Hi
= MIRBuilder
.buildConstant(NVT
, 0);
3038 } else if (Amt
.ugt(NVTBits
)) {
3039 Lo
= MIRBuilder
.buildConstant(NVT
, 0);
3040 Hi
= MIRBuilder
.buildShl(NVT
, InL
,
3041 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
3042 } else if (Amt
== NVTBits
) {
3043 Lo
= MIRBuilder
.buildConstant(NVT
, 0);
3046 Lo
= MIRBuilder
.buildShl(NVT
, InL
, MIRBuilder
.buildConstant(AmtTy
, Amt
));
3048 MIRBuilder
.buildShl(NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, Amt
));
3049 auto OrRHS
= MIRBuilder
.buildLShr(
3050 NVT
, InL
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
3051 Hi
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
3053 } else if (MI
.getOpcode() == TargetOpcode::G_LSHR
) {
3054 if (Amt
.ugt(VTBits
)) {
3055 Lo
= Hi
= MIRBuilder
.buildConstant(NVT
, 0);
3056 } else if (Amt
.ugt(NVTBits
)) {
3057 Lo
= MIRBuilder
.buildLShr(NVT
, InH
,
3058 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
3059 Hi
= MIRBuilder
.buildConstant(NVT
, 0);
3060 } else if (Amt
== NVTBits
) {
3062 Hi
= MIRBuilder
.buildConstant(NVT
, 0);
3064 auto ShiftAmtConst
= MIRBuilder
.buildConstant(AmtTy
, Amt
);
3066 auto OrLHS
= MIRBuilder
.buildLShr(NVT
, InL
, ShiftAmtConst
);
3067 auto OrRHS
= MIRBuilder
.buildShl(
3068 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
3070 Lo
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
3071 Hi
= MIRBuilder
.buildLShr(NVT
, InH
, ShiftAmtConst
);
3074 if (Amt
.ugt(VTBits
)) {
3075 Hi
= Lo
= MIRBuilder
.buildAShr(
3076 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
3077 } else if (Amt
.ugt(NVTBits
)) {
3078 Lo
= MIRBuilder
.buildAShr(NVT
, InH
,
3079 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
3080 Hi
= MIRBuilder
.buildAShr(NVT
, InH
,
3081 MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
3082 } else if (Amt
== NVTBits
) {
3084 Hi
= MIRBuilder
.buildAShr(NVT
, InH
,
3085 MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
3087 auto ShiftAmtConst
= MIRBuilder
.buildConstant(AmtTy
, Amt
);
3089 auto OrLHS
= MIRBuilder
.buildLShr(NVT
, InL
, ShiftAmtConst
);
3090 auto OrRHS
= MIRBuilder
.buildShl(
3091 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
3093 Lo
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
3094 Hi
= MIRBuilder
.buildAShr(NVT
, InH
, ShiftAmtConst
);
3098 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {Lo
.getReg(), Hi
.getReg()});
3099 MI
.eraseFromParent();
3104 // TODO: Optimize if constant shift amount.
3105 LegalizerHelper::LegalizeResult
3106 LegalizerHelper::narrowScalarShift(MachineInstr
&MI
, unsigned TypeIdx
,
3109 Observer
.changingInstr(MI
);
3110 narrowScalarSrc(MI
, RequestedTy
, 2);
3111 Observer
.changedInstr(MI
);
3115 Register DstReg
= MI
.getOperand(0).getReg();
3116 LLT DstTy
= MRI
.getType(DstReg
);
3117 if (DstTy
.isVector())
3118 return UnableToLegalize
;
3120 Register Amt
= MI
.getOperand(2).getReg();
3121 LLT ShiftAmtTy
= MRI
.getType(Amt
);
3122 const unsigned DstEltSize
= DstTy
.getScalarSizeInBits();
3123 if (DstEltSize
% 2 != 0)
3124 return UnableToLegalize
;
3126 // Ignore the input type. We can only go to exactly half the size of the
3127 // input. If that isn't small enough, the resulting pieces will be further
3129 const unsigned NewBitSize
= DstEltSize
/ 2;
3130 const LLT HalfTy
= LLT::scalar(NewBitSize
);
3131 const LLT CondTy
= LLT::scalar(1);
3133 if (const MachineInstr
*KShiftAmt
=
3134 getOpcodeDef(TargetOpcode::G_CONSTANT
, Amt
, MRI
)) {
3135 return narrowScalarShiftByConstant(
3136 MI
, KShiftAmt
->getOperand(1).getCImm()->getValue(), HalfTy
, ShiftAmtTy
);
3139 // TODO: Expand with known bits.
3141 // Handle the fully general expansion by an unknown amount.
3142 auto NewBits
= MIRBuilder
.buildConstant(ShiftAmtTy
, NewBitSize
);
3144 Register InL
= MRI
.createGenericVirtualRegister(HalfTy
);
3145 Register InH
= MRI
.createGenericVirtualRegister(HalfTy
);
3146 MIRBuilder
.buildUnmerge({InL
, InH
}, MI
.getOperand(1).getReg());
3148 auto AmtExcess
= MIRBuilder
.buildSub(ShiftAmtTy
, Amt
, NewBits
);
3149 auto AmtLack
= MIRBuilder
.buildSub(ShiftAmtTy
, NewBits
, Amt
);
3151 auto Zero
= MIRBuilder
.buildConstant(ShiftAmtTy
, 0);
3152 auto IsShort
= MIRBuilder
.buildICmp(ICmpInst::ICMP_ULT
, CondTy
, Amt
, NewBits
);
3153 auto IsZero
= MIRBuilder
.buildICmp(ICmpInst::ICMP_EQ
, CondTy
, Amt
, Zero
);
3155 Register ResultRegs
[2];
3156 switch (MI
.getOpcode()) {
3157 case TargetOpcode::G_SHL
: {
3158 // Short: ShAmt < NewBitSize
3159 auto LoS
= MIRBuilder
.buildShl(HalfTy
, InL
, Amt
);
3161 auto LoOr
= MIRBuilder
.buildLShr(HalfTy
, InL
, AmtLack
);
3162 auto HiOr
= MIRBuilder
.buildShl(HalfTy
, InH
, Amt
);
3163 auto HiS
= MIRBuilder
.buildOr(HalfTy
, LoOr
, HiOr
);
3165 // Long: ShAmt >= NewBitSize
3166 auto LoL
= MIRBuilder
.buildConstant(HalfTy
, 0); // Lo part is zero.
3167 auto HiL
= MIRBuilder
.buildShl(HalfTy
, InL
, AmtExcess
); // Hi from Lo part.
3169 auto Lo
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
);
3170 auto Hi
= MIRBuilder
.buildSelect(
3171 HalfTy
, IsZero
, InH
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
));
3173 ResultRegs
[0] = Lo
.getReg(0);
3174 ResultRegs
[1] = Hi
.getReg(0);
3177 case TargetOpcode::G_LSHR
:
3178 case TargetOpcode::G_ASHR
: {
3179 // Short: ShAmt < NewBitSize
3180 auto HiS
= MIRBuilder
.buildInstr(MI
.getOpcode(), {HalfTy
}, {InH
, Amt
});
3182 auto LoOr
= MIRBuilder
.buildLShr(HalfTy
, InL
, Amt
);
3183 auto HiOr
= MIRBuilder
.buildShl(HalfTy
, InH
, AmtLack
);
3184 auto LoS
= MIRBuilder
.buildOr(HalfTy
, LoOr
, HiOr
);
3186 // Long: ShAmt >= NewBitSize
3187 MachineInstrBuilder HiL
;
3188 if (MI
.getOpcode() == TargetOpcode::G_LSHR
) {
3189 HiL
= MIRBuilder
.buildConstant(HalfTy
, 0); // Hi part is zero.
3191 auto ShiftAmt
= MIRBuilder
.buildConstant(ShiftAmtTy
, NewBitSize
- 1);
3192 HiL
= MIRBuilder
.buildAShr(HalfTy
, InH
, ShiftAmt
); // Sign of Hi part.
3194 auto LoL
= MIRBuilder
.buildInstr(MI
.getOpcode(), {HalfTy
},
3195 {InH
, AmtExcess
}); // Lo from Hi part.
3197 auto Lo
= MIRBuilder
.buildSelect(
3198 HalfTy
, IsZero
, InL
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
));
3200 auto Hi
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
);
3202 ResultRegs
[0] = Lo
.getReg(0);
3203 ResultRegs
[1] = Hi
.getReg(0);
3207 llvm_unreachable("not a shift");
3210 MIRBuilder
.buildMerge(DstReg
, ResultRegs
);
3211 MI
.eraseFromParent();
3215 LegalizerHelper::LegalizeResult
3216 LegalizerHelper::moreElementsVectorPhi(MachineInstr
&MI
, unsigned TypeIdx
,
3218 assert(TypeIdx
== 0 && "Expecting only Idx 0");
3220 Observer
.changingInstr(MI
);
3221 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; I
+= 2) {
3222 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
3223 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
3224 moreElementsVectorSrc(MI
, MoreTy
, I
);
3227 MachineBasicBlock
&MBB
= *MI
.getParent();
3228 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
3229 moreElementsVectorDst(MI
, MoreTy
, 0);
3230 Observer
.changedInstr(MI
);
3234 LegalizerHelper::LegalizeResult
3235 LegalizerHelper::moreElementsVector(MachineInstr
&MI
, unsigned TypeIdx
,
3237 MIRBuilder
.setInstr(MI
);
3238 unsigned Opc
= MI
.getOpcode();
3240 case TargetOpcode::G_IMPLICIT_DEF
:
3241 case TargetOpcode::G_LOAD
: {
3243 return UnableToLegalize
;
3244 Observer
.changingInstr(MI
);
3245 moreElementsVectorDst(MI
, MoreTy
, 0);
3246 Observer
.changedInstr(MI
);
3249 case TargetOpcode::G_STORE
:
3251 return UnableToLegalize
;
3252 Observer
.changingInstr(MI
);
3253 moreElementsVectorSrc(MI
, MoreTy
, 0);
3254 Observer
.changedInstr(MI
);
3256 case TargetOpcode::G_AND
:
3257 case TargetOpcode::G_OR
:
3258 case TargetOpcode::G_XOR
:
3259 case TargetOpcode::G_SMIN
:
3260 case TargetOpcode::G_SMAX
:
3261 case TargetOpcode::G_UMIN
:
3262 case TargetOpcode::G_UMAX
: {
3263 Observer
.changingInstr(MI
);
3264 moreElementsVectorSrc(MI
, MoreTy
, 1);
3265 moreElementsVectorSrc(MI
, MoreTy
, 2);
3266 moreElementsVectorDst(MI
, MoreTy
, 0);
3267 Observer
.changedInstr(MI
);
3270 case TargetOpcode::G_EXTRACT
:
3272 return UnableToLegalize
;
3273 Observer
.changingInstr(MI
);
3274 moreElementsVectorSrc(MI
, MoreTy
, 1);
3275 Observer
.changedInstr(MI
);
3277 case TargetOpcode::G_INSERT
:
3279 return UnableToLegalize
;
3280 Observer
.changingInstr(MI
);
3281 moreElementsVectorSrc(MI
, MoreTy
, 1);
3282 moreElementsVectorDst(MI
, MoreTy
, 0);
3283 Observer
.changedInstr(MI
);
3285 case TargetOpcode::G_SELECT
:
3287 return UnableToLegalize
;
3288 if (MRI
.getType(MI
.getOperand(1).getReg()).isVector())
3289 return UnableToLegalize
;
3291 Observer
.changingInstr(MI
);
3292 moreElementsVectorSrc(MI
, MoreTy
, 2);
3293 moreElementsVectorSrc(MI
, MoreTy
, 3);
3294 moreElementsVectorDst(MI
, MoreTy
, 0);
3295 Observer
.changedInstr(MI
);
3297 case TargetOpcode::G_UNMERGE_VALUES
: {
3299 return UnableToLegalize
;
3301 LLT DstTy
= MRI
.getType(MI
.getOperand(0).getReg());
3302 int NumDst
= MI
.getNumOperands() - 1;
3303 moreElementsVectorSrc(MI
, MoreTy
, NumDst
);
3305 auto MIB
= MIRBuilder
.buildInstr(TargetOpcode::G_UNMERGE_VALUES
);
3306 for (int I
= 0; I
!= NumDst
; ++I
)
3307 MIB
.addDef(MI
.getOperand(I
).getReg());
3309 int NewNumDst
= MoreTy
.getSizeInBits() / DstTy
.getSizeInBits();
3310 for (int I
= NumDst
; I
!= NewNumDst
; ++I
)
3311 MIB
.addDef(MRI
.createGenericVirtualRegister(DstTy
));
3313 MIB
.addUse(MI
.getOperand(NumDst
).getReg());
3314 MI
.eraseFromParent();
3317 case TargetOpcode::G_PHI
:
3318 return moreElementsVectorPhi(MI
, TypeIdx
, MoreTy
);
3320 return UnableToLegalize
;
3324 void LegalizerHelper::multiplyRegisters(SmallVectorImpl
<Register
> &DstRegs
,
3325 ArrayRef
<Register
> Src1Regs
,
3326 ArrayRef
<Register
> Src2Regs
,
3328 MachineIRBuilder
&B
= MIRBuilder
;
3329 unsigned SrcParts
= Src1Regs
.size();
3330 unsigned DstParts
= DstRegs
.size();
3332 unsigned DstIdx
= 0; // Low bits of the result.
3333 Register FactorSum
=
3334 B
.buildMul(NarrowTy
, Src1Regs
[DstIdx
], Src2Regs
[DstIdx
]).getReg(0);
3335 DstRegs
[DstIdx
] = FactorSum
;
3337 unsigned CarrySumPrevDstIdx
;
3338 SmallVector
<Register
, 4> Factors
;
3340 for (DstIdx
= 1; DstIdx
< DstParts
; DstIdx
++) {
3341 // Collect low parts of muls for DstIdx.
3342 for (unsigned i
= DstIdx
+ 1 < SrcParts
? 0 : DstIdx
- SrcParts
+ 1;
3343 i
<= std::min(DstIdx
, SrcParts
- 1); ++i
) {
3344 MachineInstrBuilder Mul
=
3345 B
.buildMul(NarrowTy
, Src1Regs
[DstIdx
- i
], Src2Regs
[i
]);
3346 Factors
.push_back(Mul
.getReg(0));
3348 // Collect high parts of muls from previous DstIdx.
3349 for (unsigned i
= DstIdx
< SrcParts
? 0 : DstIdx
- SrcParts
;
3350 i
<= std::min(DstIdx
- 1, SrcParts
- 1); ++i
) {
3351 MachineInstrBuilder Umulh
=
3352 B
.buildUMulH(NarrowTy
, Src1Regs
[DstIdx
- 1 - i
], Src2Regs
[i
]);
3353 Factors
.push_back(Umulh
.getReg(0));
3355 // Add CarrySum from additons calculated for previous DstIdx.
3357 Factors
.push_back(CarrySumPrevDstIdx
);
3361 // Add all factors and accumulate all carries into CarrySum.
3362 if (DstIdx
!= DstParts
- 1) {
3363 MachineInstrBuilder Uaddo
=
3364 B
.buildUAddo(NarrowTy
, LLT::scalar(1), Factors
[0], Factors
[1]);
3365 FactorSum
= Uaddo
.getReg(0);
3366 CarrySum
= B
.buildZExt(NarrowTy
, Uaddo
.getReg(1)).getReg(0);
3367 for (unsigned i
= 2; i
< Factors
.size(); ++i
) {
3368 MachineInstrBuilder Uaddo
=
3369 B
.buildUAddo(NarrowTy
, LLT::scalar(1), FactorSum
, Factors
[i
]);
3370 FactorSum
= Uaddo
.getReg(0);
3371 MachineInstrBuilder Carry
= B
.buildZExt(NarrowTy
, Uaddo
.getReg(1));
3372 CarrySum
= B
.buildAdd(NarrowTy
, CarrySum
, Carry
).getReg(0);
3375 // Since value for the next index is not calculated, neither is CarrySum.
3376 FactorSum
= B
.buildAdd(NarrowTy
, Factors
[0], Factors
[1]).getReg(0);
3377 for (unsigned i
= 2; i
< Factors
.size(); ++i
)
3378 FactorSum
= B
.buildAdd(NarrowTy
, FactorSum
, Factors
[i
]).getReg(0);
3381 CarrySumPrevDstIdx
= CarrySum
;
3382 DstRegs
[DstIdx
] = FactorSum
;
3387 LegalizerHelper::LegalizeResult
3388 LegalizerHelper::narrowScalarMul(MachineInstr
&MI
, LLT NarrowTy
) {
3389 Register DstReg
= MI
.getOperand(0).getReg();
3390 Register Src1
= MI
.getOperand(1).getReg();
3391 Register Src2
= MI
.getOperand(2).getReg();
3393 LLT Ty
= MRI
.getType(DstReg
);
3395 return UnableToLegalize
;
3397 unsigned SrcSize
= MRI
.getType(Src1
).getSizeInBits();
3398 unsigned DstSize
= Ty
.getSizeInBits();
3399 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
3400 if (DstSize
% NarrowSize
!= 0 || SrcSize
% NarrowSize
!= 0)
3401 return UnableToLegalize
;
3403 unsigned NumDstParts
= DstSize
/ NarrowSize
;
3404 unsigned NumSrcParts
= SrcSize
/ NarrowSize
;
3405 bool IsMulHigh
= MI
.getOpcode() == TargetOpcode::G_UMULH
;
3406 unsigned DstTmpParts
= NumDstParts
* (IsMulHigh
? 2 : 1);
3408 SmallVector
<Register
, 2> Src1Parts
, Src2Parts
, DstTmpRegs
;
3409 extractParts(Src1
, NarrowTy
, NumSrcParts
, Src1Parts
);
3410 extractParts(Src2
, NarrowTy
, NumSrcParts
, Src2Parts
);
3411 DstTmpRegs
.resize(DstTmpParts
);
3412 multiplyRegisters(DstTmpRegs
, Src1Parts
, Src2Parts
, NarrowTy
);
3414 // Take only high half of registers if this is high mul.
3415 ArrayRef
<Register
> DstRegs(
3416 IsMulHigh
? &DstTmpRegs
[DstTmpParts
/ 2] : &DstTmpRegs
[0], NumDstParts
);
3417 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
3418 MI
.eraseFromParent();
3422 LegalizerHelper::LegalizeResult
3423 LegalizerHelper::narrowScalarExtract(MachineInstr
&MI
, unsigned TypeIdx
,
3426 return UnableToLegalize
;
3428 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
3430 int64_t SizeOp1
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
3431 // FIXME: add support for when SizeOp1 isn't an exact multiple of
3433 if (SizeOp1
% NarrowSize
!= 0)
3434 return UnableToLegalize
;
3435 int NumParts
= SizeOp1
/ NarrowSize
;
3437 SmallVector
<Register
, 2> SrcRegs
, DstRegs
;
3438 SmallVector
<uint64_t, 2> Indexes
;
3439 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
);
3441 Register OpReg
= MI
.getOperand(0).getReg();
3442 uint64_t OpStart
= MI
.getOperand(2).getImm();
3443 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
3444 for (int i
= 0; i
< NumParts
; ++i
) {
3445 unsigned SrcStart
= i
* NarrowSize
;
3447 if (SrcStart
+ NarrowSize
<= OpStart
|| SrcStart
>= OpStart
+ OpSize
) {
3448 // No part of the extract uses this subregister, ignore it.
3450 } else if (SrcStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
3451 // The entire subregister is extracted, forward the value.
3452 DstRegs
.push_back(SrcRegs
[i
]);
3456 // OpSegStart is where this destination segment would start in OpReg if it
3457 // extended infinitely in both directions.
3458 int64_t ExtractOffset
;
3460 if (OpStart
< SrcStart
) {
3462 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- SrcStart
);
3464 ExtractOffset
= OpStart
- SrcStart
;
3465 SegSize
= std::min(SrcStart
+ NarrowSize
- OpStart
, OpSize
);
3468 Register SegReg
= SrcRegs
[i
];
3469 if (ExtractOffset
!= 0 || SegSize
!= NarrowSize
) {
3470 // A genuine extract is needed.
3471 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
3472 MIRBuilder
.buildExtract(SegReg
, SrcRegs
[i
], ExtractOffset
);
3475 DstRegs
.push_back(SegReg
);
3478 Register DstReg
= MI
.getOperand(0).getReg();
3479 if(MRI
.getType(DstReg
).isVector())
3480 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
3482 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
3483 MI
.eraseFromParent();
3487 LegalizerHelper::LegalizeResult
3488 LegalizerHelper::narrowScalarInsert(MachineInstr
&MI
, unsigned TypeIdx
,
3490 // FIXME: Don't know how to handle secondary types yet.
3492 return UnableToLegalize
;
3494 uint64_t SizeOp0
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
3495 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
3497 // FIXME: add support for when SizeOp0 isn't an exact multiple of
3499 if (SizeOp0
% NarrowSize
!= 0)
3500 return UnableToLegalize
;
3502 int NumParts
= SizeOp0
/ NarrowSize
;
3504 SmallVector
<Register
, 2> SrcRegs
, DstRegs
;
3505 SmallVector
<uint64_t, 2> Indexes
;
3506 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
);
3508 Register OpReg
= MI
.getOperand(2).getReg();
3509 uint64_t OpStart
= MI
.getOperand(3).getImm();
3510 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
3511 for (int i
= 0; i
< NumParts
; ++i
) {
3512 unsigned DstStart
= i
* NarrowSize
;
3514 if (DstStart
+ NarrowSize
<= OpStart
|| DstStart
>= OpStart
+ OpSize
) {
3515 // No part of the insert affects this subregister, forward the original.
3516 DstRegs
.push_back(SrcRegs
[i
]);
3518 } else if (DstStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
3519 // The entire subregister is defined by this insert, forward the new
3521 DstRegs
.push_back(OpReg
);
3525 // OpSegStart is where this destination segment would start in OpReg if it
3526 // extended infinitely in both directions.
3527 int64_t ExtractOffset
, InsertOffset
;
3529 if (OpStart
< DstStart
) {
3531 ExtractOffset
= DstStart
- OpStart
;
3532 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- DstStart
);
3534 InsertOffset
= OpStart
- DstStart
;
3537 std::min(NarrowSize
- InsertOffset
, OpStart
+ OpSize
- DstStart
);
3540 Register SegReg
= OpReg
;
3541 if (ExtractOffset
!= 0 || SegSize
!= OpSize
) {
3542 // A genuine extract is needed.
3543 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
3544 MIRBuilder
.buildExtract(SegReg
, OpReg
, ExtractOffset
);
3547 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
3548 MIRBuilder
.buildInsert(DstReg
, SrcRegs
[i
], SegReg
, InsertOffset
);
3549 DstRegs
.push_back(DstReg
);
3552 assert(DstRegs
.size() == (unsigned)NumParts
&& "not all parts covered");
3553 Register DstReg
= MI
.getOperand(0).getReg();
3554 if(MRI
.getType(DstReg
).isVector())
3555 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
3557 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
3558 MI
.eraseFromParent();
3562 LegalizerHelper::LegalizeResult
3563 LegalizerHelper::narrowScalarBasic(MachineInstr
&MI
, unsigned TypeIdx
,
3565 Register DstReg
= MI
.getOperand(0).getReg();
3566 LLT DstTy
= MRI
.getType(DstReg
);
3568 assert(MI
.getNumOperands() == 3 && TypeIdx
== 0);
3570 SmallVector
<Register
, 4> DstRegs
, DstLeftoverRegs
;
3571 SmallVector
<Register
, 4> Src0Regs
, Src0LeftoverRegs
;
3572 SmallVector
<Register
, 4> Src1Regs
, Src1LeftoverRegs
;
3574 if (!extractParts(MI
.getOperand(1).getReg(), DstTy
, NarrowTy
, LeftoverTy
,
3575 Src0Regs
, Src0LeftoverRegs
))
3576 return UnableToLegalize
;
3579 if (!extractParts(MI
.getOperand(2).getReg(), DstTy
, NarrowTy
, Unused
,
3580 Src1Regs
, Src1LeftoverRegs
))
3581 llvm_unreachable("inconsistent extractParts result");
3583 for (unsigned I
= 0, E
= Src1Regs
.size(); I
!= E
; ++I
) {
3584 auto Inst
= MIRBuilder
.buildInstr(MI
.getOpcode(), {NarrowTy
},
3585 {Src0Regs
[I
], Src1Regs
[I
]});
3586 DstRegs
.push_back(Inst
->getOperand(0).getReg());
3589 for (unsigned I
= 0, E
= Src1LeftoverRegs
.size(); I
!= E
; ++I
) {
3590 auto Inst
= MIRBuilder
.buildInstr(
3592 {LeftoverTy
}, {Src0LeftoverRegs
[I
], Src1LeftoverRegs
[I
]});
3593 DstLeftoverRegs
.push_back(Inst
->getOperand(0).getReg());
3596 insertParts(DstReg
, DstTy
, NarrowTy
, DstRegs
,
3597 LeftoverTy
, DstLeftoverRegs
);
3599 MI
.eraseFromParent();
3603 LegalizerHelper::LegalizeResult
3604 LegalizerHelper::narrowScalarSelect(MachineInstr
&MI
, unsigned TypeIdx
,
3607 return UnableToLegalize
;
3609 Register CondReg
= MI
.getOperand(1).getReg();
3610 LLT CondTy
= MRI
.getType(CondReg
);
3611 if (CondTy
.isVector()) // TODO: Handle vselect
3612 return UnableToLegalize
;
3614 Register DstReg
= MI
.getOperand(0).getReg();
3615 LLT DstTy
= MRI
.getType(DstReg
);
3617 SmallVector
<Register
, 4> DstRegs
, DstLeftoverRegs
;
3618 SmallVector
<Register
, 4> Src1Regs
, Src1LeftoverRegs
;
3619 SmallVector
<Register
, 4> Src2Regs
, Src2LeftoverRegs
;
3621 if (!extractParts(MI
.getOperand(2).getReg(), DstTy
, NarrowTy
, LeftoverTy
,
3622 Src1Regs
, Src1LeftoverRegs
))
3623 return UnableToLegalize
;
3626 if (!extractParts(MI
.getOperand(3).getReg(), DstTy
, NarrowTy
, Unused
,
3627 Src2Regs
, Src2LeftoverRegs
))
3628 llvm_unreachable("inconsistent extractParts result");
3630 for (unsigned I
= 0, E
= Src1Regs
.size(); I
!= E
; ++I
) {
3631 auto Select
= MIRBuilder
.buildSelect(NarrowTy
,
3632 CondReg
, Src1Regs
[I
], Src2Regs
[I
]);
3633 DstRegs
.push_back(Select
->getOperand(0).getReg());
3636 for (unsigned I
= 0, E
= Src1LeftoverRegs
.size(); I
!= E
; ++I
) {
3637 auto Select
= MIRBuilder
.buildSelect(
3638 LeftoverTy
, CondReg
, Src1LeftoverRegs
[I
], Src2LeftoverRegs
[I
]);
3639 DstLeftoverRegs
.push_back(Select
->getOperand(0).getReg());
3642 insertParts(DstReg
, DstTy
, NarrowTy
, DstRegs
,
3643 LeftoverTy
, DstLeftoverRegs
);
3645 MI
.eraseFromParent();
3649 LegalizerHelper::LegalizeResult
3650 LegalizerHelper::lowerBitCount(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3651 unsigned Opc
= MI
.getOpcode();
3652 auto &TII
= *MI
.getMF()->getSubtarget().getInstrInfo();
3653 auto isSupported
= [this](const LegalityQuery
&Q
) {
3654 auto QAction
= LI
.getAction(Q
).Action
;
3655 return QAction
== Legal
|| QAction
== Libcall
|| QAction
== Custom
;
3659 return UnableToLegalize
;
3660 case TargetOpcode::G_CTLZ_ZERO_UNDEF
: {
3661 // This trivially expands to CTLZ.
3662 Observer
.changingInstr(MI
);
3663 MI
.setDesc(TII
.get(TargetOpcode::G_CTLZ
));
3664 Observer
.changedInstr(MI
);
3667 case TargetOpcode::G_CTLZ
: {
3668 Register SrcReg
= MI
.getOperand(1).getReg();
3669 unsigned Len
= Ty
.getSizeInBits();
3670 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF
, {Ty
, Ty
}})) {
3671 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
3672 auto MIBCtlzZU
= MIRBuilder
.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF
,
3674 auto MIBZero
= MIRBuilder
.buildConstant(Ty
, 0);
3675 auto MIBLen
= MIRBuilder
.buildConstant(Ty
, Len
);
3676 auto MIBICmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LLT::scalar(1),
3678 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), MIBICmp
, MIBLen
,
3680 MI
.eraseFromParent();
3683 // for now, we do this:
3684 // NewLen = NextPowerOf2(Len);
3685 // x = x | (x >> 1);
3686 // x = x | (x >> 2);
3688 // x = x | (x >>16);
3689 // x = x | (x >>32); // for 64-bit input
3691 // return Len - popcount(x);
3693 // Ref: "Hacker's Delight" by Henry Warren
3694 Register Op
= SrcReg
;
3695 unsigned NewLen
= PowerOf2Ceil(Len
);
3696 for (unsigned i
= 0; (1U << i
) <= (NewLen
/ 2); ++i
) {
3697 auto MIBShiftAmt
= MIRBuilder
.buildConstant(Ty
, 1ULL << i
);
3698 auto MIBOp
= MIRBuilder
.buildInstr(
3699 TargetOpcode::G_OR
, {Ty
},
3700 {Op
, MIRBuilder
.buildInstr(TargetOpcode::G_LSHR
, {Ty
},
3701 {Op
, MIBShiftAmt
})});
3702 Op
= MIBOp
->getOperand(0).getReg();
3704 auto MIBPop
= MIRBuilder
.buildInstr(TargetOpcode::G_CTPOP
, {Ty
}, {Op
});
3705 MIRBuilder
.buildInstr(TargetOpcode::G_SUB
, {MI
.getOperand(0).getReg()},
3706 {MIRBuilder
.buildConstant(Ty
, Len
), MIBPop
});
3707 MI
.eraseFromParent();
3710 case TargetOpcode::G_CTTZ_ZERO_UNDEF
: {
3711 // This trivially expands to CTTZ.
3712 Observer
.changingInstr(MI
);
3713 MI
.setDesc(TII
.get(TargetOpcode::G_CTTZ
));
3714 Observer
.changedInstr(MI
);
3717 case TargetOpcode::G_CTTZ
: {
3718 Register SrcReg
= MI
.getOperand(1).getReg();
3719 unsigned Len
= Ty
.getSizeInBits();
3720 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF
, {Ty
, Ty
}})) {
3721 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
3723 auto MIBCttzZU
= MIRBuilder
.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF
,
3725 auto MIBZero
= MIRBuilder
.buildConstant(Ty
, 0);
3726 auto MIBLen
= MIRBuilder
.buildConstant(Ty
, Len
);
3727 auto MIBICmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LLT::scalar(1),
3729 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), MIBICmp
, MIBLen
,
3731 MI
.eraseFromParent();
3734 // for now, we use: { return popcount(~x & (x - 1)); }
3735 // unless the target has ctlz but not ctpop, in which case we use:
3736 // { return 32 - nlz(~x & (x-1)); }
3737 // Ref: "Hacker's Delight" by Henry Warren
3738 auto MIBCstNeg1
= MIRBuilder
.buildConstant(Ty
, -1);
3740 MIRBuilder
.buildInstr(TargetOpcode::G_XOR
, {Ty
}, {SrcReg
, MIBCstNeg1
});
3741 auto MIBTmp
= MIRBuilder
.buildInstr(
3742 TargetOpcode::G_AND
, {Ty
},
3743 {MIBNot
, MIRBuilder
.buildInstr(TargetOpcode::G_ADD
, {Ty
},
3744 {SrcReg
, MIBCstNeg1
})});
3745 if (!isSupported({TargetOpcode::G_CTPOP
, {Ty
, Ty
}}) &&
3746 isSupported({TargetOpcode::G_CTLZ
, {Ty
, Ty
}})) {
3747 auto MIBCstLen
= MIRBuilder
.buildConstant(Ty
, Len
);
3748 MIRBuilder
.buildInstr(
3749 TargetOpcode::G_SUB
, {MI
.getOperand(0).getReg()},
3751 MIRBuilder
.buildInstr(TargetOpcode::G_CTLZ
, {Ty
}, {MIBTmp
})});
3752 MI
.eraseFromParent();
3755 MI
.setDesc(TII
.get(TargetOpcode::G_CTPOP
));
3756 MI
.getOperand(1).setReg(MIBTmp
->getOperand(0).getReg());
3762 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3764 LegalizerHelper::LegalizeResult
3765 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr
&MI
) {
3766 Register Dst
= MI
.getOperand(0).getReg();
3767 Register Src
= MI
.getOperand(1).getReg();
3768 const LLT S64
= LLT::scalar(64);
3769 const LLT S32
= LLT::scalar(32);
3770 const LLT S1
= LLT::scalar(1);
3772 assert(MRI
.getType(Src
) == S64
&& MRI
.getType(Dst
) == S32
);
3774 // unsigned cul2f(ulong u) {
3775 // uint lz = clz(u);
3776 // uint e = (u != 0) ? 127U + 63U - lz : 0;
3777 // u = (u << lz) & 0x7fffffffffffffffUL;
3778 // ulong t = u & 0xffffffffffUL;
3779 // uint v = (e << 23) | (uint)(u >> 40);
3780 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3781 // return as_float(v + r);
3784 auto Zero32
= MIRBuilder
.buildConstant(S32
, 0);
3785 auto Zero64
= MIRBuilder
.buildConstant(S64
, 0);
3787 auto LZ
= MIRBuilder
.buildCTLZ_ZERO_UNDEF(S32
, Src
);
3789 auto K
= MIRBuilder
.buildConstant(S32
, 127U + 63U);
3790 auto Sub
= MIRBuilder
.buildSub(S32
, K
, LZ
);
3792 auto NotZero
= MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, S1
, Src
, Zero64
);
3793 auto E
= MIRBuilder
.buildSelect(S32
, NotZero
, Sub
, Zero32
);
3795 auto Mask0
= MIRBuilder
.buildConstant(S64
, (-1ULL) >> 1);
3796 auto ShlLZ
= MIRBuilder
.buildShl(S64
, Src
, LZ
);
3798 auto U
= MIRBuilder
.buildAnd(S64
, ShlLZ
, Mask0
);
3800 auto Mask1
= MIRBuilder
.buildConstant(S64
, 0xffffffffffULL
);
3801 auto T
= MIRBuilder
.buildAnd(S64
, U
, Mask1
);
3803 auto UShl
= MIRBuilder
.buildLShr(S64
, U
, MIRBuilder
.buildConstant(S64
, 40));
3804 auto ShlE
= MIRBuilder
.buildShl(S32
, E
, MIRBuilder
.buildConstant(S32
, 23));
3805 auto V
= MIRBuilder
.buildOr(S32
, ShlE
, MIRBuilder
.buildTrunc(S32
, UShl
));
3807 auto C
= MIRBuilder
.buildConstant(S64
, 0x8000000000ULL
);
3808 auto RCmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_UGT
, S1
, T
, C
);
3809 auto TCmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, S1
, T
, C
);
3810 auto One
= MIRBuilder
.buildConstant(S32
, 1);
3812 auto VTrunc1
= MIRBuilder
.buildAnd(S32
, V
, One
);
3813 auto Select0
= MIRBuilder
.buildSelect(S32
, TCmp
, VTrunc1
, Zero32
);
3814 auto R
= MIRBuilder
.buildSelect(S32
, RCmp
, One
, Select0
);
3815 MIRBuilder
.buildAdd(Dst
, V
, R
);
3820 LegalizerHelper::LegalizeResult
3821 LegalizerHelper::lowerUITOFP(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3822 Register Dst
= MI
.getOperand(0).getReg();
3823 Register Src
= MI
.getOperand(1).getReg();
3824 LLT DstTy
= MRI
.getType(Dst
);
3825 LLT SrcTy
= MRI
.getType(Src
);
3827 if (SrcTy
!= LLT::scalar(64))
3828 return UnableToLegalize
;
3830 if (DstTy
== LLT::scalar(32)) {
3831 // TODO: SelectionDAG has several alternative expansions to port which may
3832 // be more reasonble depending on the available instructions. If a target
3833 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3834 // intermediate type, this is probably worse.
3835 return lowerU64ToF32BitOps(MI
);
3838 return UnableToLegalize
;
3841 LegalizerHelper::LegalizeResult
3842 LegalizerHelper::lowerSITOFP(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3843 Register Dst
= MI
.getOperand(0).getReg();
3844 Register Src
= MI
.getOperand(1).getReg();
3845 LLT DstTy
= MRI
.getType(Dst
);
3846 LLT SrcTy
= MRI
.getType(Src
);
3848 const LLT S64
= LLT::scalar(64);
3849 const LLT S32
= LLT::scalar(32);
3850 const LLT S1
= LLT::scalar(1);
3853 return UnableToLegalize
;
3856 // signed cl2f(long l) {
3857 // long s = l >> 63;
3858 // float r = cul2f((l + s) ^ s);
3859 // return s ? -r : r;
3862 auto SignBit
= MIRBuilder
.buildConstant(S64
, 63);
3863 auto S
= MIRBuilder
.buildAShr(S64
, L
, SignBit
);
3865 auto LPlusS
= MIRBuilder
.buildAdd(S64
, L
, S
);
3866 auto Xor
= MIRBuilder
.buildXor(S64
, LPlusS
, S
);
3867 auto R
= MIRBuilder
.buildUITOFP(S32
, Xor
);
3869 auto RNeg
= MIRBuilder
.buildFNeg(S32
, R
);
3870 auto SignNotZero
= MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, S1
, S
,
3871 MIRBuilder
.buildConstant(S64
, 0));
3872 MIRBuilder
.buildSelect(Dst
, SignNotZero
, RNeg
, R
);
3876 return UnableToLegalize
;
3879 LegalizerHelper::LegalizeResult
3880 LegalizerHelper::lowerFPTOUI(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3881 Register Dst
= MI
.getOperand(0).getReg();
3882 Register Src
= MI
.getOperand(1).getReg();
3883 LLT DstTy
= MRI
.getType(Dst
);
3884 LLT SrcTy
= MRI
.getType(Src
);
3885 const LLT S64
= LLT::scalar(64);
3886 const LLT S32
= LLT::scalar(32);
3888 if (SrcTy
!= S64
&& SrcTy
!= S32
)
3889 return UnableToLegalize
;
3890 if (DstTy
!= S32
&& DstTy
!= S64
)
3891 return UnableToLegalize
;
3893 // FPTOSI gives same result as FPTOUI for positive signed integers.
3894 // FPTOUI needs to deal with fp values that convert to unsigned integers
3895 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
3897 APInt TwoPExpInt
= APInt::getSignMask(DstTy
.getSizeInBits());
3898 APFloat
TwoPExpFP(SrcTy
.getSizeInBits() == 32 ? APFloat::IEEEsingle()
3899 : APFloat::IEEEdouble(),
3900 APInt::getNullValue(SrcTy
.getSizeInBits()));
3901 TwoPExpFP
.convertFromAPInt(TwoPExpInt
, false, APFloat::rmNearestTiesToEven
);
3903 MachineInstrBuilder FPTOSI
= MIRBuilder
.buildFPTOSI(DstTy
, Src
);
3905 MachineInstrBuilder Threshold
= MIRBuilder
.buildFConstant(SrcTy
, TwoPExpFP
);
3906 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
3907 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
3908 MachineInstrBuilder FSub
= MIRBuilder
.buildFSub(SrcTy
, Src
, Threshold
);
3909 MachineInstrBuilder ResLowBits
= MIRBuilder
.buildFPTOSI(DstTy
, FSub
);
3910 MachineInstrBuilder ResHighBit
= MIRBuilder
.buildConstant(DstTy
, TwoPExpInt
);
3911 MachineInstrBuilder Res
= MIRBuilder
.buildXor(DstTy
, ResLowBits
, ResHighBit
);
3913 MachineInstrBuilder FCMP
=
3914 MIRBuilder
.buildFCmp(CmpInst::FCMP_ULT
, DstTy
, Src
, Threshold
);
3915 MIRBuilder
.buildSelect(Dst
, FCMP
, FPTOSI
, Res
);
3917 MI
.eraseFromParent();
3921 static CmpInst::Predicate
minMaxToCompare(unsigned Opc
) {
3923 case TargetOpcode::G_SMIN
:
3924 return CmpInst::ICMP_SLT
;
3925 case TargetOpcode::G_SMAX
:
3926 return CmpInst::ICMP_SGT
;
3927 case TargetOpcode::G_UMIN
:
3928 return CmpInst::ICMP_ULT
;
3929 case TargetOpcode::G_UMAX
:
3930 return CmpInst::ICMP_UGT
;
3932 llvm_unreachable("not in integer min/max");
3936 LegalizerHelper::LegalizeResult
3937 LegalizerHelper::lowerMinMax(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3938 Register Dst
= MI
.getOperand(0).getReg();
3939 Register Src0
= MI
.getOperand(1).getReg();
3940 Register Src1
= MI
.getOperand(2).getReg();
3942 const CmpInst::Predicate Pred
= minMaxToCompare(MI
.getOpcode());
3943 LLT CmpType
= MRI
.getType(Dst
).changeElementSize(1);
3945 auto Cmp
= MIRBuilder
.buildICmp(Pred
, CmpType
, Src0
, Src1
);
3946 MIRBuilder
.buildSelect(Dst
, Cmp
, Src0
, Src1
);
3948 MI
.eraseFromParent();
3952 LegalizerHelper::LegalizeResult
3953 LegalizerHelper::lowerFCopySign(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3954 Register Dst
= MI
.getOperand(0).getReg();
3955 Register Src0
= MI
.getOperand(1).getReg();
3956 Register Src1
= MI
.getOperand(2).getReg();
3958 const LLT Src0Ty
= MRI
.getType(Src0
);
3959 const LLT Src1Ty
= MRI
.getType(Src1
);
3961 const int Src0Size
= Src0Ty
.getScalarSizeInBits();
3962 const int Src1Size
= Src1Ty
.getScalarSizeInBits();
3964 auto SignBitMask
= MIRBuilder
.buildConstant(
3965 Src0Ty
, APInt::getSignMask(Src0Size
));
3967 auto NotSignBitMask
= MIRBuilder
.buildConstant(
3968 Src0Ty
, APInt::getLowBitsSet(Src0Size
, Src0Size
- 1));
3970 auto And0
= MIRBuilder
.buildAnd(Src0Ty
, Src0
, NotSignBitMask
);
3973 if (Src0Ty
== Src1Ty
) {
3974 auto And1
= MIRBuilder
.buildAnd(Src1Ty
, Src0
, SignBitMask
);
3975 Or
= MIRBuilder
.buildOr(Dst
, And0
, And1
);
3976 } else if (Src0Size
> Src1Size
) {
3977 auto ShiftAmt
= MIRBuilder
.buildConstant(Src0Ty
, Src0Size
- Src1Size
);
3978 auto Zext
= MIRBuilder
.buildZExt(Src0Ty
, Src1
);
3979 auto Shift
= MIRBuilder
.buildShl(Src0Ty
, Zext
, ShiftAmt
);
3980 auto And1
= MIRBuilder
.buildAnd(Src0Ty
, Shift
, SignBitMask
);
3981 Or
= MIRBuilder
.buildOr(Dst
, And0
, And1
);
3983 auto ShiftAmt
= MIRBuilder
.buildConstant(Src1Ty
, Src1Size
- Src0Size
);
3984 auto Shift
= MIRBuilder
.buildLShr(Src1Ty
, Src1
, ShiftAmt
);
3985 auto Trunc
= MIRBuilder
.buildTrunc(Src0Ty
, Shift
);
3986 auto And1
= MIRBuilder
.buildAnd(Src0Ty
, Trunc
, SignBitMask
);
3987 Or
= MIRBuilder
.buildOr(Dst
, And0
, And1
);
3990 // Be careful about setting nsz/nnan/ninf on every instruction, since the
3991 // constants are a nan and -0.0, but the final result should preserve
3993 if (unsigned Flags
= MI
.getFlags())
3994 Or
->setFlags(Flags
);
3996 MI
.eraseFromParent();
4000 LegalizerHelper::LegalizeResult
4001 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr
&MI
) {
4002 unsigned NewOp
= MI
.getOpcode() == TargetOpcode::G_FMINNUM
?
4003 TargetOpcode::G_FMINNUM_IEEE
: TargetOpcode::G_FMAXNUM_IEEE
;
4005 Register Dst
= MI
.getOperand(0).getReg();
4006 Register Src0
= MI
.getOperand(1).getReg();
4007 Register Src1
= MI
.getOperand(2).getReg();
4008 LLT Ty
= MRI
.getType(Dst
);
4010 if (!MI
.getFlag(MachineInstr::FmNoNans
)) {
4011 // Insert canonicalizes if it's possible we need to quiet to get correct
4014 // Note this must be done here, and not as an optimization combine in the
4015 // absence of a dedicate quiet-snan instruction as we're using an
4016 // omni-purpose G_FCANONICALIZE.
4017 if (!isKnownNeverSNaN(Src0
, MRI
))
4018 Src0
= MIRBuilder
.buildFCanonicalize(Ty
, Src0
, MI
.getFlags()).getReg(0);
4020 if (!isKnownNeverSNaN(Src1
, MRI
))
4021 Src1
= MIRBuilder
.buildFCanonicalize(Ty
, Src1
, MI
.getFlags()).getReg(0);
4024 // If there are no nans, it's safe to simply replace this with the non-IEEE
4026 MIRBuilder
.buildInstr(NewOp
, {Dst
}, {Src0
, Src1
}, MI
.getFlags());
4027 MI
.eraseFromParent();
4031 LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMad(MachineInstr
&MI
) {
4032 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
4033 Register DstReg
= MI
.getOperand(0).getReg();
4034 LLT Ty
= MRI
.getType(DstReg
);
4035 unsigned Flags
= MI
.getFlags();
4037 auto Mul
= MIRBuilder
.buildFMul(Ty
, MI
.getOperand(1), MI
.getOperand(2),
4039 MIRBuilder
.buildFAdd(DstReg
, Mul
, MI
.getOperand(3), Flags
);
4040 MI
.eraseFromParent();
4044 LegalizerHelper::LegalizeResult
4045 LegalizerHelper::lowerUnmergeValues(MachineInstr
&MI
) {
4046 const unsigned NumDst
= MI
.getNumOperands() - 1;
4047 const Register SrcReg
= MI
.getOperand(NumDst
).getReg();
4048 LLT SrcTy
= MRI
.getType(SrcReg
);
4050 Register Dst0Reg
= MI
.getOperand(0).getReg();
4051 LLT DstTy
= MRI
.getType(Dst0Reg
);
4054 // Expand scalarizing unmerge as bitcast to integer and shift.
4055 if (!DstTy
.isVector() && SrcTy
.isVector() &&
4056 SrcTy
.getElementType() == DstTy
) {
4057 LLT IntTy
= LLT::scalar(SrcTy
.getSizeInBits());
4058 Register Cast
= MIRBuilder
.buildBitcast(IntTy
, SrcReg
).getReg(0);
4060 MIRBuilder
.buildTrunc(Dst0Reg
, Cast
);
4062 const unsigned DstSize
= DstTy
.getSizeInBits();
4063 unsigned Offset
= DstSize
;
4064 for (unsigned I
= 1; I
!= NumDst
; ++I
, Offset
+= DstSize
) {
4065 auto ShiftAmt
= MIRBuilder
.buildConstant(IntTy
, Offset
);
4066 auto Shift
= MIRBuilder
.buildLShr(IntTy
, Cast
, ShiftAmt
);
4067 MIRBuilder
.buildTrunc(MI
.getOperand(I
), Shift
);
4070 MI
.eraseFromParent();
4074 return UnableToLegalize
;
4077 LegalizerHelper::LegalizeResult
4078 LegalizerHelper::lowerShuffleVector(MachineInstr
&MI
) {
4079 Register DstReg
= MI
.getOperand(0).getReg();
4080 Register Src0Reg
= MI
.getOperand(1).getReg();
4081 Register Src1Reg
= MI
.getOperand(2).getReg();
4082 LLT Src0Ty
= MRI
.getType(Src0Reg
);
4083 LLT DstTy
= MRI
.getType(DstReg
);
4084 LLT IdxTy
= LLT::scalar(32);
4086 const Constant
*ShufMask
= MI
.getOperand(3).getShuffleMask();
4088 SmallVector
<int, 32> Mask
;
4089 ShuffleVectorInst::getShuffleMask(ShufMask
, Mask
);
4091 if (DstTy
.isScalar()) {
4092 if (Src0Ty
.isVector())
4093 return UnableToLegalize
;
4095 // This is just a SELECT.
4096 assert(Mask
.size() == 1 && "Expected a single mask element");
4098 if (Mask
[0] < 0 || Mask
[0] > 1)
4099 Val
= MIRBuilder
.buildUndef(DstTy
).getReg(0);
4101 Val
= Mask
[0] == 0 ? Src0Reg
: Src1Reg
;
4102 MIRBuilder
.buildCopy(DstReg
, Val
);
4103 MI
.eraseFromParent();
4108 SmallVector
<Register
, 32> BuildVec
;
4109 LLT EltTy
= DstTy
.getElementType();
4111 for (int Idx
: Mask
) {
4113 if (!Undef
.isValid())
4114 Undef
= MIRBuilder
.buildUndef(EltTy
).getReg(0);
4115 BuildVec
.push_back(Undef
);
4119 if (Src0Ty
.isScalar()) {
4120 BuildVec
.push_back(Idx
== 0 ? Src0Reg
: Src1Reg
);
4122 int NumElts
= Src0Ty
.getNumElements();
4123 Register SrcVec
= Idx
< NumElts
? Src0Reg
: Src1Reg
;
4124 int ExtractIdx
= Idx
< NumElts
? Idx
: Idx
- NumElts
;
4125 auto IdxK
= MIRBuilder
.buildConstant(IdxTy
, ExtractIdx
);
4126 auto Extract
= MIRBuilder
.buildExtractVectorElement(EltTy
, SrcVec
, IdxK
);
4127 BuildVec
.push_back(Extract
.getReg(0));
4131 MIRBuilder
.buildBuildVector(DstReg
, BuildVec
);
4132 MI
.eraseFromParent();
4136 LegalizerHelper::LegalizeResult
4137 LegalizerHelper::lowerDynStackAlloc(MachineInstr
&MI
) {
4138 Register Dst
= MI
.getOperand(0).getReg();
4139 Register AllocSize
= MI
.getOperand(1).getReg();
4140 unsigned Align
= MI
.getOperand(2).getImm();
4142 const auto &MF
= *MI
.getMF();
4143 const auto &TLI
= *MF
.getSubtarget().getTargetLowering();
4145 LLT PtrTy
= MRI
.getType(Dst
);
4146 LLT IntPtrTy
= LLT::scalar(PtrTy
.getSizeInBits());
4148 Register SPReg
= TLI
.getStackPointerRegisterToSaveRestore();
4149 auto SPTmp
= MIRBuilder
.buildCopy(PtrTy
, SPReg
);
4150 SPTmp
= MIRBuilder
.buildCast(IntPtrTy
, SPTmp
);
4152 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
4153 // have to generate an extra instruction to negate the alloc and then use
4154 // G_GEP to add the negative offset.
4155 auto Alloc
= MIRBuilder
.buildSub(IntPtrTy
, SPTmp
, AllocSize
);
4157 APInt
AlignMask(IntPtrTy
.getSizeInBits(), Align
, true);
4159 auto AlignCst
= MIRBuilder
.buildConstant(IntPtrTy
, AlignMask
);
4160 Alloc
= MIRBuilder
.buildAnd(IntPtrTy
, Alloc
, AlignCst
);
4163 SPTmp
= MIRBuilder
.buildCast(PtrTy
, Alloc
);
4164 MIRBuilder
.buildCopy(SPReg
, SPTmp
);
4165 MIRBuilder
.buildCopy(Dst
, SPTmp
);
4167 MI
.eraseFromParent();
4171 LegalizerHelper::LegalizeResult
4172 LegalizerHelper::lowerExtract(MachineInstr
&MI
) {
4173 Register Dst
= MI
.getOperand(0).getReg();
4174 Register Src
= MI
.getOperand(1).getReg();
4175 unsigned Offset
= MI
.getOperand(2).getImm();
4177 LLT DstTy
= MRI
.getType(Dst
);
4178 LLT SrcTy
= MRI
.getType(Src
);
4180 if (DstTy
.isScalar() &&
4181 (SrcTy
.isScalar() ||
4182 (SrcTy
.isVector() && DstTy
== SrcTy
.getElementType()))) {
4183 LLT SrcIntTy
= SrcTy
;
4184 if (!SrcTy
.isScalar()) {
4185 SrcIntTy
= LLT::scalar(SrcTy
.getSizeInBits());
4186 Src
= MIRBuilder
.buildBitcast(SrcIntTy
, Src
).getReg(0);
4190 MIRBuilder
.buildTrunc(Dst
, Src
);
4192 auto ShiftAmt
= MIRBuilder
.buildConstant(SrcIntTy
, Offset
);
4193 auto Shr
= MIRBuilder
.buildLShr(SrcIntTy
, Src
, ShiftAmt
);
4194 MIRBuilder
.buildTrunc(Dst
, Shr
);
4197 MI
.eraseFromParent();
4201 return UnableToLegalize
;
4204 LegalizerHelper::LegalizeResult
LegalizerHelper::lowerInsert(MachineInstr
&MI
) {
4205 Register Dst
= MI
.getOperand(0).getReg();
4206 Register Src
= MI
.getOperand(1).getReg();
4207 Register InsertSrc
= MI
.getOperand(2).getReg();
4208 uint64_t Offset
= MI
.getOperand(3).getImm();
4210 LLT DstTy
= MRI
.getType(Src
);
4211 LLT InsertTy
= MRI
.getType(InsertSrc
);
4213 if (InsertTy
.isScalar() &&
4214 (DstTy
.isScalar() ||
4215 (DstTy
.isVector() && DstTy
.getElementType() == InsertTy
))) {
4216 LLT IntDstTy
= DstTy
;
4217 if (!DstTy
.isScalar()) {
4218 IntDstTy
= LLT::scalar(DstTy
.getSizeInBits());
4219 Src
= MIRBuilder
.buildBitcast(IntDstTy
, Src
).getReg(0);
4222 Register ExtInsSrc
= MIRBuilder
.buildZExt(IntDstTy
, InsertSrc
).getReg(0);
4224 auto ShiftAmt
= MIRBuilder
.buildConstant(IntDstTy
, Offset
);
4225 ExtInsSrc
= MIRBuilder
.buildShl(IntDstTy
, ExtInsSrc
, ShiftAmt
).getReg(0);
4228 APInt MaskVal
= ~APInt::getBitsSet(DstTy
.getSizeInBits(), Offset
,
4229 InsertTy
.getSizeInBits());
4231 auto Mask
= MIRBuilder
.buildConstant(IntDstTy
, MaskVal
);
4232 auto MaskedSrc
= MIRBuilder
.buildAnd(IntDstTy
, Src
, Mask
);
4233 auto Or
= MIRBuilder
.buildOr(IntDstTy
, MaskedSrc
, ExtInsSrc
);
4235 MIRBuilder
.buildBitcast(Dst
, Or
);
4236 MI
.eraseFromParent();
4240 return UnableToLegalize
;
4243 LegalizerHelper::LegalizeResult
4244 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr
&MI
) {
4245 Register Dst0
= MI
.getOperand(0).getReg();
4246 Register Dst1
= MI
.getOperand(1).getReg();
4247 Register LHS
= MI
.getOperand(2).getReg();
4248 Register RHS
= MI
.getOperand(3).getReg();
4249 const bool IsAdd
= MI
.getOpcode() == TargetOpcode::G_SADDO
;
4251 LLT Ty
= MRI
.getType(Dst0
);
4252 LLT BoolTy
= MRI
.getType(Dst1
);
4255 MIRBuilder
.buildAdd(Dst0
, LHS
, RHS
);
4257 MIRBuilder
.buildSub(Dst0
, LHS
, RHS
);
4259 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
4261 auto Zero
= MIRBuilder
.buildConstant(Ty
, 0);
4263 // For an addition, the result should be less than one of the operands (LHS)
4264 // if and only if the other operand (RHS) is negative, otherwise there will
4266 // For a subtraction, the result should be less than one of the operands
4267 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
4268 // otherwise there will be overflow.
4269 auto ResultLowerThanLHS
=
4270 MIRBuilder
.buildICmp(CmpInst::ICMP_SLT
, BoolTy
, Dst0
, LHS
);
4271 auto ConditionRHS
= MIRBuilder
.buildICmp(
4272 IsAdd
? CmpInst::ICMP_SLT
: CmpInst::ICMP_SGT
, BoolTy
, RHS
, Zero
);
4274 MIRBuilder
.buildXor(Dst1
, ConditionRHS
, ResultLowerThanLHS
);
4275 MI
.eraseFromParent();