1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
27 #define DEBUG_TYPE "legalizer"
30 using namespace LegalizeActions
;
32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35 /// with any leftover piece as type \p LeftoverTy
37 /// Returns -1 in the first element of the pair if the breakdown is not
39 static std::pair
<int, int>
40 getNarrowTypeBreakDown(LLT OrigTy
, LLT NarrowTy
, LLT
&LeftoverTy
) {
41 assert(!LeftoverTy
.isValid() && "this is an out argument");
43 unsigned Size
= OrigTy
.getSizeInBits();
44 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
45 unsigned NumParts
= Size
/ NarrowSize
;
46 unsigned LeftoverSize
= Size
- NumParts
* NarrowSize
;
47 assert(Size
> NarrowSize
);
49 if (LeftoverSize
== 0)
52 if (NarrowTy
.isVector()) {
53 unsigned EltSize
= OrigTy
.getScalarSizeInBits();
54 if (LeftoverSize
% EltSize
!= 0)
56 LeftoverTy
= LLT::scalarOrVector(LeftoverSize
/ EltSize
, EltSize
);
58 LeftoverTy
= LLT::scalar(LeftoverSize
);
61 int NumLeftover
= LeftoverSize
/ LeftoverTy
.getSizeInBits();
62 return std::make_pair(NumParts
, NumLeftover
);
65 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
,
66 GISelChangeObserver
&Observer
,
67 MachineIRBuilder
&Builder
)
68 : MIRBuilder(Builder
), MRI(MF
.getRegInfo()),
69 LI(*MF
.getSubtarget().getLegalizerInfo()), Observer(Observer
) {
71 MIRBuilder
.setChangeObserver(Observer
);
74 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
, const LegalizerInfo
&LI
,
75 GISelChangeObserver
&Observer
,
77 : MIRBuilder(B
), MRI(MF
.getRegInfo()), LI(LI
), Observer(Observer
) {
79 MIRBuilder
.setChangeObserver(Observer
);
81 LegalizerHelper::LegalizeResult
82 LegalizerHelper::legalizeInstrStep(MachineInstr
&MI
) {
83 LLVM_DEBUG(dbgs() << "Legalizing: "; MI
.print(dbgs()));
85 if (MI
.getOpcode() == TargetOpcode::G_INTRINSIC
||
86 MI
.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
)
87 return LI
.legalizeIntrinsic(MI
, MRI
, MIRBuilder
) ? Legalized
89 auto Step
= LI
.getAction(MI
, MRI
);
90 switch (Step
.Action
) {
92 LLVM_DEBUG(dbgs() << ".. Already legal\n");
95 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
98 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
99 return narrowScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
101 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
102 return widenScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
104 LLVM_DEBUG(dbgs() << ".. Lower\n");
105 return lower(MI
, Step
.TypeIdx
, Step
.NewType
);
107 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
108 return fewerElementsVector(MI
, Step
.TypeIdx
, Step
.NewType
);
110 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
111 return moreElementsVector(MI
, Step
.TypeIdx
, Step
.NewType
);
113 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
114 return LI
.legalizeCustom(MI
, MRI
, MIRBuilder
, Observer
) ? Legalized
117 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
118 return UnableToLegalize
;
122 void LegalizerHelper::extractParts(Register Reg
, LLT Ty
, int NumParts
,
123 SmallVectorImpl
<Register
> &VRegs
) {
124 for (int i
= 0; i
< NumParts
; ++i
)
125 VRegs
.push_back(MRI
.createGenericVirtualRegister(Ty
));
126 MIRBuilder
.buildUnmerge(VRegs
, Reg
);
129 bool LegalizerHelper::extractParts(Register Reg
, LLT RegTy
,
130 LLT MainTy
, LLT
&LeftoverTy
,
131 SmallVectorImpl
<Register
> &VRegs
,
132 SmallVectorImpl
<Register
> &LeftoverRegs
) {
133 assert(!LeftoverTy
.isValid() && "this is an out argument");
135 unsigned RegSize
= RegTy
.getSizeInBits();
136 unsigned MainSize
= MainTy
.getSizeInBits();
137 unsigned NumParts
= RegSize
/ MainSize
;
138 unsigned LeftoverSize
= RegSize
- NumParts
* MainSize
;
140 // Use an unmerge when possible.
141 if (LeftoverSize
== 0) {
142 for (unsigned I
= 0; I
< NumParts
; ++I
)
143 VRegs
.push_back(MRI
.createGenericVirtualRegister(MainTy
));
144 MIRBuilder
.buildUnmerge(VRegs
, Reg
);
148 if (MainTy
.isVector()) {
149 unsigned EltSize
= MainTy
.getScalarSizeInBits();
150 if (LeftoverSize
% EltSize
!= 0)
152 LeftoverTy
= LLT::scalarOrVector(LeftoverSize
/ EltSize
, EltSize
);
154 LeftoverTy
= LLT::scalar(LeftoverSize
);
157 // For irregular sizes, extract the individual parts.
158 for (unsigned I
= 0; I
!= NumParts
; ++I
) {
159 Register NewReg
= MRI
.createGenericVirtualRegister(MainTy
);
160 VRegs
.push_back(NewReg
);
161 MIRBuilder
.buildExtract(NewReg
, Reg
, MainSize
* I
);
164 for (unsigned Offset
= MainSize
* NumParts
; Offset
< RegSize
;
165 Offset
+= LeftoverSize
) {
166 Register NewReg
= MRI
.createGenericVirtualRegister(LeftoverTy
);
167 LeftoverRegs
.push_back(NewReg
);
168 MIRBuilder
.buildExtract(NewReg
, Reg
, Offset
);
174 static LLT
getGCDType(LLT OrigTy
, LLT TargetTy
) {
175 if (OrigTy
.isVector() && TargetTy
.isVector()) {
176 assert(OrigTy
.getElementType() == TargetTy
.getElementType());
177 int GCD
= greatestCommonDivisor(OrigTy
.getNumElements(),
178 TargetTy
.getNumElements());
179 return LLT::scalarOrVector(GCD
, OrigTy
.getElementType());
182 if (OrigTy
.isVector() && !TargetTy
.isVector()) {
183 assert(OrigTy
.getElementType() == TargetTy
);
187 assert(!OrigTy
.isVector() && !TargetTy
.isVector());
189 int GCD
= greatestCommonDivisor(OrigTy
.getSizeInBits(),
190 TargetTy
.getSizeInBits());
191 return LLT::scalar(GCD
);
194 void LegalizerHelper::insertParts(Register DstReg
,
195 LLT ResultTy
, LLT PartTy
,
196 ArrayRef
<Register
> PartRegs
,
198 ArrayRef
<Register
> LeftoverRegs
) {
199 if (!LeftoverTy
.isValid()) {
200 assert(LeftoverRegs
.empty());
202 if (!ResultTy
.isVector()) {
203 MIRBuilder
.buildMerge(DstReg
, PartRegs
);
207 if (PartTy
.isVector())
208 MIRBuilder
.buildConcatVectors(DstReg
, PartRegs
);
210 MIRBuilder
.buildBuildVector(DstReg
, PartRegs
);
214 unsigned PartSize
= PartTy
.getSizeInBits();
215 unsigned LeftoverPartSize
= LeftoverTy
.getSizeInBits();
217 Register CurResultReg
= MRI
.createGenericVirtualRegister(ResultTy
);
218 MIRBuilder
.buildUndef(CurResultReg
);
221 for (Register PartReg
: PartRegs
) {
222 Register NewResultReg
= MRI
.createGenericVirtualRegister(ResultTy
);
223 MIRBuilder
.buildInsert(NewResultReg
, CurResultReg
, PartReg
, Offset
);
224 CurResultReg
= NewResultReg
;
228 for (unsigned I
= 0, E
= LeftoverRegs
.size(); I
!= E
; ++I
) {
229 // Use the original output register for the final insert to avoid a copy.
230 Register NewResultReg
= (I
+ 1 == E
) ?
231 DstReg
: MRI
.createGenericVirtualRegister(ResultTy
);
233 MIRBuilder
.buildInsert(NewResultReg
, CurResultReg
, LeftoverRegs
[I
], Offset
);
234 CurResultReg
= NewResultReg
;
235 Offset
+= LeftoverPartSize
;
239 static RTLIB::Libcall
getRTLibDesc(unsigned Opcode
, unsigned Size
) {
241 case TargetOpcode::G_SDIV
:
242 assert((Size
== 32 || Size
== 64) && "Unsupported size");
243 return Size
== 64 ? RTLIB::SDIV_I64
: RTLIB::SDIV_I32
;
244 case TargetOpcode::G_UDIV
:
245 assert((Size
== 32 || Size
== 64) && "Unsupported size");
246 return Size
== 64 ? RTLIB::UDIV_I64
: RTLIB::UDIV_I32
;
247 case TargetOpcode::G_SREM
:
248 assert((Size
== 32 || Size
== 64) && "Unsupported size");
249 return Size
== 64 ? RTLIB::SREM_I64
: RTLIB::SREM_I32
;
250 case TargetOpcode::G_UREM
:
251 assert((Size
== 32 || Size
== 64) && "Unsupported size");
252 return Size
== 64 ? RTLIB::UREM_I64
: RTLIB::UREM_I32
;
253 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
254 assert(Size
== 32 && "Unsupported size");
255 return RTLIB::CTLZ_I32
;
256 case TargetOpcode::G_FADD
:
257 assert((Size
== 32 || Size
== 64) && "Unsupported size");
258 return Size
== 64 ? RTLIB::ADD_F64
: RTLIB::ADD_F32
;
259 case TargetOpcode::G_FSUB
:
260 assert((Size
== 32 || Size
== 64) && "Unsupported size");
261 return Size
== 64 ? RTLIB::SUB_F64
: RTLIB::SUB_F32
;
262 case TargetOpcode::G_FMUL
:
263 assert((Size
== 32 || Size
== 64) && "Unsupported size");
264 return Size
== 64 ? RTLIB::MUL_F64
: RTLIB::MUL_F32
;
265 case TargetOpcode::G_FDIV
:
266 assert((Size
== 32 || Size
== 64) && "Unsupported size");
267 return Size
== 64 ? RTLIB::DIV_F64
: RTLIB::DIV_F32
;
268 case TargetOpcode::G_FEXP
:
269 assert((Size
== 32 || Size
== 64) && "Unsupported size");
270 return Size
== 64 ? RTLIB::EXP_F64
: RTLIB::EXP_F32
;
271 case TargetOpcode::G_FEXP2
:
272 assert((Size
== 32 || Size
== 64) && "Unsupported size");
273 return Size
== 64 ? RTLIB::EXP2_F64
: RTLIB::EXP2_F32
;
274 case TargetOpcode::G_FREM
:
275 return Size
== 64 ? RTLIB::REM_F64
: RTLIB::REM_F32
;
276 case TargetOpcode::G_FPOW
:
277 return Size
== 64 ? RTLIB::POW_F64
: RTLIB::POW_F32
;
278 case TargetOpcode::G_FMA
:
279 assert((Size
== 32 || Size
== 64) && "Unsupported size");
280 return Size
== 64 ? RTLIB::FMA_F64
: RTLIB::FMA_F32
;
281 case TargetOpcode::G_FSIN
:
282 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
283 return Size
== 128 ? RTLIB::SIN_F128
284 : Size
== 64 ? RTLIB::SIN_F64
: RTLIB::SIN_F32
;
285 case TargetOpcode::G_FCOS
:
286 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
287 return Size
== 128 ? RTLIB::COS_F128
288 : Size
== 64 ? RTLIB::COS_F64
: RTLIB::COS_F32
;
289 case TargetOpcode::G_FLOG10
:
290 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
291 return Size
== 128 ? RTLIB::LOG10_F128
292 : Size
== 64 ? RTLIB::LOG10_F64
: RTLIB::LOG10_F32
;
293 case TargetOpcode::G_FLOG
:
294 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
295 return Size
== 128 ? RTLIB::LOG_F128
296 : Size
== 64 ? RTLIB::LOG_F64
: RTLIB::LOG_F32
;
297 case TargetOpcode::G_FLOG2
:
298 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
299 return Size
== 128 ? RTLIB::LOG2_F128
300 : Size
== 64 ? RTLIB::LOG2_F64
: RTLIB::LOG2_F32
;
301 case TargetOpcode::G_FCEIL
:
302 assert((Size
== 32 || Size
== 64) && "Unsupported size");
303 return Size
== 64 ? RTLIB::CEIL_F64
: RTLIB::CEIL_F32
;
304 case TargetOpcode::G_FFLOOR
:
305 assert((Size
== 32 || Size
== 64) && "Unsupported size");
306 return Size
== 64 ? RTLIB::FLOOR_F64
: RTLIB::FLOOR_F32
;
308 llvm_unreachable("Unknown libcall function");
311 LegalizerHelper::LegalizeResult
312 llvm::createLibcall(MachineIRBuilder
&MIRBuilder
, RTLIB::Libcall Libcall
,
313 const CallLowering::ArgInfo
&Result
,
314 ArrayRef
<CallLowering::ArgInfo
> Args
) {
315 auto &CLI
= *MIRBuilder
.getMF().getSubtarget().getCallLowering();
316 auto &TLI
= *MIRBuilder
.getMF().getSubtarget().getTargetLowering();
317 const char *Name
= TLI
.getLibcallName(Libcall
);
319 MIRBuilder
.getMF().getFrameInfo().setHasCalls(true);
321 CallLowering::CallLoweringInfo Info
;
322 Info
.CallConv
= TLI
.getLibcallCallingConv(Libcall
);
323 Info
.Callee
= MachineOperand::CreateES(Name
);
324 Info
.OrigRet
= Result
;
325 std::copy(Args
.begin(), Args
.end(), std::back_inserter(Info
.OrigArgs
));
326 if (!CLI
.lowerCall(MIRBuilder
, Info
))
327 return LegalizerHelper::UnableToLegalize
;
329 return LegalizerHelper::Legalized
;
332 // Useful for libcalls where all operands have the same type.
333 static LegalizerHelper::LegalizeResult
334 simpleLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, unsigned Size
,
336 auto Libcall
= getRTLibDesc(MI
.getOpcode(), Size
);
338 SmallVector
<CallLowering::ArgInfo
, 3> Args
;
339 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
++)
340 Args
.push_back({MI
.getOperand(i
).getReg(), OpType
});
341 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), OpType
},
345 LegalizerHelper::LegalizeResult
346 llvm::createMemLibcall(MachineIRBuilder
&MIRBuilder
, MachineRegisterInfo
&MRI
,
348 assert(MI
.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
);
349 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
351 SmallVector
<CallLowering::ArgInfo
, 3> Args
;
352 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
++) {
353 Register Reg
= MI
.getOperand(i
).getReg();
355 // Need derive an IR type for call lowering.
356 LLT OpLLT
= MRI
.getType(Reg
);
357 Type
*OpTy
= nullptr;
358 if (OpLLT
.isPointer())
359 OpTy
= Type::getInt8PtrTy(Ctx
, OpLLT
.getAddressSpace());
361 OpTy
= IntegerType::get(Ctx
, OpLLT
.getSizeInBits());
362 Args
.push_back({Reg
, OpTy
});
365 auto &CLI
= *MIRBuilder
.getMF().getSubtarget().getCallLowering();
366 auto &TLI
= *MIRBuilder
.getMF().getSubtarget().getTargetLowering();
367 Intrinsic::ID ID
= MI
.getOperand(0).getIntrinsicID();
368 RTLIB::Libcall RTLibcall
;
370 case Intrinsic::memcpy
:
371 RTLibcall
= RTLIB::MEMCPY
;
373 case Intrinsic::memset
:
374 RTLibcall
= RTLIB::MEMSET
;
376 case Intrinsic::memmove
:
377 RTLibcall
= RTLIB::MEMMOVE
;
380 return LegalizerHelper::UnableToLegalize
;
382 const char *Name
= TLI
.getLibcallName(RTLibcall
);
384 MIRBuilder
.setInstr(MI
);
385 MIRBuilder
.getMF().getFrameInfo().setHasCalls(true);
387 CallLowering::CallLoweringInfo Info
;
388 Info
.CallConv
= TLI
.getLibcallCallingConv(RTLibcall
);
389 Info
.Callee
= MachineOperand::CreateES(Name
);
390 Info
.OrigRet
= CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx
));
391 std::copy(Args
.begin(), Args
.end(), std::back_inserter(Info
.OrigArgs
));
392 if (!CLI
.lowerCall(MIRBuilder
, Info
))
393 return LegalizerHelper::UnableToLegalize
;
395 return LegalizerHelper::Legalized
;
398 static RTLIB::Libcall
getConvRTLibDesc(unsigned Opcode
, Type
*ToType
,
400 auto ToMVT
= MVT::getVT(ToType
);
401 auto FromMVT
= MVT::getVT(FromType
);
404 case TargetOpcode::G_FPEXT
:
405 return RTLIB::getFPEXT(FromMVT
, ToMVT
);
406 case TargetOpcode::G_FPTRUNC
:
407 return RTLIB::getFPROUND(FromMVT
, ToMVT
);
408 case TargetOpcode::G_FPTOSI
:
409 return RTLIB::getFPTOSINT(FromMVT
, ToMVT
);
410 case TargetOpcode::G_FPTOUI
:
411 return RTLIB::getFPTOUINT(FromMVT
, ToMVT
);
412 case TargetOpcode::G_SITOFP
:
413 return RTLIB::getSINTTOFP(FromMVT
, ToMVT
);
414 case TargetOpcode::G_UITOFP
:
415 return RTLIB::getUINTTOFP(FromMVT
, ToMVT
);
417 llvm_unreachable("Unsupported libcall function");
420 static LegalizerHelper::LegalizeResult
421 conversionLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, Type
*ToType
,
423 RTLIB::Libcall Libcall
= getConvRTLibDesc(MI
.getOpcode(), ToType
, FromType
);
424 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), ToType
},
425 {{MI
.getOperand(1).getReg(), FromType
}});
428 LegalizerHelper::LegalizeResult
429 LegalizerHelper::libcall(MachineInstr
&MI
) {
430 LLT LLTy
= MRI
.getType(MI
.getOperand(0).getReg());
431 unsigned Size
= LLTy
.getSizeInBits();
432 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
434 MIRBuilder
.setInstr(MI
);
436 switch (MI
.getOpcode()) {
438 return UnableToLegalize
;
439 case TargetOpcode::G_SDIV
:
440 case TargetOpcode::G_UDIV
:
441 case TargetOpcode::G_SREM
:
442 case TargetOpcode::G_UREM
:
443 case TargetOpcode::G_CTLZ_ZERO_UNDEF
: {
444 Type
*HLTy
= IntegerType::get(Ctx
, Size
);
445 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
446 if (Status
!= Legalized
)
450 case TargetOpcode::G_FADD
:
451 case TargetOpcode::G_FSUB
:
452 case TargetOpcode::G_FMUL
:
453 case TargetOpcode::G_FDIV
:
454 case TargetOpcode::G_FMA
:
455 case TargetOpcode::G_FPOW
:
456 case TargetOpcode::G_FREM
:
457 case TargetOpcode::G_FCOS
:
458 case TargetOpcode::G_FSIN
:
459 case TargetOpcode::G_FLOG10
:
460 case TargetOpcode::G_FLOG
:
461 case TargetOpcode::G_FLOG2
:
462 case TargetOpcode::G_FEXP
:
463 case TargetOpcode::G_FEXP2
:
464 case TargetOpcode::G_FCEIL
:
465 case TargetOpcode::G_FFLOOR
: {
467 LLVM_DEBUG(dbgs() << "Size " << Size
<< " too large to legalize.\n");
468 return UnableToLegalize
;
470 Type
*HLTy
= Size
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
);
471 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
472 if (Status
!= Legalized
)
476 case TargetOpcode::G_FPEXT
: {
477 // FIXME: Support other floating point types (half, fp128 etc)
478 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
479 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
480 if (ToSize
!= 64 || FromSize
!= 32)
481 return UnableToLegalize
;
482 LegalizeResult Status
= conversionLibcall(
483 MI
, MIRBuilder
, Type::getDoubleTy(Ctx
), Type::getFloatTy(Ctx
));
484 if (Status
!= Legalized
)
488 case TargetOpcode::G_FPTRUNC
: {
489 // FIXME: Support other floating point types (half, fp128 etc)
490 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
491 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
492 if (ToSize
!= 32 || FromSize
!= 64)
493 return UnableToLegalize
;
494 LegalizeResult Status
= conversionLibcall(
495 MI
, MIRBuilder
, Type::getFloatTy(Ctx
), Type::getDoubleTy(Ctx
));
496 if (Status
!= Legalized
)
500 case TargetOpcode::G_FPTOSI
:
501 case TargetOpcode::G_FPTOUI
: {
502 // FIXME: Support other types
503 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
504 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
505 if ((ToSize
!= 32 && ToSize
!= 64) || (FromSize
!= 32 && FromSize
!= 64))
506 return UnableToLegalize
;
507 LegalizeResult Status
= conversionLibcall(
509 ToSize
== 32 ? Type::getInt32Ty(Ctx
) : Type::getInt64Ty(Ctx
),
510 FromSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
));
511 if (Status
!= Legalized
)
515 case TargetOpcode::G_SITOFP
:
516 case TargetOpcode::G_UITOFP
: {
517 // FIXME: Support other types
518 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
519 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
520 if ((FromSize
!= 32 && FromSize
!= 64) || (ToSize
!= 32 && ToSize
!= 64))
521 return UnableToLegalize
;
522 LegalizeResult Status
= conversionLibcall(
524 ToSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
),
525 FromSize
== 32 ? Type::getInt32Ty(Ctx
) : Type::getInt64Ty(Ctx
));
526 if (Status
!= Legalized
)
532 MI
.eraseFromParent();
536 LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalar(MachineInstr
&MI
,
539 MIRBuilder
.setInstr(MI
);
541 uint64_t SizeOp0
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
542 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
544 switch (MI
.getOpcode()) {
546 return UnableToLegalize
;
547 case TargetOpcode::G_IMPLICIT_DEF
: {
548 // FIXME: add support for when SizeOp0 isn't an exact multiple of
550 if (SizeOp0
% NarrowSize
!= 0)
551 return UnableToLegalize
;
552 int NumParts
= SizeOp0
/ NarrowSize
;
554 SmallVector
<Register
, 2> DstRegs
;
555 for (int i
= 0; i
< NumParts
; ++i
)
557 MIRBuilder
.buildUndef(NarrowTy
)->getOperand(0).getReg());
559 Register DstReg
= MI
.getOperand(0).getReg();
560 if(MRI
.getType(DstReg
).isVector())
561 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
563 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
564 MI
.eraseFromParent();
567 case TargetOpcode::G_CONSTANT
: {
568 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
569 const APInt
&Val
= MI
.getOperand(1).getCImm()->getValue();
570 unsigned TotalSize
= Ty
.getSizeInBits();
571 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
572 int NumParts
= TotalSize
/ NarrowSize
;
574 SmallVector
<Register
, 4> PartRegs
;
575 for (int I
= 0; I
!= NumParts
; ++I
) {
576 unsigned Offset
= I
* NarrowSize
;
577 auto K
= MIRBuilder
.buildConstant(NarrowTy
,
578 Val
.lshr(Offset
).trunc(NarrowSize
));
579 PartRegs
.push_back(K
.getReg(0));
583 unsigned LeftoverBits
= TotalSize
- NumParts
* NarrowSize
;
584 SmallVector
<Register
, 1> LeftoverRegs
;
585 if (LeftoverBits
!= 0) {
586 LeftoverTy
= LLT::scalar(LeftoverBits
);
587 auto K
= MIRBuilder
.buildConstant(
589 Val
.lshr(NumParts
* NarrowSize
).trunc(LeftoverBits
));
590 LeftoverRegs
.push_back(K
.getReg(0));
593 insertParts(MI
.getOperand(0).getReg(),
594 Ty
, NarrowTy
, PartRegs
, LeftoverTy
, LeftoverRegs
);
596 MI
.eraseFromParent();
599 case TargetOpcode::G_SEXT
: {
601 return UnableToLegalize
;
603 if (NarrowTy
.getSizeInBits() != SizeOp0
/ 2) {
604 LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy
<< "\n");
605 return UnableToLegalize
;
608 Register SrcReg
= MI
.getOperand(1).getReg();
610 // Shift the sign bit of the low register through the high register.
612 MIRBuilder
.buildConstant(LLT::scalar(64), NarrowTy
.getSizeInBits() - 1);
613 auto Shift
= MIRBuilder
.buildAShr(NarrowTy
, SrcReg
, ShiftAmt
);
614 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {SrcReg
, Shift
.getReg(0)});
615 MI
.eraseFromParent();
618 case TargetOpcode::G_ZEXT
: {
620 return UnableToLegalize
;
622 if (SizeOp0
% NarrowTy
.getSizeInBits() != 0)
623 return UnableToLegalize
;
625 // Generate a merge where the bottom bits are taken from the source, and
626 // zero everything else.
627 Register ZeroReg
= MIRBuilder
.buildConstant(NarrowTy
, 0).getReg(0);
628 unsigned NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
629 SmallVector
<Register
, 4> Srcs
= {MI
.getOperand(1).getReg()};
630 for (unsigned Part
= 1; Part
< NumParts
; ++Part
)
631 Srcs
.push_back(ZeroReg
);
632 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), Srcs
);
633 MI
.eraseFromParent();
636 case TargetOpcode::G_TRUNC
: {
638 return UnableToLegalize
;
640 uint64_t SizeOp1
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
641 if (NarrowTy
.getSizeInBits() * 2 != SizeOp1
) {
642 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy
<< "\n");
643 return UnableToLegalize
;
646 auto Unmerge
= MIRBuilder
.buildUnmerge(NarrowTy
, MI
.getOperand(1).getReg());
647 MIRBuilder
.buildCopy(MI
.getOperand(0).getReg(), Unmerge
.getReg(0));
648 MI
.eraseFromParent();
652 case TargetOpcode::G_ADD
: {
653 // FIXME: add support for when SizeOp0 isn't an exact multiple of
655 if (SizeOp0
% NarrowSize
!= 0)
656 return UnableToLegalize
;
657 // Expand in terms of carry-setting/consuming G_ADDE instructions.
658 int NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
660 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, DstRegs
;
661 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
662 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
665 for (int i
= 0; i
< NumParts
; ++i
) {
666 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
667 Register CarryOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
670 MIRBuilder
.buildUAddo(DstReg
, CarryOut
, Src1Regs
[i
], Src2Regs
[i
]);
672 MIRBuilder
.buildUAdde(DstReg
, CarryOut
, Src1Regs
[i
],
673 Src2Regs
[i
], CarryIn
);
676 DstRegs
.push_back(DstReg
);
679 Register DstReg
= MI
.getOperand(0).getReg();
680 if(MRI
.getType(DstReg
).isVector())
681 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
683 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
684 MI
.eraseFromParent();
687 case TargetOpcode::G_SUB
: {
688 // FIXME: add support for when SizeOp0 isn't an exact multiple of
690 if (SizeOp0
% NarrowSize
!= 0)
691 return UnableToLegalize
;
693 int NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
695 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, DstRegs
;
696 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
697 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
699 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
700 Register BorrowOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
701 MIRBuilder
.buildInstr(TargetOpcode::G_USUBO
, {DstReg
, BorrowOut
},
702 {Src1Regs
[0], Src2Regs
[0]});
703 DstRegs
.push_back(DstReg
);
704 Register BorrowIn
= BorrowOut
;
705 for (int i
= 1; i
< NumParts
; ++i
) {
706 DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
707 BorrowOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
709 MIRBuilder
.buildInstr(TargetOpcode::G_USUBE
, {DstReg
, BorrowOut
},
710 {Src1Regs
[i
], Src2Regs
[i
], BorrowIn
});
712 DstRegs
.push_back(DstReg
);
713 BorrowIn
= BorrowOut
;
715 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
716 MI
.eraseFromParent();
719 case TargetOpcode::G_MUL
:
720 case TargetOpcode::G_UMULH
:
721 return narrowScalarMul(MI
, NarrowTy
);
722 case TargetOpcode::G_EXTRACT
:
723 return narrowScalarExtract(MI
, TypeIdx
, NarrowTy
);
724 case TargetOpcode::G_INSERT
:
725 return narrowScalarInsert(MI
, TypeIdx
, NarrowTy
);
726 case TargetOpcode::G_LOAD
: {
727 const auto &MMO
= **MI
.memoperands_begin();
728 Register DstReg
= MI
.getOperand(0).getReg();
729 LLT DstTy
= MRI
.getType(DstReg
);
730 if (DstTy
.isVector())
731 return UnableToLegalize
;
733 if (8 * MMO
.getSize() != DstTy
.getSizeInBits()) {
734 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
735 auto &MMO
= **MI
.memoperands_begin();
736 MIRBuilder
.buildLoad(TmpReg
, MI
.getOperand(1).getReg(), MMO
);
737 MIRBuilder
.buildAnyExt(DstReg
, TmpReg
);
738 MI
.eraseFromParent();
742 return reduceLoadStoreWidth(MI
, TypeIdx
, NarrowTy
);
744 case TargetOpcode::G_ZEXTLOAD
:
745 case TargetOpcode::G_SEXTLOAD
: {
746 bool ZExt
= MI
.getOpcode() == TargetOpcode::G_ZEXTLOAD
;
747 Register DstReg
= MI
.getOperand(0).getReg();
748 Register PtrReg
= MI
.getOperand(1).getReg();
750 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
751 auto &MMO
= **MI
.memoperands_begin();
752 if (MMO
.getSizeInBits() == NarrowSize
) {
753 MIRBuilder
.buildLoad(TmpReg
, PtrReg
, MMO
);
755 unsigned ExtLoad
= ZExt
? TargetOpcode::G_ZEXTLOAD
756 : TargetOpcode::G_SEXTLOAD
;
757 MIRBuilder
.buildInstr(ExtLoad
)
760 .addMemOperand(&MMO
);
764 MIRBuilder
.buildZExt(DstReg
, TmpReg
);
766 MIRBuilder
.buildSExt(DstReg
, TmpReg
);
768 MI
.eraseFromParent();
771 case TargetOpcode::G_STORE
: {
772 const auto &MMO
= **MI
.memoperands_begin();
774 Register SrcReg
= MI
.getOperand(0).getReg();
775 LLT SrcTy
= MRI
.getType(SrcReg
);
776 if (SrcTy
.isVector())
777 return UnableToLegalize
;
779 int NumParts
= SizeOp0
/ NarrowSize
;
780 unsigned HandledSize
= NumParts
* NarrowTy
.getSizeInBits();
781 unsigned LeftoverBits
= SrcTy
.getSizeInBits() - HandledSize
;
782 if (SrcTy
.isVector() && LeftoverBits
!= 0)
783 return UnableToLegalize
;
785 if (8 * MMO
.getSize() != SrcTy
.getSizeInBits()) {
786 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
787 auto &MMO
= **MI
.memoperands_begin();
788 MIRBuilder
.buildTrunc(TmpReg
, SrcReg
);
789 MIRBuilder
.buildStore(TmpReg
, MI
.getOperand(1).getReg(), MMO
);
790 MI
.eraseFromParent();
794 return reduceLoadStoreWidth(MI
, 0, NarrowTy
);
796 case TargetOpcode::G_SELECT
:
797 return narrowScalarSelect(MI
, TypeIdx
, NarrowTy
);
798 case TargetOpcode::G_AND
:
799 case TargetOpcode::G_OR
:
800 case TargetOpcode::G_XOR
: {
801 // Legalize bitwise operation:
802 // A = BinOp<Ty> B, C
804 // B1, ..., BN = G_UNMERGE_VALUES B
805 // C1, ..., CN = G_UNMERGE_VALUES C
806 // A1 = BinOp<Ty/N> B1, C2
808 // AN = BinOp<Ty/N> BN, CN
809 // A = G_MERGE_VALUES A1, ..., AN
810 return narrowScalarBasic(MI
, TypeIdx
, NarrowTy
);
812 case TargetOpcode::G_SHL
:
813 case TargetOpcode::G_LSHR
:
814 case TargetOpcode::G_ASHR
:
815 return narrowScalarShift(MI
, TypeIdx
, NarrowTy
);
816 case TargetOpcode::G_CTLZ
:
817 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
818 case TargetOpcode::G_CTTZ
:
819 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
820 case TargetOpcode::G_CTPOP
:
822 return UnableToLegalize
; // TODO
824 Observer
.changingInstr(MI
);
825 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_ZEXT
);
826 Observer
.changedInstr(MI
);
828 case TargetOpcode::G_INTTOPTR
:
830 return UnableToLegalize
;
832 Observer
.changingInstr(MI
);
833 narrowScalarSrc(MI
, NarrowTy
, 1);
834 Observer
.changedInstr(MI
);
836 case TargetOpcode::G_PTRTOINT
:
838 return UnableToLegalize
;
840 Observer
.changingInstr(MI
);
841 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_ZEXT
);
842 Observer
.changedInstr(MI
);
844 case TargetOpcode::G_PHI
: {
845 unsigned NumParts
= SizeOp0
/ NarrowSize
;
846 SmallVector
<Register
, 2> DstRegs
;
847 SmallVector
<SmallVector
<Register
, 2>, 2> SrcRegs
;
848 DstRegs
.resize(NumParts
);
849 SrcRegs
.resize(MI
.getNumOperands() / 2);
850 Observer
.changingInstr(MI
);
851 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
+= 2) {
852 MachineBasicBlock
&OpMBB
= *MI
.getOperand(i
+ 1).getMBB();
853 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
854 extractParts(MI
.getOperand(i
).getReg(), NarrowTy
, NumParts
,
857 MachineBasicBlock
&MBB
= *MI
.getParent();
858 MIRBuilder
.setInsertPt(MBB
, MI
);
859 for (unsigned i
= 0; i
< NumParts
; ++i
) {
860 DstRegs
[i
] = MRI
.createGenericVirtualRegister(NarrowTy
);
861 MachineInstrBuilder MIB
=
862 MIRBuilder
.buildInstr(TargetOpcode::G_PHI
).addDef(DstRegs
[i
]);
863 for (unsigned j
= 1; j
< MI
.getNumOperands(); j
+= 2)
864 MIB
.addUse(SrcRegs
[j
/ 2][i
]).add(MI
.getOperand(j
+ 1));
866 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
867 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
868 Observer
.changedInstr(MI
);
869 MI
.eraseFromParent();
872 case TargetOpcode::G_EXTRACT_VECTOR_ELT
:
873 case TargetOpcode::G_INSERT_VECTOR_ELT
: {
875 return UnableToLegalize
;
877 int OpIdx
= MI
.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT
? 2 : 3;
878 Observer
.changingInstr(MI
);
879 narrowScalarSrc(MI
, NarrowTy
, OpIdx
);
880 Observer
.changedInstr(MI
);
883 case TargetOpcode::G_ICMP
: {
884 uint64_t SrcSize
= MRI
.getType(MI
.getOperand(2).getReg()).getSizeInBits();
885 if (NarrowSize
* 2 != SrcSize
)
886 return UnableToLegalize
;
888 Observer
.changingInstr(MI
);
889 Register LHSL
= MRI
.createGenericVirtualRegister(NarrowTy
);
890 Register LHSH
= MRI
.createGenericVirtualRegister(NarrowTy
);
891 MIRBuilder
.buildUnmerge({LHSL
, LHSH
}, MI
.getOperand(2).getReg());
893 Register RHSL
= MRI
.createGenericVirtualRegister(NarrowTy
);
894 Register RHSH
= MRI
.createGenericVirtualRegister(NarrowTy
);
895 MIRBuilder
.buildUnmerge({RHSL
, RHSH
}, MI
.getOperand(3).getReg());
897 CmpInst::Predicate Pred
=
898 static_cast<CmpInst::Predicate
>(MI
.getOperand(1).getPredicate());
899 LLT ResTy
= MRI
.getType(MI
.getOperand(0).getReg());
901 if (Pred
== CmpInst::ICMP_EQ
|| Pred
== CmpInst::ICMP_NE
) {
902 MachineInstrBuilder XorL
= MIRBuilder
.buildXor(NarrowTy
, LHSL
, RHSL
);
903 MachineInstrBuilder XorH
= MIRBuilder
.buildXor(NarrowTy
, LHSH
, RHSH
);
904 MachineInstrBuilder Or
= MIRBuilder
.buildOr(NarrowTy
, XorL
, XorH
);
905 MachineInstrBuilder Zero
= MIRBuilder
.buildConstant(NarrowTy
, 0);
906 MIRBuilder
.buildICmp(Pred
, MI
.getOperand(0).getReg(), Or
, Zero
);
908 MachineInstrBuilder CmpH
= MIRBuilder
.buildICmp(Pred
, ResTy
, LHSH
, RHSH
);
909 MachineInstrBuilder CmpHEQ
=
910 MIRBuilder
.buildICmp(CmpInst::Predicate::ICMP_EQ
, ResTy
, LHSH
, RHSH
);
911 MachineInstrBuilder CmpLU
= MIRBuilder
.buildICmp(
912 ICmpInst::getUnsignedPredicate(Pred
), ResTy
, LHSL
, RHSL
);
913 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), CmpHEQ
, CmpLU
, CmpH
);
915 Observer
.changedInstr(MI
);
916 MI
.eraseFromParent();
919 case TargetOpcode::G_SEXT_INREG
: {
921 return UnableToLegalize
;
923 if (!MI
.getOperand(2).isImm())
924 return UnableToLegalize
;
925 int64_t SizeInBits
= MI
.getOperand(2).getImm();
927 // So long as the new type has more bits than the bits we're extending we
928 // don't need to break it apart.
929 if (NarrowTy
.getScalarSizeInBits() >= SizeInBits
) {
930 Observer
.changingInstr(MI
);
931 // We don't lose any non-extension bits by truncating the src and
932 // sign-extending the dst.
933 MachineOperand
&MO1
= MI
.getOperand(1);
934 auto TruncMIB
= MIRBuilder
.buildTrunc(NarrowTy
, MO1
.getReg());
935 MO1
.setReg(TruncMIB
->getOperand(0).getReg());
937 MachineOperand
&MO2
= MI
.getOperand(0);
938 Register DstExt
= MRI
.createGenericVirtualRegister(NarrowTy
);
939 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
940 MIRBuilder
.buildInstr(TargetOpcode::G_SEXT
, {MO2
.getReg()}, {DstExt
});
942 Observer
.changedInstr(MI
);
946 // Break it apart. Components below the extension point are unmodified. The
947 // component containing the extension point becomes a narrower SEXT_INREG.
948 // Components above it are ashr'd from the component containing the
950 if (SizeOp0
% NarrowSize
!= 0)
951 return UnableToLegalize
;
952 int NumParts
= SizeOp0
/ NarrowSize
;
954 // List the registers where the destination will be scattered.
955 SmallVector
<Register
, 2> DstRegs
;
956 // List the registers where the source will be split.
957 SmallVector
<Register
, 2> SrcRegs
;
959 // Create all the temporary registers.
960 for (int i
= 0; i
< NumParts
; ++i
) {
961 Register SrcReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
963 SrcRegs
.push_back(SrcReg
);
966 // Explode the big arguments into smaller chunks.
967 MIRBuilder
.buildUnmerge(SrcRegs
, MI
.getOperand(1).getReg());
969 Register AshrCstReg
=
970 MIRBuilder
.buildConstant(NarrowTy
, NarrowTy
.getScalarSizeInBits() - 1)
973 Register FullExtensionReg
= 0;
974 Register PartialExtensionReg
= 0;
976 // Do the operation on each small part.
977 for (int i
= 0; i
< NumParts
; ++i
) {
978 if ((i
+ 1) * NarrowTy
.getScalarSizeInBits() < SizeInBits
)
979 DstRegs
.push_back(SrcRegs
[i
]);
980 else if (i
* NarrowTy
.getScalarSizeInBits() > SizeInBits
) {
981 assert(PartialExtensionReg
&&
982 "Expected to visit partial extension before full");
983 if (FullExtensionReg
) {
984 DstRegs
.push_back(FullExtensionReg
);
987 DstRegs
.push_back(MIRBuilder
988 .buildInstr(TargetOpcode::G_ASHR
, {NarrowTy
},
989 {PartialExtensionReg
, AshrCstReg
})
992 FullExtensionReg
= DstRegs
.back();
997 TargetOpcode::G_SEXT_INREG
, {NarrowTy
},
998 {SrcRegs
[i
], SizeInBits
% NarrowTy
.getScalarSizeInBits()})
1001 PartialExtensionReg
= DstRegs
.back();
1005 // Gather the destination registers into the final destination.
1006 Register DstReg
= MI
.getOperand(0).getReg();
1007 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
1008 MI
.eraseFromParent();
1014 void LegalizerHelper::widenScalarSrc(MachineInstr
&MI
, LLT WideTy
,
1015 unsigned OpIdx
, unsigned ExtOpcode
) {
1016 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1017 auto ExtB
= MIRBuilder
.buildInstr(ExtOpcode
, {WideTy
}, {MO
.getReg()});
1018 MO
.setReg(ExtB
->getOperand(0).getReg());
1021 void LegalizerHelper::narrowScalarSrc(MachineInstr
&MI
, LLT NarrowTy
,
1023 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1024 auto ExtB
= MIRBuilder
.buildInstr(TargetOpcode::G_TRUNC
, {NarrowTy
},
1026 MO
.setReg(ExtB
->getOperand(0).getReg());
1029 void LegalizerHelper::widenScalarDst(MachineInstr
&MI
, LLT WideTy
,
1030 unsigned OpIdx
, unsigned TruncOpcode
) {
1031 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1032 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1033 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1034 MIRBuilder
.buildInstr(TruncOpcode
, {MO
.getReg()}, {DstExt
});
1038 void LegalizerHelper::narrowScalarDst(MachineInstr
&MI
, LLT NarrowTy
,
1039 unsigned OpIdx
, unsigned ExtOpcode
) {
1040 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1041 Register DstTrunc
= MRI
.createGenericVirtualRegister(NarrowTy
);
1042 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1043 MIRBuilder
.buildInstr(ExtOpcode
, {MO
.getReg()}, {DstTrunc
});
1044 MO
.setReg(DstTrunc
);
1047 void LegalizerHelper::moreElementsVectorDst(MachineInstr
&MI
, LLT WideTy
,
1049 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1050 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1051 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1052 MIRBuilder
.buildExtract(MO
.getReg(), DstExt
, 0);
1056 void LegalizerHelper::moreElementsVectorSrc(MachineInstr
&MI
, LLT MoreTy
,
1058 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1060 LLT OldTy
= MRI
.getType(MO
.getReg());
1061 unsigned OldElts
= OldTy
.getNumElements();
1062 unsigned NewElts
= MoreTy
.getNumElements();
1064 unsigned NumParts
= NewElts
/ OldElts
;
1066 // Use concat_vectors if the result is a multiple of the number of elements.
1067 if (NumParts
* OldElts
== NewElts
) {
1068 SmallVector
<Register
, 8> Parts
;
1069 Parts
.push_back(MO
.getReg());
1071 Register ImpDef
= MIRBuilder
.buildUndef(OldTy
).getReg(0);
1072 for (unsigned I
= 1; I
!= NumParts
; ++I
)
1073 Parts
.push_back(ImpDef
);
1075 auto Concat
= MIRBuilder
.buildConcatVectors(MoreTy
, Parts
);
1076 MO
.setReg(Concat
.getReg(0));
1080 Register MoreReg
= MRI
.createGenericVirtualRegister(MoreTy
);
1081 Register ImpDef
= MIRBuilder
.buildUndef(MoreTy
).getReg(0);
1082 MIRBuilder
.buildInsert(MoreReg
, ImpDef
, MO
.getReg(), 0);
1086 LegalizerHelper::LegalizeResult
1087 LegalizerHelper::widenScalarMergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
1090 return UnableToLegalize
;
1092 Register DstReg
= MI
.getOperand(0).getReg();
1093 LLT DstTy
= MRI
.getType(DstReg
);
1094 if (DstTy
.isVector())
1095 return UnableToLegalize
;
1097 Register Src1
= MI
.getOperand(1).getReg();
1098 LLT SrcTy
= MRI
.getType(Src1
);
1099 const int DstSize
= DstTy
.getSizeInBits();
1100 const int SrcSize
= SrcTy
.getSizeInBits();
1101 const int WideSize
= WideTy
.getSizeInBits();
1102 const int NumMerge
= (DstSize
+ WideSize
- 1) / WideSize
;
1104 unsigned NumOps
= MI
.getNumOperands();
1105 unsigned NumSrc
= MI
.getNumOperands() - 1;
1106 unsigned PartSize
= DstTy
.getSizeInBits() / NumSrc
;
1108 if (WideSize
>= DstSize
) {
1109 // Directly pack the bits in the target type.
1110 Register ResultReg
= MIRBuilder
.buildZExt(WideTy
, Src1
).getReg(0);
1112 for (unsigned I
= 2; I
!= NumOps
; ++I
) {
1113 const unsigned Offset
= (I
- 1) * PartSize
;
1115 Register SrcReg
= MI
.getOperand(I
).getReg();
1116 assert(MRI
.getType(SrcReg
) == LLT::scalar(PartSize
));
1118 auto ZextInput
= MIRBuilder
.buildZExt(WideTy
, SrcReg
);
1120 Register NextResult
= I
+ 1 == NumOps
&& WideTy
== DstTy
? DstReg
:
1121 MRI
.createGenericVirtualRegister(WideTy
);
1123 auto ShiftAmt
= MIRBuilder
.buildConstant(WideTy
, Offset
);
1124 auto Shl
= MIRBuilder
.buildShl(WideTy
, ZextInput
, ShiftAmt
);
1125 MIRBuilder
.buildOr(NextResult
, ResultReg
, Shl
);
1126 ResultReg
= NextResult
;
1129 if (WideSize
> DstSize
)
1130 MIRBuilder
.buildTrunc(DstReg
, ResultReg
);
1131 else if (DstTy
.isPointer())
1132 MIRBuilder
.buildIntToPtr(DstReg
, ResultReg
);
1134 MI
.eraseFromParent();
1138 // Unmerge the original values to the GCD type, and recombine to the next
1139 // multiple greater than the original type.
1141 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1142 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1143 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1144 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1145 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1146 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1147 // %12:_(s12) = G_MERGE_VALUES %10, %11
1149 // Padding with undef if necessary:
1151 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1152 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1153 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1154 // %7:_(s2) = G_IMPLICIT_DEF
1155 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1156 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1157 // %10:_(s12) = G_MERGE_VALUES %8, %9
1159 const int GCD
= greatestCommonDivisor(SrcSize
, WideSize
);
1160 LLT GCDTy
= LLT::scalar(GCD
);
1162 SmallVector
<Register
, 8> Parts
;
1163 SmallVector
<Register
, 8> NewMergeRegs
;
1164 SmallVector
<Register
, 8> Unmerges
;
1165 LLT WideDstTy
= LLT::scalar(NumMerge
* WideSize
);
1167 // Decompose the original operands if they don't evenly divide.
1168 for (int I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
1169 Register SrcReg
= MI
.getOperand(I
).getReg();
1170 if (GCD
== SrcSize
) {
1171 Unmerges
.push_back(SrcReg
);
1173 auto Unmerge
= MIRBuilder
.buildUnmerge(GCDTy
, SrcReg
);
1174 for (int J
= 0, JE
= Unmerge
->getNumOperands() - 1; J
!= JE
; ++J
)
1175 Unmerges
.push_back(Unmerge
.getReg(J
));
1179 // Pad with undef to the next size that is a multiple of the requested size.
1180 if (static_cast<int>(Unmerges
.size()) != NumMerge
* WideSize
) {
1181 Register UndefReg
= MIRBuilder
.buildUndef(GCDTy
).getReg(0);
1182 for (int I
= Unmerges
.size(); I
!= NumMerge
* WideSize
; ++I
)
1183 Unmerges
.push_back(UndefReg
);
1186 const int PartsPerGCD
= WideSize
/ GCD
;
1188 // Build merges of each piece.
1189 ArrayRef
<Register
> Slicer(Unmerges
);
1190 for (int I
= 0; I
!= NumMerge
; ++I
, Slicer
= Slicer
.drop_front(PartsPerGCD
)) {
1191 auto Merge
= MIRBuilder
.buildMerge(WideTy
, Slicer
.take_front(PartsPerGCD
));
1192 NewMergeRegs
.push_back(Merge
.getReg(0));
1195 // A truncate may be necessary if the requested type doesn't evenly divide the
1196 // original result type.
1197 if (DstTy
.getSizeInBits() == WideDstTy
.getSizeInBits()) {
1198 MIRBuilder
.buildMerge(DstReg
, NewMergeRegs
);
1200 auto FinalMerge
= MIRBuilder
.buildMerge(WideDstTy
, NewMergeRegs
);
1201 MIRBuilder
.buildTrunc(DstReg
, FinalMerge
.getReg(0));
1204 MI
.eraseFromParent();
1208 LegalizerHelper::LegalizeResult
1209 LegalizerHelper::widenScalarUnmergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
1212 return UnableToLegalize
;
1214 unsigned NumDst
= MI
.getNumOperands() - 1;
1215 Register SrcReg
= MI
.getOperand(NumDst
).getReg();
1216 LLT SrcTy
= MRI
.getType(SrcReg
);
1217 if (!SrcTy
.isScalar())
1218 return UnableToLegalize
;
1220 Register Dst0Reg
= MI
.getOperand(0).getReg();
1221 LLT DstTy
= MRI
.getType(Dst0Reg
);
1222 if (!DstTy
.isScalar())
1223 return UnableToLegalize
;
1225 unsigned NewSrcSize
= NumDst
* WideTy
.getSizeInBits();
1226 LLT NewSrcTy
= LLT::scalar(NewSrcSize
);
1227 unsigned SizeDiff
= WideTy
.getSizeInBits() - DstTy
.getSizeInBits();
1229 auto WideSrc
= MIRBuilder
.buildZExt(NewSrcTy
, SrcReg
);
1231 for (unsigned I
= 1; I
!= NumDst
; ++I
) {
1232 auto ShiftAmt
= MIRBuilder
.buildConstant(NewSrcTy
, SizeDiff
* I
);
1233 auto Shl
= MIRBuilder
.buildShl(NewSrcTy
, WideSrc
, ShiftAmt
);
1234 WideSrc
= MIRBuilder
.buildOr(NewSrcTy
, WideSrc
, Shl
);
1237 Observer
.changingInstr(MI
);
1239 MI
.getOperand(NumDst
).setReg(WideSrc
->getOperand(0).getReg());
1240 for (unsigned I
= 0; I
!= NumDst
; ++I
)
1241 widenScalarDst(MI
, WideTy
, I
);
1243 Observer
.changedInstr(MI
);
1248 LegalizerHelper::LegalizeResult
1249 LegalizerHelper::widenScalarExtract(MachineInstr
&MI
, unsigned TypeIdx
,
1251 Register DstReg
= MI
.getOperand(0).getReg();
1252 Register SrcReg
= MI
.getOperand(1).getReg();
1253 LLT SrcTy
= MRI
.getType(SrcReg
);
1255 LLT DstTy
= MRI
.getType(DstReg
);
1256 unsigned Offset
= MI
.getOperand(2).getImm();
1259 if (SrcTy
.isVector() || DstTy
.isVector())
1260 return UnableToLegalize
;
1263 if (SrcTy
.isPointer()) {
1264 // Extracts from pointers can be handled only if they are really just
1266 const DataLayout
&DL
= MIRBuilder
.getDataLayout();
1267 if (DL
.isNonIntegralAddressSpace(SrcTy
.getAddressSpace()))
1268 return UnableToLegalize
;
1270 LLT SrcAsIntTy
= LLT::scalar(SrcTy
.getSizeInBits());
1271 Src
= MIRBuilder
.buildPtrToInt(SrcAsIntTy
, Src
);
1275 if (DstTy
.isPointer())
1276 return UnableToLegalize
;
1279 // Avoid a shift in the degenerate case.
1280 MIRBuilder
.buildTrunc(DstReg
,
1281 MIRBuilder
.buildAnyExtOrTrunc(WideTy
, Src
));
1282 MI
.eraseFromParent();
1286 // Do a shift in the source type.
1287 LLT ShiftTy
= SrcTy
;
1288 if (WideTy
.getSizeInBits() > SrcTy
.getSizeInBits()) {
1289 Src
= MIRBuilder
.buildAnyExt(WideTy
, Src
);
1291 } else if (WideTy
.getSizeInBits() > SrcTy
.getSizeInBits())
1292 return UnableToLegalize
;
1294 auto LShr
= MIRBuilder
.buildLShr(
1295 ShiftTy
, Src
, MIRBuilder
.buildConstant(ShiftTy
, Offset
));
1296 MIRBuilder
.buildTrunc(DstReg
, LShr
);
1297 MI
.eraseFromParent();
1301 if (SrcTy
.isScalar()) {
1302 Observer
.changingInstr(MI
);
1303 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1304 Observer
.changedInstr(MI
);
1308 if (!SrcTy
.isVector())
1309 return UnableToLegalize
;
1311 if (DstTy
!= SrcTy
.getElementType())
1312 return UnableToLegalize
;
1314 if (Offset
% SrcTy
.getScalarSizeInBits() != 0)
1315 return UnableToLegalize
;
1317 Observer
.changingInstr(MI
);
1318 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1320 MI
.getOperand(2).setImm((WideTy
.getSizeInBits() / SrcTy
.getSizeInBits()) *
1322 widenScalarDst(MI
, WideTy
.getScalarType(), 0);
1323 Observer
.changedInstr(MI
);
1327 LegalizerHelper::LegalizeResult
1328 LegalizerHelper::widenScalarInsert(MachineInstr
&MI
, unsigned TypeIdx
,
1331 return UnableToLegalize
;
1332 Observer
.changingInstr(MI
);
1333 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1334 widenScalarDst(MI
, WideTy
);
1335 Observer
.changedInstr(MI
);
1339 LegalizerHelper::LegalizeResult
1340 LegalizerHelper::widenScalar(MachineInstr
&MI
, unsigned TypeIdx
, LLT WideTy
) {
1341 MIRBuilder
.setInstr(MI
);
1343 switch (MI
.getOpcode()) {
1345 return UnableToLegalize
;
1346 case TargetOpcode::G_EXTRACT
:
1347 return widenScalarExtract(MI
, TypeIdx
, WideTy
);
1348 case TargetOpcode::G_INSERT
:
1349 return widenScalarInsert(MI
, TypeIdx
, WideTy
);
1350 case TargetOpcode::G_MERGE_VALUES
:
1351 return widenScalarMergeValues(MI
, TypeIdx
, WideTy
);
1352 case TargetOpcode::G_UNMERGE_VALUES
:
1353 return widenScalarUnmergeValues(MI
, TypeIdx
, WideTy
);
1354 case TargetOpcode::G_UADDO
:
1355 case TargetOpcode::G_USUBO
: {
1357 return UnableToLegalize
; // TODO
1358 auto LHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, {WideTy
},
1359 {MI
.getOperand(2).getReg()});
1360 auto RHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, {WideTy
},
1361 {MI
.getOperand(3).getReg()});
1362 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_UADDO
1363 ? TargetOpcode::G_ADD
1364 : TargetOpcode::G_SUB
;
1365 // Do the arithmetic in the larger type.
1366 auto NewOp
= MIRBuilder
.buildInstr(Opcode
, {WideTy
}, {LHSZext
, RHSZext
});
1367 LLT OrigTy
= MRI
.getType(MI
.getOperand(0).getReg());
1368 APInt Mask
= APInt::getAllOnesValue(OrigTy
.getSizeInBits());
1369 auto AndOp
= MIRBuilder
.buildInstr(
1370 TargetOpcode::G_AND
, {WideTy
},
1371 {NewOp
, MIRBuilder
.buildConstant(WideTy
, Mask
.getZExtValue())});
1372 // There is no overflow if the AndOp is the same as NewOp.
1373 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, MI
.getOperand(1).getReg(), NewOp
,
1375 // Now trunc the NewOp to the original result.
1376 MIRBuilder
.buildTrunc(MI
.getOperand(0).getReg(), NewOp
);
1377 MI
.eraseFromParent();
1380 case TargetOpcode::G_CTTZ
:
1381 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
1382 case TargetOpcode::G_CTLZ
:
1383 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
1384 case TargetOpcode::G_CTPOP
: {
1386 Observer
.changingInstr(MI
);
1387 widenScalarDst(MI
, WideTy
, 0);
1388 Observer
.changedInstr(MI
);
1392 Register SrcReg
= MI
.getOperand(1).getReg();
1394 // First ZEXT the input.
1395 auto MIBSrc
= MIRBuilder
.buildZExt(WideTy
, SrcReg
);
1396 LLT CurTy
= MRI
.getType(SrcReg
);
1397 if (MI
.getOpcode() == TargetOpcode::G_CTTZ
) {
1398 // The count is the same in the larger type except if the original
1399 // value was zero. This can be handled by setting the bit just off
1400 // the top of the original type.
1402 APInt::getOneBitSet(WideTy
.getSizeInBits(), CurTy
.getSizeInBits());
1403 MIBSrc
= MIRBuilder
.buildOr(
1404 WideTy
, MIBSrc
, MIRBuilder
.buildConstant(WideTy
, TopBit
));
1407 // Perform the operation at the larger size.
1408 auto MIBNewOp
= MIRBuilder
.buildInstr(MI
.getOpcode(), {WideTy
}, {MIBSrc
});
1409 // This is already the correct result for CTPOP and CTTZs
1410 if (MI
.getOpcode() == TargetOpcode::G_CTLZ
||
1411 MI
.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF
) {
1412 // The correct result is NewOp - (Difference in widety and current ty).
1413 unsigned SizeDiff
= WideTy
.getSizeInBits() - CurTy
.getSizeInBits();
1414 MIBNewOp
= MIRBuilder
.buildInstr(
1415 TargetOpcode::G_SUB
, {WideTy
},
1416 {MIBNewOp
, MIRBuilder
.buildConstant(WideTy
, SizeDiff
)});
1419 MIRBuilder
.buildZExtOrTrunc(MI
.getOperand(0), MIBNewOp
);
1420 MI
.eraseFromParent();
1423 case TargetOpcode::G_BSWAP
: {
1424 Observer
.changingInstr(MI
);
1425 Register DstReg
= MI
.getOperand(0).getReg();
1427 Register ShrReg
= MRI
.createGenericVirtualRegister(WideTy
);
1428 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1429 Register ShiftAmtReg
= MRI
.createGenericVirtualRegister(WideTy
);
1430 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1432 MI
.getOperand(0).setReg(DstExt
);
1434 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1436 LLT Ty
= MRI
.getType(DstReg
);
1437 unsigned DiffBits
= WideTy
.getScalarSizeInBits() - Ty
.getScalarSizeInBits();
1438 MIRBuilder
.buildConstant(ShiftAmtReg
, DiffBits
);
1439 MIRBuilder
.buildInstr(TargetOpcode::G_LSHR
)
1442 .addUse(ShiftAmtReg
);
1444 MIRBuilder
.buildTrunc(DstReg
, ShrReg
);
1445 Observer
.changedInstr(MI
);
1448 case TargetOpcode::G_ADD
:
1449 case TargetOpcode::G_AND
:
1450 case TargetOpcode::G_MUL
:
1451 case TargetOpcode::G_OR
:
1452 case TargetOpcode::G_XOR
:
1453 case TargetOpcode::G_SUB
:
1454 // Perform operation at larger width (any extension is fines here, high bits
1455 // don't affect the result) and then truncate the result back to the
1457 Observer
.changingInstr(MI
);
1458 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1459 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
1460 widenScalarDst(MI
, WideTy
);
1461 Observer
.changedInstr(MI
);
1464 case TargetOpcode::G_SHL
:
1465 Observer
.changingInstr(MI
);
1468 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1469 widenScalarDst(MI
, WideTy
);
1471 assert(TypeIdx
== 1);
1472 // The "number of bits to shift" operand must preserve its value as an
1473 // unsigned integer:
1474 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1477 Observer
.changedInstr(MI
);
1480 case TargetOpcode::G_SDIV
:
1481 case TargetOpcode::G_SREM
:
1482 case TargetOpcode::G_SMIN
:
1483 case TargetOpcode::G_SMAX
:
1484 Observer
.changingInstr(MI
);
1485 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
1486 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1487 widenScalarDst(MI
, WideTy
);
1488 Observer
.changedInstr(MI
);
1491 case TargetOpcode::G_ASHR
:
1492 case TargetOpcode::G_LSHR
:
1493 Observer
.changingInstr(MI
);
1496 unsigned CvtOp
= MI
.getOpcode() == TargetOpcode::G_ASHR
?
1497 TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT
;
1499 widenScalarSrc(MI
, WideTy
, 1, CvtOp
);
1500 widenScalarDst(MI
, WideTy
);
1502 assert(TypeIdx
== 1);
1503 // The "number of bits to shift" operand must preserve its value as an
1504 // unsigned integer:
1505 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1508 Observer
.changedInstr(MI
);
1510 case TargetOpcode::G_UDIV
:
1511 case TargetOpcode::G_UREM
:
1512 case TargetOpcode::G_UMIN
:
1513 case TargetOpcode::G_UMAX
:
1514 Observer
.changingInstr(MI
);
1515 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1516 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1517 widenScalarDst(MI
, WideTy
);
1518 Observer
.changedInstr(MI
);
1521 case TargetOpcode::G_SELECT
:
1522 Observer
.changingInstr(MI
);
1524 // Perform operation at larger width (any extension is fine here, high
1525 // bits don't affect the result) and then truncate the result back to the
1527 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
1528 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_ANYEXT
);
1529 widenScalarDst(MI
, WideTy
);
1531 bool IsVec
= MRI
.getType(MI
.getOperand(1).getReg()).isVector();
1532 // Explicit extension is required here since high bits affect the result.
1533 widenScalarSrc(MI
, WideTy
, 1, MIRBuilder
.getBoolExtOp(IsVec
, false));
1535 Observer
.changedInstr(MI
);
1538 case TargetOpcode::G_FPTOSI
:
1539 case TargetOpcode::G_FPTOUI
:
1541 return UnableToLegalize
;
1542 Observer
.changingInstr(MI
);
1543 widenScalarDst(MI
, WideTy
);
1544 Observer
.changedInstr(MI
);
1547 case TargetOpcode::G_SITOFP
:
1549 return UnableToLegalize
;
1550 Observer
.changingInstr(MI
);
1551 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
1552 Observer
.changedInstr(MI
);
1555 case TargetOpcode::G_UITOFP
:
1557 return UnableToLegalize
;
1558 Observer
.changingInstr(MI
);
1559 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1560 Observer
.changedInstr(MI
);
1563 case TargetOpcode::G_LOAD
:
1564 case TargetOpcode::G_SEXTLOAD
:
1565 case TargetOpcode::G_ZEXTLOAD
:
1566 Observer
.changingInstr(MI
);
1567 widenScalarDst(MI
, WideTy
);
1568 Observer
.changedInstr(MI
);
1571 case TargetOpcode::G_STORE
: {
1573 return UnableToLegalize
;
1575 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
1576 if (!isPowerOf2_32(Ty
.getSizeInBits()))
1577 return UnableToLegalize
;
1579 Observer
.changingInstr(MI
);
1581 unsigned ExtType
= Ty
.getScalarSizeInBits() == 1 ?
1582 TargetOpcode::G_ZEXT
: TargetOpcode::G_ANYEXT
;
1583 widenScalarSrc(MI
, WideTy
, 0, ExtType
);
1585 Observer
.changedInstr(MI
);
1588 case TargetOpcode::G_CONSTANT
: {
1589 MachineOperand
&SrcMO
= MI
.getOperand(1);
1590 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1591 const APInt
&Val
= SrcMO
.getCImm()->getValue().sext(WideTy
.getSizeInBits());
1592 Observer
.changingInstr(MI
);
1593 SrcMO
.setCImm(ConstantInt::get(Ctx
, Val
));
1595 widenScalarDst(MI
, WideTy
);
1596 Observer
.changedInstr(MI
);
1599 case TargetOpcode::G_FCONSTANT
: {
1600 MachineOperand
&SrcMO
= MI
.getOperand(1);
1601 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1602 APFloat Val
= SrcMO
.getFPImm()->getValueAPF();
1604 switch (WideTy
.getSizeInBits()) {
1606 Val
.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven
,
1610 Val
.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven
,
1614 return UnableToLegalize
;
1617 assert(!LosesInfo
&& "extend should always be lossless");
1619 Observer
.changingInstr(MI
);
1620 SrcMO
.setFPImm(ConstantFP::get(Ctx
, Val
));
1622 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
1623 Observer
.changedInstr(MI
);
1626 case TargetOpcode::G_IMPLICIT_DEF
: {
1627 Observer
.changingInstr(MI
);
1628 widenScalarDst(MI
, WideTy
);
1629 Observer
.changedInstr(MI
);
1632 case TargetOpcode::G_BRCOND
:
1633 Observer
.changingInstr(MI
);
1634 widenScalarSrc(MI
, WideTy
, 0, MIRBuilder
.getBoolExtOp(false, false));
1635 Observer
.changedInstr(MI
);
1638 case TargetOpcode::G_FCMP
:
1639 Observer
.changingInstr(MI
);
1641 widenScalarDst(MI
, WideTy
);
1643 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_FPEXT
);
1644 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_FPEXT
);
1646 Observer
.changedInstr(MI
);
1649 case TargetOpcode::G_ICMP
:
1650 Observer
.changingInstr(MI
);
1652 widenScalarDst(MI
, WideTy
);
1654 unsigned ExtOpcode
= CmpInst::isSigned(static_cast<CmpInst::Predicate
>(
1655 MI
.getOperand(1).getPredicate()))
1656 ? TargetOpcode::G_SEXT
1657 : TargetOpcode::G_ZEXT
;
1658 widenScalarSrc(MI
, WideTy
, 2, ExtOpcode
);
1659 widenScalarSrc(MI
, WideTy
, 3, ExtOpcode
);
1661 Observer
.changedInstr(MI
);
1664 case TargetOpcode::G_GEP
:
1665 assert(TypeIdx
== 1 && "unable to legalize pointer of GEP");
1666 Observer
.changingInstr(MI
);
1667 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1668 Observer
.changedInstr(MI
);
1671 case TargetOpcode::G_PHI
: {
1672 assert(TypeIdx
== 0 && "Expecting only Idx 0");
1674 Observer
.changingInstr(MI
);
1675 for (unsigned I
= 1; I
< MI
.getNumOperands(); I
+= 2) {
1676 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
1677 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
1678 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_ANYEXT
);
1681 MachineBasicBlock
&MBB
= *MI
.getParent();
1682 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
1683 widenScalarDst(MI
, WideTy
);
1684 Observer
.changedInstr(MI
);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
      MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
}
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    Register QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
        .addDef(QuotReg)
        .addUse(MI.getOperand(1).getReg())
        .addUse(MI.getOperand(2).getReg());

    Register ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    Register Res = MI.getOperand(0).getReg();
    Register Overflow = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    Register HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
      .addDef(HiPart)
      .addUse(LHS)
      .addUse(RHS);

    Register Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      Register Shifted = MRI.createGenericVirtualRegister(Ty);
      Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
        .addDef(Shifted)
        .addUse(Res)
        .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
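  // Illustrative expansion of an s32 G_UMULO (hypothetical register names,
  // not emitted verbatim): the low half is the result, and overflow occurred
  // iff the high half of the full product is nonzero:
  //   %res:_(s32)  = G_MUL %lhs, %rhs
  //   %hi:_(s32)   = G_UMULH %lhs, %rhs
  //   %zero:_(s32) = G_CONSTANT i32 0
  //   %ovf:_(s1)   = G_ICMP intpred(ne), %hi, %zero
  // For G_SMULO, %hi is instead compared against %res ashr 31, which is what
  // the high half must equal when no signed overflow happened.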
  case TargetOpcode::G_FNEG: {
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    Register SubByReg = MI.getOperand(1).getReg();
    Register ZeroReg = Zero->getOperand(0).getReg();
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
                          MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    Register Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
    Register Addr = MI.getOperand(2).getReg();
    Register CmpVal = MI.getOperand(3).getReg();
    Register NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
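  // Sketch of the lowering above (illustrative MIR, hypothetical names): the
  // success flag is recomputed by comparing the loaded old value against the
  // expected value, since plain G_ATOMIC_CMPXCHG only returns the old value:
  //   %old:_(s32)    = G_ATOMIC_CMPXCHG %addr, %cmp, %new
  //   %success:_(s1) = G_ICMP intpred(eq), %old, %cmp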
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
      if (MI.getOpcode() == TargetOpcode::G_LOAD) {
        // This load needs splitting into power of 2 sized loads.
        if (DstTy.isVector())
          return UnableToLegalize;
        if (isPowerOf2_32(DstTy.getSizeInBits()))
          return UnableToLegalize; // Don't know what we're being asked to do.

        // Our strategy here is to generate anyextending loads for the smaller
        // types up to next power-2 result type, and then combine the two larger
        // result values together, before truncating back down to the non-pow-2
        // type.
        // E.g. v1 = i24 load =>
        // v2 = i32 load (2 byte)
        // v3 = i32 load (1 byte)
        // v4 = i32 shl v3, 16
        // v5 = i32 or v4, v2
        // v1 = i24 trunc v5
        // By doing this we generate the correct truncate which should get
        // combined away as an artifact with a matching extend.
        uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
        uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;

        MachineFunction &MF = MIRBuilder.getMF();
        MachineMemOperand *LargeMMO =
            MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
        MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
            &MMO, LargeSplitSize / 8, SmallSplitSize / 8);

        LLT PtrTy = MRI.getType(PtrReg);
        unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
        LLT AnyExtTy = LLT::scalar(AnyExtSize);
        Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
        Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
        auto LargeLoad =
            MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);

        auto OffsetCst =
            MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
        Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
        auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
        auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
                                              *SmallMMO);

        auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
        auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
        auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
        MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
        MI.eraseFromParent();
        return Legalized;
      }
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      Register TmpReg =
          MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
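  // For the sub-width case handled above (value narrower than its register
  // type), the lowering is simply a memory-width load plus the matching
  // extension, e.g. for an illustrative s8-in-memory G_SEXTLOAD producing
  // s32 (hypothetical registers):
  //   %tmp:_(s8)  = G_LOAD %ptr
  //   %dst:_(s32) = G_SEXT %tmp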
  case TargetOpcode::G_STORE: {
    // Lower a non-power of 2 store into multiple pow-2 stores.
    // E.g. split an i24 store into an i16 store + i8 store.
    // We do this by first extending the stored value to the next largest power
    // of 2 type, and then using truncating stores to store the components.
    // By doing this, likewise with G_LOAD, generate an extend that can be
    // artifact-combined away instead of leaving behind extracts.
    Register SrcReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    MachineMemOperand &MMO = **MI.memoperands_begin();
    if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
      return UnableToLegalize;
    if (SrcTy.isVector())
      return UnableToLegalize;
    if (isPowerOf2_32(SrcTy.getSizeInBits()))
      return UnableToLegalize; // Don't know what we're being asked to do.

    // Extend to the next pow-2.
    const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
    auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);

    // Obtain the smaller value by shifting away the larger value.
    uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
    uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
    auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
    auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);

    // Generate the GEP and truncating stores.
    LLT PtrTy = MRI.getType(PtrReg);
    auto OffsetCst =
        MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
    Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
    auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));

    MachineFunction &MF = MIRBuilder.getMF();
    MachineMemOperand *LargeMMO =
        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
    MachineMemOperand *SmallMMO =
        MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
    MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
    MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
    MI.eraseFromParent();
    return Legalized;
  }
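  // Illustrative result of the non-power-of-2 store split above for an s24
  // value (hypothetical registers, assuming 8-bit-addressable memory):
  //   %ext:_(s32)   = G_ANYEXT %val(s24)
  //   %c16:_(s32)   = G_CONSTANT i32 16
  //   %small:_(s32) = G_LSHR %ext, %c16
  //   G_STORE %ext, %ptr          ; 2-byte truncating store at offset 0
  //   %off:_(s64)   = G_CONSTANT i64 2
  //   %p2:_(p0)     = G_GEP %ptr, %off
  //   G_STORE %small, %p2         ; 1-byte truncating store at offset 2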
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case TargetOpcode::G_UADDO: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
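  // The unsigned-carry check above relies on wrap-around: for unsigned
  // addition, Res = LHS + RHS overflowed exactly when Res < RHS. E.g. with
  // s8 values, 200 + 100 = 44 (mod 256), and 44 < 100.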
  case TargetOpcode::G_UADDE: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register CarryIn = MI.getOperand(4).getReg();

    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
    Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_USUBO: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_USUBE: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();

    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
    Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
    Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
    Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));

    MIRBuilder.buildSub(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
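  // Borrow-out selection above: when LHS == RHS, the difference of the
  // operands alone is zero, so the incoming borrow alone decides whether the
  // whole subtraction underflows; otherwise a borrow-in of at most 1 cannot
  // flip the outcome, and the borrow-out is simply LHS <u RHS.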
  case TargetOpcode::G_UITOFP:
    return lowerUITOFP(MI, TypeIdx, Ty);
  case TargetOpcode::G_SITOFP:
    return lowerSITOFP(MI, TypeIdx, Ty);
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
    return lowerMinMax(MI, TypeIdx, Ty);
  case G_FCOPYSIGN:
    return lowerFCopySign(MI, TypeIdx, Ty);
  case G_FMINNUM:
  case G_FMAXNUM:
    return lowerFMinNumMaxNum(MI);
  case G_UNMERGE_VALUES:
    return lowerUnmergeValues(MI);
  case TargetOpcode::G_SEXT_INREG: {
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    Register TmpRes = MRI.createGenericVirtualRegister(DstTy);

    auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
    MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()});
    MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()});
    MI.eraseFromParent();
    return Legalized;
  }
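  // Worked example of the G_SEXT_INREG lowering above: sign-extending the
  // low 8 bits of an s32 shifts the field to the top and arithmetic-shifts
  // it back (hypothetical registers):
  //   %c24:_(s32) = G_CONSTANT i32 24    ; 32 - 8
  //   %tmp:_(s32) = G_SHL %src, %c24
  //   %dst:_(s32) = G_ASHR %tmp, %c24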
  case G_SHUFFLE_VECTOR:
    return lowerShuffleVector(MI);
  }
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorImplicitDef(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
  SmallVector<Register, 2> DstRegs;

  unsigned NarrowSize = NarrowTy.getSizeInBits();
  Register DstReg = MI.getOperand(0).getReg();
  unsigned Size = MRI.getType(DstReg).getSizeInBits();
  int NumParts = Size / NarrowSize;
  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  for (int i = 0; i < NumParts; ++i) {
    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildUndef(TmpReg);
    DstRegs.push_back(TmpReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  const unsigned Opc = MI.getOpcode();
  const unsigned NumOps = MI.getNumOperands() - 1;
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned Size = DstTy.getSizeInBits();
  const int NumParts = Size / NarrowSize;
  const LLT EltTy = DstTy.getElementType();
  const unsigned EltSize = EltTy.getSizeInBits();
  const unsigned BitsForNumParts = NarrowSize * NumParts;

  // Check if we have any leftovers. If we do, then only handle the case where
  // the leftover is one element.
  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
    return UnableToLegalize;

  if (BitsForNumParts != Size) {
    Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(AccumDstReg);

    // Handle the pieces which evenly divide into the requested type with
    // extract/op/insert sequence.
    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
      SmallVector<SrcOp, 4> SrcOps;
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
        Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
        SrcOps.push_back(PartOpReg);
      }

      Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);

      Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
      AccumDstReg = PartInsertReg;
    }

    // Handle the remaining element sized leftover piece.
    SmallVector<SrcOp, 4> SrcOps;
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
      Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
                              BitsForNumParts);
      SrcOps.push_back(PartOpReg);
    }

    Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
    MI.eraseFromParent();
    return Legalized;
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;

  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);

  if (NumOps >= 2)
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);

  if (NumOps >= 3)
    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);

  for (int i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);

    if (NumOps == 1)
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
    else if (NumOps == 2) {
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
    } else if (NumOps == 3) {
      MIRBuilder.buildInstr(Opc, {DstReg},
                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
    }

    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different
// element type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//        s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  LLT NarrowTy0 = NarrowTy;
  LLT NarrowTy1;
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
  } else {
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<Register, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
                                .addDef(DstReg)
                                .addUse(SrcRegs[I]);

    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1;

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      return UnableToLegalize;
    } else {
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first.
  for (int I = 0; I != TotalNumParts; ++I) {
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // TODO: Create sequence of extracts.
  if (DstTy == NarrowTy)
    return UnableToLegalize;

  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
  if (DstTy == GCDTy) {
    // This would just be a copy of the same unmerge.
    // TODO: Create extracts, pad with undef and create intermediate merges.
    return UnableToLegalize;
  }

  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
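// Example breakdown performed above (an illustrative sketch, hypothetical
// registers): an s96 load with NarrowTy s64 splits into an s64 piece and an
// s32 leftover, each with an adjusted memory operand offset:
//   %lo:_(s64) = G_LOAD %addr          ; offset 0, 8 bytes
//   %a1:_(p0)  = G_GEP %addr, 8        ; materialized address
//   %hi:_(s32) = G_LOAD %a1            ; offset 8, 4 bytes
// followed by insertParts to rebuild the original s96 value.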
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FMA:
  case G_FDIV:
  case G_FREM:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_FNEARBYINT:
  case G_FSQRT:
  case G_FCOS:
  case G_FSIN:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FLOG2:
  case G_FLOG10:
  case G_FPOW:
  case G_SDIV:
  case G_BSWAP:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
  MI.eraseFromParent();

  return Legalized;
}
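// Worked example of the constant-amount split above, assuming an s64 G_SHL
// by 40 narrowed to s32 halves (NVTBits = 32, so Amt > NVTBits):
//   Lo = 0
//   Hi = InL << (40 - 32)
// i.e. the low input half supplies the high output half and the low output
// half is all zeros.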
// TODO: Optimize if constant shift amount.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  if (const MachineInstr *KShiftAmt =
          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);

    auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);

    // Long: ShAmt >= NewBitSize
    auto HiL = MIRBuilder.buildConstant(HalfTy, 0);          // Hi part is zero.
    auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);

    auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);

    // Long: ShAmt >= NewBitSize

    // Sign of Hi part.
    auto HiL = MIRBuilder.buildAShr(
        HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));

    auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
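// Shape of the variable-amount expansion above for G_LSHR on {InL, InH}
// (a sketch; Amt is not known at compile time):
//   short (Amt < NewBitSize):  Lo = (InL >> Amt) | (InH << (NewBitSize - Amt))
//                              Hi = InH >> Amt
//   long  (Amt >= NewBitSize): Lo = InH >> (Amt - NewBitSize), Hi = 0
// with extra selects so a zero amount passes {InL, InH} through unchanged.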
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, I);
  }

  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);
  Observer.changedInstr(MI);
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  MIRBuilder.setInstr(MI);
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
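// multiplyRegisters implements schoolbook multiplication on NarrowTy-sized
// digits. For two-part operands {x0, x1} and {y0, y1} (low part first), a
// sketch of the emitted pieces is:
//   dst0 = mul(x0, y0)
//   dst1 = mul(x1, y0) + mul(x0, y1) + umulh(x0, y0)  (+ carry handling)
// where G_UMULH supplies the high half of each digit product and carries
// are accumulated with G_UADDO/G_ZEXT as shown above.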
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1 = MI.getOperand(1).getReg();
  Register Src2 = MI.getOperand(2).getReg();

  LLT Ty = MRI.getType(DstReg);
  if (Ty.isVector())
    return UnableToLegalize;

  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
  unsigned DstSize = Ty.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
    return UnableToLegalize;

  unsigned NumDstParts = DstSize / NarrowSize;
  unsigned NumSrcParts = SrcSize / NarrowSize;
  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);

  SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
  DstTmpRegs.resize(DstTmpParts);
  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);

  // Take only high half of registers if this is high mul.
  ArrayRef<Register> DstRegs(
      IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
  MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;

  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                      {Src0Regs[I], Src1Regs[I]});
    DstRegs.push_back(Inst->getOperand(0).getReg());
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(
        MI.getOpcode(),
        {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

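// Illustrative sketch (not from the original source): with NarrowTy = s32,
//   %dst:_(s70) = G_AND %a:_(s70), %b:_(s70)
// splits each source into two s32 parts plus an s6 leftover and emits one
// G_AND per part:
//   %d0:_(s32) = G_AND %a0, %b0
//   %d1:_(s32) = G_AND %a1, %b1
//   %d2:_(s6) = G_AND %a2, %b2
// before insertParts reassembles %dst from %d0, %d1, and %d2.
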
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register CondReg = MI.getOperand(1).getReg();
  LLT CondTy = MRI.getType(CondReg);
  if (CondTy.isVector()) // TODO: Handle vselect
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;

  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(NarrowTy,
                                         CondReg, Src1Regs[I], Src2Regs[I]);
    DstRegs.push_back(Select->getOperand(0).getReg());
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(
        LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
    DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

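// Illustrative sketch (not from the original source): the same part-wise
// scheme applied to
//   %dst:_(s64) = G_SELECT %cond:_(s1), %t:_(s64), %f:_(s64)
// with NarrowTy = s32 yields two selects sharing the scalar condition:
//   %d0:_(s32) = G_SELECT %cond(s1), %t0, %f0
//   %d1:_(s32) = G_SELECT %cond(s1), %t1, %f1
//   %dst:_(s64) = G_MERGE_VALUES %d0(s32), %d1(s32)
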
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}

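// Worked example of the CTLZ fallback above (illustrative, not from the
// original source): for x = 0b00010000 at s8, the shift-or sequence smears
// the leading set bit rightwards:
//   x |= x >> 1  ->  0b00011000
//   x |= x >> 2  ->  0b00011110
//   x |= x >> 4  ->  0b00011111
// so popcount(x) = 5 and ctlz = Len - popcount = 8 - 5 = 3.
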
// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}

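// Note on the expansion above: after the input is normalized (shifted left by
// its leading-zero count), the low 40 bits that do not fit in the f32
// mantissa are held in t. Comparing t against 0x8000000000 (the halfway
// point) makes r the round-to-nearest-even increment: 1 above the midpoint,
// the mantissa's low bit exactly at the midpoint, and 0 below it.
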
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy != LLT::scalar(64))
    return UnableToLegalize;

  if (DstTy == LLT::scalar(32)) {
    // TODO: SelectionDAG has several alternative expansions to port which may
    // be more reasonable depending on the available instructions. If a target
    // has sitofp, does not have CTLZ, or can efficiently use f64 as an
    // intermediate type, this is probably worse.
    return lowerU64ToF32BitOps(MI);
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  if (SrcTy != S64)
    return UnableToLegalize;

  if (DstTy == S32) {
    // signed cl2f(long l) {
    //   long s = l >> 63;
    //   float r = cul2f((l + s) ^ s);
    //   return s ? -r : r;
    // }
    Register L = Src;
    auto SignBit = MIRBuilder.buildConstant(S64, 63);
    auto S = MIRBuilder.buildAShr(S64, L, SignBit);

    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
    auto R = MIRBuilder.buildUITOFP(S32, Xor);

    auto RNeg = MIRBuilder.buildFNeg(S32, R);
    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
                                            MIRBuilder.buildConstant(S64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

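// Worked example of the sign-folding trick above (illustrative, not from the
// original source): for l = -5, s = l >> 63 = -1, so (l + s) ^ s =
// (-6) ^ -1 = ~(-6) = 5. cul2f(5) gives 5.0f, and the final select negates it
// because s is non-zero, producing -5.0f. For non-negative l, s = 0 and both
// steps are no-ops.
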
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_SMIN:
    return CmpInst::ICMP_SLT;
  case TargetOpcode::G_SMAX:
    return CmpInst::ICMP_SGT;
  case TargetOpcode::G_UMIN:
    return CmpInst::ICMP_ULT;
  case TargetOpcode::G_UMAX:
    return CmpInst::ICMP_UGT;
  default:
    llvm_unreachable("not in integer min/max");
  }
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
  LLT CmpType = MRI.getType(Dst).changeElementSize(1);

  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);

  MI.eraseFromParent();
  return Legalized;
}

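// Illustrative sketch (not from the original source):
//   %dst:_(s32) = G_SMAX %a:_(s32), %b:_(s32)
// lowers to:
//   %cmp:_(s1) = G_ICMP intpred(sgt), %a(s32), %b(s32)
//   %dst:_(s32) = G_SELECT %cmp(s1), %a(s32), %b(s32)
// changeElementSize(1) keeps the shape for vectors, e.g. a <4 x s32> min/max
// compares into a <4 x s1> mask.
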
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const LLT Src0Ty = MRI.getType(Src0);
  const LLT Src1Ty = MRI.getType(Src1);

  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();

  auto SignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getSignMask(Src0Size));

  auto NotSignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
  MachineInstr *Or;

  if (Src0Ty == Src1Ty) {
    auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  } else if (Src0Size > Src1Size) {
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  } else {
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  if (unsigned Flags = MI.getFlags())
    Or->setFlags(Flags);

  MI.eraseFromParent();
  return Legalized;
}

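// Worked example for the same-type case above (illustrative, not from the
// original source): for f32 operands this computes
//   %dst = (%src0 & 0x7fffffff) | (%src1 & 0x80000000)
// so G_FCOPYSIGN of 1.0 (0x3f800000) and -2.0 (0xc0000000) yields 0xbf800000,
// i.e. -1.0. The mixed-size branches first move %src1's sign bit into %src0's
// sign position with a shift plus zext or trunc.
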
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.
    //
    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicated quiet-snan instruction as we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!isKnownNeverSNaN(Src0, MRI))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!isKnownNeverSNaN(Src1, MRI))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

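// Note on the quieting above (illustrative, not from the original source):
// G_FMINNUM must return the numeric operand even when the other input is a
// signaling NaN, while the IEEE variant only guarantees that for quiet NaNs.
// Canonicalizing a possible-sNaN input quiets it first, so e.g.
// fminnum(42.0, sNaN) still folds to 42.0 after the replacement.
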
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  const unsigned NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);

  // Expand scalarizing unmerge as bitcast to integer and shift.
  if (!DstTy.isVector() && SrcTy.isVector() &&
      SrcTy.getElementType() == DstTy) {
    LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
    Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);

    MIRBuilder.buildTrunc(Dst0Reg, Cast);

    const unsigned DstSize = DstTy.getSizeInBits();
    unsigned Offset = DstSize;
    for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
      auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
      auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

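// Illustrative sketch (not from the original source):
//   %a:_(s8), %b:_(s8), %c:_(s8), %d:_(s8) = G_UNMERGE_VALUES %v:_(<4 x s8>)
// becomes one bitcast plus shift/trunc chains:
//   %cast:_(s32) = G_BITCAST %v(<4 x s8>)
//   %a:_(s8) = G_TRUNC %cast(s32)
//   %c8:_(s32) = G_CONSTANT i32 8
//   %sh:_(s32) = G_LSHR %cast, %c8(s32)
//   %b:_(s8) = G_TRUNC %sh(s32)
// and likewise for %c and %d with shifts of 16 and 24.
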
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  const Constant *ShufMask = MI.getOperand(3).getShuffleMask();

  SmallVector<int, 32> Mask;
  ShuffleVectorInst::getShuffleMask(ShufMask, Mask);

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}

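// Illustrative sketch (not from the original source): a <2 x s32> shuffle of
// %v0 and %v1 with mask {1, 2} extracts lane 1 of %v0 and lane 0 of %v1
// (2 - NumElts) with G_EXTRACT_VECTOR_ELT, then collects the elements with a
// G_BUILD_VECTOR; negative mask entries share a single G_IMPLICIT_DEF element.
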