//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
27 #define DEBUG_TYPE "legalizer"
30 using namespace LegalizeActions
;
32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35 /// with any leftover piece as type \p LeftoverTy
37 /// Returns -1 in the first element of the pair if the breakdown is not
39 static std::pair
<int, int>
40 getNarrowTypeBreakDown(LLT OrigTy
, LLT NarrowTy
, LLT
&LeftoverTy
) {
41 assert(!LeftoverTy
.isValid() && "this is an out argument");
43 unsigned Size
= OrigTy
.getSizeInBits();
44 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
45 unsigned NumParts
= Size
/ NarrowSize
;
46 unsigned LeftoverSize
= Size
- NumParts
* NarrowSize
;
47 assert(Size
> NarrowSize
);
49 if (LeftoverSize
== 0)
52 if (NarrowTy
.isVector()) {
53 unsigned EltSize
= OrigTy
.getScalarSizeInBits();
54 if (LeftoverSize
% EltSize
!= 0)
56 LeftoverTy
= LLT::scalarOrVector(LeftoverSize
/ EltSize
, EltSize
);
58 LeftoverTy
= LLT::scalar(LeftoverSize
);
61 int NumLeftover
= LeftoverSize
/ LeftoverTy
.getSizeInBits();
62 return std::make_pair(NumParts
, NumLeftover
);
65 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
,
66 GISelChangeObserver
&Observer
,
67 MachineIRBuilder
&Builder
)
68 : MIRBuilder(Builder
), MRI(MF
.getRegInfo()),
69 LI(*MF
.getSubtarget().getLegalizerInfo()), Observer(Observer
) {
71 MIRBuilder
.setChangeObserver(Observer
);
74 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
, const LegalizerInfo
&LI
,
75 GISelChangeObserver
&Observer
,
77 : MIRBuilder(B
), MRI(MF
.getRegInfo()), LI(LI
), Observer(Observer
) {
79 MIRBuilder
.setChangeObserver(Observer
);
81 LegalizerHelper::LegalizeResult
82 LegalizerHelper::legalizeInstrStep(MachineInstr
&MI
) {
83 LLVM_DEBUG(dbgs() << "Legalizing: "; MI
.print(dbgs()));
85 if (MI
.getOpcode() == TargetOpcode::G_INTRINSIC
||
86 MI
.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
)
87 return LI
.legalizeIntrinsic(MI
, MRI
, MIRBuilder
) ? Legalized
89 auto Step
= LI
.getAction(MI
, MRI
);
90 switch (Step
.Action
) {
92 LLVM_DEBUG(dbgs() << ".. Already legal\n");
95 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
98 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
99 return narrowScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
101 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
102 return widenScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
104 LLVM_DEBUG(dbgs() << ".. Lower\n");
105 return lower(MI
, Step
.TypeIdx
, Step
.NewType
);
107 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
108 return fewerElementsVector(MI
, Step
.TypeIdx
, Step
.NewType
);
110 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
111 return moreElementsVector(MI
, Step
.TypeIdx
, Step
.NewType
);
113 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
114 return LI
.legalizeCustom(MI
, MRI
, MIRBuilder
, Observer
) ? Legalized
117 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
118 return UnableToLegalize
;
122 void LegalizerHelper::extractParts(Register Reg
, LLT Ty
, int NumParts
,
123 SmallVectorImpl
<Register
> &VRegs
) {
124 for (int i
= 0; i
< NumParts
; ++i
)
125 VRegs
.push_back(MRI
.createGenericVirtualRegister(Ty
));
126 MIRBuilder
.buildUnmerge(VRegs
, Reg
);
129 bool LegalizerHelper::extractParts(Register Reg
, LLT RegTy
,
130 LLT MainTy
, LLT
&LeftoverTy
,
131 SmallVectorImpl
<Register
> &VRegs
,
132 SmallVectorImpl
<Register
> &LeftoverRegs
) {
133 assert(!LeftoverTy
.isValid() && "this is an out argument");
135 unsigned RegSize
= RegTy
.getSizeInBits();
136 unsigned MainSize
= MainTy
.getSizeInBits();
137 unsigned NumParts
= RegSize
/ MainSize
;
138 unsigned LeftoverSize
= RegSize
- NumParts
* MainSize
;
140 // Use an unmerge when possible.
141 if (LeftoverSize
== 0) {
142 for (unsigned I
= 0; I
< NumParts
; ++I
)
143 VRegs
.push_back(MRI
.createGenericVirtualRegister(MainTy
));
144 MIRBuilder
.buildUnmerge(VRegs
, Reg
);
148 if (MainTy
.isVector()) {
149 unsigned EltSize
= MainTy
.getScalarSizeInBits();
150 if (LeftoverSize
% EltSize
!= 0)
152 LeftoverTy
= LLT::scalarOrVector(LeftoverSize
/ EltSize
, EltSize
);
154 LeftoverTy
= LLT::scalar(LeftoverSize
);
157 // For irregular sizes, extract the individual parts.
158 for (unsigned I
= 0; I
!= NumParts
; ++I
) {
159 Register NewReg
= MRI
.createGenericVirtualRegister(MainTy
);
160 VRegs
.push_back(NewReg
);
161 MIRBuilder
.buildExtract(NewReg
, Reg
, MainSize
* I
);
164 for (unsigned Offset
= MainSize
* NumParts
; Offset
< RegSize
;
165 Offset
+= LeftoverSize
) {
166 Register NewReg
= MRI
.createGenericVirtualRegister(LeftoverTy
);
167 LeftoverRegs
.push_back(NewReg
);
168 MIRBuilder
.buildExtract(NewReg
, Reg
, Offset
);
174 static LLT
getGCDType(LLT OrigTy
, LLT TargetTy
) {
175 if (OrigTy
.isVector() && TargetTy
.isVector()) {
176 assert(OrigTy
.getElementType() == TargetTy
.getElementType());
177 int GCD
= greatestCommonDivisor(OrigTy
.getNumElements(),
178 TargetTy
.getNumElements());
179 return LLT::scalarOrVector(GCD
, OrigTy
.getElementType());
182 if (OrigTy
.isVector() && !TargetTy
.isVector()) {
183 assert(OrigTy
.getElementType() == TargetTy
);
187 assert(!OrigTy
.isVector() && !TargetTy
.isVector());
189 int GCD
= greatestCommonDivisor(OrigTy
.getSizeInBits(),
190 TargetTy
.getSizeInBits());
191 return LLT::scalar(GCD
);
194 void LegalizerHelper::insertParts(Register DstReg
,
195 LLT ResultTy
, LLT PartTy
,
196 ArrayRef
<Register
> PartRegs
,
198 ArrayRef
<Register
> LeftoverRegs
) {
199 if (!LeftoverTy
.isValid()) {
200 assert(LeftoverRegs
.empty());
202 if (!ResultTy
.isVector()) {
203 MIRBuilder
.buildMerge(DstReg
, PartRegs
);
207 if (PartTy
.isVector())
208 MIRBuilder
.buildConcatVectors(DstReg
, PartRegs
);
210 MIRBuilder
.buildBuildVector(DstReg
, PartRegs
);
214 unsigned PartSize
= PartTy
.getSizeInBits();
215 unsigned LeftoverPartSize
= LeftoverTy
.getSizeInBits();
217 Register CurResultReg
= MRI
.createGenericVirtualRegister(ResultTy
);
218 MIRBuilder
.buildUndef(CurResultReg
);
221 for (Register PartReg
: PartRegs
) {
222 Register NewResultReg
= MRI
.createGenericVirtualRegister(ResultTy
);
223 MIRBuilder
.buildInsert(NewResultReg
, CurResultReg
, PartReg
, Offset
);
224 CurResultReg
= NewResultReg
;
228 for (unsigned I
= 0, E
= LeftoverRegs
.size(); I
!= E
; ++I
) {
229 // Use the original output register for the final insert to avoid a copy.
230 Register NewResultReg
= (I
+ 1 == E
) ?
231 DstReg
: MRI
.createGenericVirtualRegister(ResultTy
);
233 MIRBuilder
.buildInsert(NewResultReg
, CurResultReg
, LeftoverRegs
[I
], Offset
);
234 CurResultReg
= NewResultReg
;
235 Offset
+= LeftoverPartSize
;
239 static RTLIB::Libcall
getRTLibDesc(unsigned Opcode
, unsigned Size
) {
241 case TargetOpcode::G_SDIV
:
242 assert((Size
== 32 || Size
== 64) && "Unsupported size");
243 return Size
== 64 ? RTLIB::SDIV_I64
: RTLIB::SDIV_I32
;
244 case TargetOpcode::G_UDIV
:
245 assert((Size
== 32 || Size
== 64) && "Unsupported size");
246 return Size
== 64 ? RTLIB::UDIV_I64
: RTLIB::UDIV_I32
;
247 case TargetOpcode::G_SREM
:
248 assert((Size
== 32 || Size
== 64) && "Unsupported size");
249 return Size
== 64 ? RTLIB::SREM_I64
: RTLIB::SREM_I32
;
250 case TargetOpcode::G_UREM
:
251 assert((Size
== 32 || Size
== 64) && "Unsupported size");
252 return Size
== 64 ? RTLIB::UREM_I64
: RTLIB::UREM_I32
;
253 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
254 assert(Size
== 32 && "Unsupported size");
255 return RTLIB::CTLZ_I32
;
256 case TargetOpcode::G_FADD
:
257 assert((Size
== 32 || Size
== 64) && "Unsupported size");
258 return Size
== 64 ? RTLIB::ADD_F64
: RTLIB::ADD_F32
;
259 case TargetOpcode::G_FSUB
:
260 assert((Size
== 32 || Size
== 64) && "Unsupported size");
261 return Size
== 64 ? RTLIB::SUB_F64
: RTLIB::SUB_F32
;
262 case TargetOpcode::G_FMUL
:
263 assert((Size
== 32 || Size
== 64) && "Unsupported size");
264 return Size
== 64 ? RTLIB::MUL_F64
: RTLIB::MUL_F32
;
265 case TargetOpcode::G_FDIV
:
266 assert((Size
== 32 || Size
== 64) && "Unsupported size");
267 return Size
== 64 ? RTLIB::DIV_F64
: RTLIB::DIV_F32
;
268 case TargetOpcode::G_FEXP
:
269 assert((Size
== 32 || Size
== 64) && "Unsupported size");
270 return Size
== 64 ? RTLIB::EXP_F64
: RTLIB::EXP_F32
;
271 case TargetOpcode::G_FEXP2
:
272 assert((Size
== 32 || Size
== 64) && "Unsupported size");
273 return Size
== 64 ? RTLIB::EXP2_F64
: RTLIB::EXP2_F32
;
274 case TargetOpcode::G_FREM
:
275 return Size
== 64 ? RTLIB::REM_F64
: RTLIB::REM_F32
;
276 case TargetOpcode::G_FPOW
:
277 return Size
== 64 ? RTLIB::POW_F64
: RTLIB::POW_F32
;
278 case TargetOpcode::G_FMA
:
279 assert((Size
== 32 || Size
== 64) && "Unsupported size");
280 return Size
== 64 ? RTLIB::FMA_F64
: RTLIB::FMA_F32
;
281 case TargetOpcode::G_FSIN
:
282 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
283 return Size
== 128 ? RTLIB::SIN_F128
284 : Size
== 64 ? RTLIB::SIN_F64
: RTLIB::SIN_F32
;
285 case TargetOpcode::G_FCOS
:
286 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
287 return Size
== 128 ? RTLIB::COS_F128
288 : Size
== 64 ? RTLIB::COS_F64
: RTLIB::COS_F32
;
289 case TargetOpcode::G_FLOG10
:
290 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
291 return Size
== 128 ? RTLIB::LOG10_F128
292 : Size
== 64 ? RTLIB::LOG10_F64
: RTLIB::LOG10_F32
;
293 case TargetOpcode::G_FLOG
:
294 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
295 return Size
== 128 ? RTLIB::LOG_F128
296 : Size
== 64 ? RTLIB::LOG_F64
: RTLIB::LOG_F32
;
297 case TargetOpcode::G_FLOG2
:
298 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
299 return Size
== 128 ? RTLIB::LOG2_F128
300 : Size
== 64 ? RTLIB::LOG2_F64
: RTLIB::LOG2_F32
;
301 case TargetOpcode::G_FCEIL
:
302 assert((Size
== 32 || Size
== 64) && "Unsupported size");
303 return Size
== 64 ? RTLIB::CEIL_F64
: RTLIB::CEIL_F32
;
304 case TargetOpcode::G_FFLOOR
:
305 assert((Size
== 32 || Size
== 64) && "Unsupported size");
306 return Size
== 64 ? RTLIB::FLOOR_F64
: RTLIB::FLOOR_F32
;
308 llvm_unreachable("Unknown libcall function");
311 LegalizerHelper::LegalizeResult
312 llvm::createLibcall(MachineIRBuilder
&MIRBuilder
, RTLIB::Libcall Libcall
,
313 const CallLowering::ArgInfo
&Result
,
314 ArrayRef
<CallLowering::ArgInfo
> Args
) {
315 auto &CLI
= *MIRBuilder
.getMF().getSubtarget().getCallLowering();
316 auto &TLI
= *MIRBuilder
.getMF().getSubtarget().getTargetLowering();
317 const char *Name
= TLI
.getLibcallName(Libcall
);
319 MIRBuilder
.getMF().getFrameInfo().setHasCalls(true);
321 CallLowering::CallLoweringInfo Info
;
322 Info
.CallConv
= TLI
.getLibcallCallingConv(Libcall
);
323 Info
.Callee
= MachineOperand::CreateES(Name
);
324 Info
.OrigRet
= Result
;
325 std::copy(Args
.begin(), Args
.end(), std::back_inserter(Info
.OrigArgs
));
326 if (!CLI
.lowerCall(MIRBuilder
, Info
))
327 return LegalizerHelper::UnableToLegalize
;
329 return LegalizerHelper::Legalized
;
332 // Useful for libcalls where all operands have the same type.
333 static LegalizerHelper::LegalizeResult
334 simpleLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, unsigned Size
,
336 auto Libcall
= getRTLibDesc(MI
.getOpcode(), Size
);
338 SmallVector
<CallLowering::ArgInfo
, 3> Args
;
339 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
++)
340 Args
.push_back({MI
.getOperand(i
).getReg(), OpType
});
341 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), OpType
},
345 LegalizerHelper::LegalizeResult
346 llvm::createMemLibcall(MachineIRBuilder
&MIRBuilder
, MachineRegisterInfo
&MRI
,
348 assert(MI
.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
);
349 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
351 SmallVector
<CallLowering::ArgInfo
, 3> Args
;
352 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
++) {
353 Register Reg
= MI
.getOperand(i
).getReg();
355 // Need derive an IR type for call lowering.
356 LLT OpLLT
= MRI
.getType(Reg
);
357 Type
*OpTy
= nullptr;
358 if (OpLLT
.isPointer())
359 OpTy
= Type::getInt8PtrTy(Ctx
, OpLLT
.getAddressSpace());
361 OpTy
= IntegerType::get(Ctx
, OpLLT
.getSizeInBits());
362 Args
.push_back({Reg
, OpTy
});
365 auto &CLI
= *MIRBuilder
.getMF().getSubtarget().getCallLowering();
366 auto &TLI
= *MIRBuilder
.getMF().getSubtarget().getTargetLowering();
367 Intrinsic::ID ID
= MI
.getOperand(0).getIntrinsicID();
368 RTLIB::Libcall RTLibcall
;
370 case Intrinsic::memcpy
:
371 RTLibcall
= RTLIB::MEMCPY
;
373 case Intrinsic::memset
:
374 RTLibcall
= RTLIB::MEMSET
;
376 case Intrinsic::memmove
:
377 RTLibcall
= RTLIB::MEMMOVE
;
380 return LegalizerHelper::UnableToLegalize
;
382 const char *Name
= TLI
.getLibcallName(RTLibcall
);
384 MIRBuilder
.setInstr(MI
);
385 MIRBuilder
.getMF().getFrameInfo().setHasCalls(true);
387 CallLowering::CallLoweringInfo Info
;
388 Info
.CallConv
= TLI
.getLibcallCallingConv(RTLibcall
);
389 Info
.Callee
= MachineOperand::CreateES(Name
);
390 Info
.OrigRet
= CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx
));
391 std::copy(Args
.begin(), Args
.end(), std::back_inserter(Info
.OrigArgs
));
392 if (!CLI
.lowerCall(MIRBuilder
, Info
))
393 return LegalizerHelper::UnableToLegalize
;
395 return LegalizerHelper::Legalized
;
398 static RTLIB::Libcall
getConvRTLibDesc(unsigned Opcode
, Type
*ToType
,
400 auto ToMVT
= MVT::getVT(ToType
);
401 auto FromMVT
= MVT::getVT(FromType
);
404 case TargetOpcode::G_FPEXT
:
405 return RTLIB::getFPEXT(FromMVT
, ToMVT
);
406 case TargetOpcode::G_FPTRUNC
:
407 return RTLIB::getFPROUND(FromMVT
, ToMVT
);
408 case TargetOpcode::G_FPTOSI
:
409 return RTLIB::getFPTOSINT(FromMVT
, ToMVT
);
410 case TargetOpcode::G_FPTOUI
:
411 return RTLIB::getFPTOUINT(FromMVT
, ToMVT
);
412 case TargetOpcode::G_SITOFP
:
413 return RTLIB::getSINTTOFP(FromMVT
, ToMVT
);
414 case TargetOpcode::G_UITOFP
:
415 return RTLIB::getUINTTOFP(FromMVT
, ToMVT
);
417 llvm_unreachable("Unsupported libcall function");
420 static LegalizerHelper::LegalizeResult
421 conversionLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, Type
*ToType
,
423 RTLIB::Libcall Libcall
= getConvRTLibDesc(MI
.getOpcode(), ToType
, FromType
);
424 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), ToType
},
425 {{MI
.getOperand(1).getReg(), FromType
}});
428 LegalizerHelper::LegalizeResult
429 LegalizerHelper::libcall(MachineInstr
&MI
) {
430 LLT LLTy
= MRI
.getType(MI
.getOperand(0).getReg());
431 unsigned Size
= LLTy
.getSizeInBits();
432 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
434 MIRBuilder
.setInstr(MI
);
436 switch (MI
.getOpcode()) {
438 return UnableToLegalize
;
439 case TargetOpcode::G_SDIV
:
440 case TargetOpcode::G_UDIV
:
441 case TargetOpcode::G_SREM
:
442 case TargetOpcode::G_UREM
:
443 case TargetOpcode::G_CTLZ_ZERO_UNDEF
: {
444 Type
*HLTy
= IntegerType::get(Ctx
, Size
);
445 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
446 if (Status
!= Legalized
)
450 case TargetOpcode::G_FADD
:
451 case TargetOpcode::G_FSUB
:
452 case TargetOpcode::G_FMUL
:
453 case TargetOpcode::G_FDIV
:
454 case TargetOpcode::G_FMA
:
455 case TargetOpcode::G_FPOW
:
456 case TargetOpcode::G_FREM
:
457 case TargetOpcode::G_FCOS
:
458 case TargetOpcode::G_FSIN
:
459 case TargetOpcode::G_FLOG10
:
460 case TargetOpcode::G_FLOG
:
461 case TargetOpcode::G_FLOG2
:
462 case TargetOpcode::G_FEXP
:
463 case TargetOpcode::G_FEXP2
:
464 case TargetOpcode::G_FCEIL
:
465 case TargetOpcode::G_FFLOOR
: {
467 LLVM_DEBUG(dbgs() << "Size " << Size
<< " too large to legalize.\n");
468 return UnableToLegalize
;
470 Type
*HLTy
= Size
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
);
471 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
472 if (Status
!= Legalized
)
476 case TargetOpcode::G_FPEXT
: {
477 // FIXME: Support other floating point types (half, fp128 etc)
478 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
479 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
480 if (ToSize
!= 64 || FromSize
!= 32)
481 return UnableToLegalize
;
482 LegalizeResult Status
= conversionLibcall(
483 MI
, MIRBuilder
, Type::getDoubleTy(Ctx
), Type::getFloatTy(Ctx
));
484 if (Status
!= Legalized
)
488 case TargetOpcode::G_FPTRUNC
: {
489 // FIXME: Support other floating point types (half, fp128 etc)
490 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
491 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
492 if (ToSize
!= 32 || FromSize
!= 64)
493 return UnableToLegalize
;
494 LegalizeResult Status
= conversionLibcall(
495 MI
, MIRBuilder
, Type::getFloatTy(Ctx
), Type::getDoubleTy(Ctx
));
496 if (Status
!= Legalized
)
500 case TargetOpcode::G_FPTOSI
:
501 case TargetOpcode::G_FPTOUI
: {
502 // FIXME: Support other types
503 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
504 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
505 if ((ToSize
!= 32 && ToSize
!= 64) || (FromSize
!= 32 && FromSize
!= 64))
506 return UnableToLegalize
;
507 LegalizeResult Status
= conversionLibcall(
509 ToSize
== 32 ? Type::getInt32Ty(Ctx
) : Type::getInt64Ty(Ctx
),
510 FromSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
));
511 if (Status
!= Legalized
)
515 case TargetOpcode::G_SITOFP
:
516 case TargetOpcode::G_UITOFP
: {
517 // FIXME: Support other types
518 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
519 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
520 if ((FromSize
!= 32 && FromSize
!= 64) || (ToSize
!= 32 && ToSize
!= 64))
521 return UnableToLegalize
;
522 LegalizeResult Status
= conversionLibcall(
524 ToSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
),
525 FromSize
== 32 ? Type::getInt32Ty(Ctx
) : Type::getInt64Ty(Ctx
));
526 if (Status
!= Legalized
)
532 MI
.eraseFromParent();
536 LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalar(MachineInstr
&MI
,
539 MIRBuilder
.setInstr(MI
);
541 uint64_t SizeOp0
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
542 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
544 switch (MI
.getOpcode()) {
546 return UnableToLegalize
;
547 case TargetOpcode::G_IMPLICIT_DEF
: {
548 // FIXME: add support for when SizeOp0 isn't an exact multiple of
550 if (SizeOp0
% NarrowSize
!= 0)
551 return UnableToLegalize
;
552 int NumParts
= SizeOp0
/ NarrowSize
;
554 SmallVector
<Register
, 2> DstRegs
;
555 for (int i
= 0; i
< NumParts
; ++i
)
557 MIRBuilder
.buildUndef(NarrowTy
)->getOperand(0).getReg());
559 Register DstReg
= MI
.getOperand(0).getReg();
560 if(MRI
.getType(DstReg
).isVector())
561 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
563 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
564 MI
.eraseFromParent();
567 case TargetOpcode::G_CONSTANT
: {
568 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
569 const APInt
&Val
= MI
.getOperand(1).getCImm()->getValue();
570 unsigned TotalSize
= Ty
.getSizeInBits();
571 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
572 int NumParts
= TotalSize
/ NarrowSize
;
574 SmallVector
<Register
, 4> PartRegs
;
575 for (int I
= 0; I
!= NumParts
; ++I
) {
576 unsigned Offset
= I
* NarrowSize
;
577 auto K
= MIRBuilder
.buildConstant(NarrowTy
,
578 Val
.lshr(Offset
).trunc(NarrowSize
));
579 PartRegs
.push_back(K
.getReg(0));
583 unsigned LeftoverBits
= TotalSize
- NumParts
* NarrowSize
;
584 SmallVector
<Register
, 1> LeftoverRegs
;
585 if (LeftoverBits
!= 0) {
586 LeftoverTy
= LLT::scalar(LeftoverBits
);
587 auto K
= MIRBuilder
.buildConstant(
589 Val
.lshr(NumParts
* NarrowSize
).trunc(LeftoverBits
));
590 LeftoverRegs
.push_back(K
.getReg(0));
593 insertParts(MI
.getOperand(0).getReg(),
594 Ty
, NarrowTy
, PartRegs
, LeftoverTy
, LeftoverRegs
);
596 MI
.eraseFromParent();
599 case TargetOpcode::G_SEXT
: {
601 return UnableToLegalize
;
603 if (NarrowTy
.getSizeInBits() != SizeOp0
/ 2) {
604 LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy
<< "\n");
605 return UnableToLegalize
;
608 Register SrcReg
= MI
.getOperand(1).getReg();
610 // Shift the sign bit of the low register through the high register.
612 MIRBuilder
.buildConstant(LLT::scalar(64), NarrowTy
.getSizeInBits() - 1);
613 auto Shift
= MIRBuilder
.buildAShr(NarrowTy
, SrcReg
, ShiftAmt
);
614 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {SrcReg
, Shift
.getReg(0)});
615 MI
.eraseFromParent();
618 case TargetOpcode::G_ZEXT
: {
620 return UnableToLegalize
;
622 if (SizeOp0
% NarrowTy
.getSizeInBits() != 0)
623 return UnableToLegalize
;
625 // Generate a merge where the bottom bits are taken from the source, and
626 // zero everything else.
627 Register ZeroReg
= MIRBuilder
.buildConstant(NarrowTy
, 0).getReg(0);
628 unsigned NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
629 SmallVector
<Register
, 4> Srcs
= {MI
.getOperand(1).getReg()};
630 for (unsigned Part
= 1; Part
< NumParts
; ++Part
)
631 Srcs
.push_back(ZeroReg
);
632 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), Srcs
);
633 MI
.eraseFromParent();
636 case TargetOpcode::G_TRUNC
: {
638 return UnableToLegalize
;
640 uint64_t SizeOp1
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
641 if (NarrowTy
.getSizeInBits() * 2 != SizeOp1
) {
642 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy
<< "\n");
643 return UnableToLegalize
;
646 auto Unmerge
= MIRBuilder
.buildUnmerge(NarrowTy
, MI
.getOperand(1).getReg());
647 MIRBuilder
.buildCopy(MI
.getOperand(0).getReg(), Unmerge
.getReg(0));
648 MI
.eraseFromParent();
652 case TargetOpcode::G_ADD
: {
653 // FIXME: add support for when SizeOp0 isn't an exact multiple of
655 if (SizeOp0
% NarrowSize
!= 0)
656 return UnableToLegalize
;
657 // Expand in terms of carry-setting/consuming G_ADDE instructions.
658 int NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
660 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, DstRegs
;
661 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
662 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
664 Register CarryIn
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
665 MIRBuilder
.buildConstant(CarryIn
, 0);
667 for (int i
= 0; i
< NumParts
; ++i
) {
668 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
669 Register CarryOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
671 MIRBuilder
.buildUAdde(DstReg
, CarryOut
, Src1Regs
[i
],
672 Src2Regs
[i
], CarryIn
);
674 DstRegs
.push_back(DstReg
);
677 Register DstReg
= MI
.getOperand(0).getReg();
678 if(MRI
.getType(DstReg
).isVector())
679 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
681 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
682 MI
.eraseFromParent();
685 case TargetOpcode::G_SUB
: {
686 // FIXME: add support for when SizeOp0 isn't an exact multiple of
688 if (SizeOp0
% NarrowSize
!= 0)
689 return UnableToLegalize
;
691 int NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
693 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, DstRegs
;
694 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
695 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
697 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
698 Register BorrowOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
699 MIRBuilder
.buildInstr(TargetOpcode::G_USUBO
, {DstReg
, BorrowOut
},
700 {Src1Regs
[0], Src2Regs
[0]});
701 DstRegs
.push_back(DstReg
);
702 Register BorrowIn
= BorrowOut
;
703 for (int i
= 1; i
< NumParts
; ++i
) {
704 DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
705 BorrowOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
707 MIRBuilder
.buildInstr(TargetOpcode::G_USUBE
, {DstReg
, BorrowOut
},
708 {Src1Regs
[i
], Src2Regs
[i
], BorrowIn
});
710 DstRegs
.push_back(DstReg
);
711 BorrowIn
= BorrowOut
;
713 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
714 MI
.eraseFromParent();
717 case TargetOpcode::G_MUL
:
718 case TargetOpcode::G_UMULH
:
719 return narrowScalarMul(MI
, NarrowTy
);
720 case TargetOpcode::G_EXTRACT
:
721 return narrowScalarExtract(MI
, TypeIdx
, NarrowTy
);
722 case TargetOpcode::G_INSERT
:
723 return narrowScalarInsert(MI
, TypeIdx
, NarrowTy
);
724 case TargetOpcode::G_LOAD
: {
725 const auto &MMO
= **MI
.memoperands_begin();
726 Register DstReg
= MI
.getOperand(0).getReg();
727 LLT DstTy
= MRI
.getType(DstReg
);
728 if (DstTy
.isVector())
729 return UnableToLegalize
;
731 if (8 * MMO
.getSize() != DstTy
.getSizeInBits()) {
732 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
733 auto &MMO
= **MI
.memoperands_begin();
734 MIRBuilder
.buildLoad(TmpReg
, MI
.getOperand(1).getReg(), MMO
);
735 MIRBuilder
.buildAnyExt(DstReg
, TmpReg
);
736 MI
.eraseFromParent();
740 return reduceLoadStoreWidth(MI
, TypeIdx
, NarrowTy
);
742 case TargetOpcode::G_ZEXTLOAD
:
743 case TargetOpcode::G_SEXTLOAD
: {
744 bool ZExt
= MI
.getOpcode() == TargetOpcode::G_ZEXTLOAD
;
745 Register DstReg
= MI
.getOperand(0).getReg();
746 Register PtrReg
= MI
.getOperand(1).getReg();
748 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
749 auto &MMO
= **MI
.memoperands_begin();
750 if (MMO
.getSizeInBits() == NarrowSize
) {
751 MIRBuilder
.buildLoad(TmpReg
, PtrReg
, MMO
);
753 unsigned ExtLoad
= ZExt
? TargetOpcode::G_ZEXTLOAD
754 : TargetOpcode::G_SEXTLOAD
;
755 MIRBuilder
.buildInstr(ExtLoad
)
758 .addMemOperand(&MMO
);
762 MIRBuilder
.buildZExt(DstReg
, TmpReg
);
764 MIRBuilder
.buildSExt(DstReg
, TmpReg
);
766 MI
.eraseFromParent();
769 case TargetOpcode::G_STORE
: {
770 const auto &MMO
= **MI
.memoperands_begin();
772 Register SrcReg
= MI
.getOperand(0).getReg();
773 LLT SrcTy
= MRI
.getType(SrcReg
);
774 if (SrcTy
.isVector())
775 return UnableToLegalize
;
777 int NumParts
= SizeOp0
/ NarrowSize
;
778 unsigned HandledSize
= NumParts
* NarrowTy
.getSizeInBits();
779 unsigned LeftoverBits
= SrcTy
.getSizeInBits() - HandledSize
;
780 if (SrcTy
.isVector() && LeftoverBits
!= 0)
781 return UnableToLegalize
;
783 if (8 * MMO
.getSize() != SrcTy
.getSizeInBits()) {
784 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
785 auto &MMO
= **MI
.memoperands_begin();
786 MIRBuilder
.buildTrunc(TmpReg
, SrcReg
);
787 MIRBuilder
.buildStore(TmpReg
, MI
.getOperand(1).getReg(), MMO
);
788 MI
.eraseFromParent();
792 return reduceLoadStoreWidth(MI
, 0, NarrowTy
);
794 case TargetOpcode::G_SELECT
:
795 return narrowScalarSelect(MI
, TypeIdx
, NarrowTy
);
796 case TargetOpcode::G_AND
:
797 case TargetOpcode::G_OR
:
798 case TargetOpcode::G_XOR
: {
799 // Legalize bitwise operation:
800 // A = BinOp<Ty> B, C
802 // B1, ..., BN = G_UNMERGE_VALUES B
803 // C1, ..., CN = G_UNMERGE_VALUES C
804 // A1 = BinOp<Ty/N> B1, C2
806 // AN = BinOp<Ty/N> BN, CN
807 // A = G_MERGE_VALUES A1, ..., AN
808 return narrowScalarBasic(MI
, TypeIdx
, NarrowTy
);
810 case TargetOpcode::G_SHL
:
811 case TargetOpcode::G_LSHR
:
812 case TargetOpcode::G_ASHR
:
813 return narrowScalarShift(MI
, TypeIdx
, NarrowTy
);
814 case TargetOpcode::G_CTLZ
:
815 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
816 case TargetOpcode::G_CTTZ
:
817 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
818 case TargetOpcode::G_CTPOP
:
820 return UnableToLegalize
; // TODO
822 Observer
.changingInstr(MI
);
823 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_ZEXT
);
824 Observer
.changedInstr(MI
);
826 case TargetOpcode::G_INTTOPTR
:
828 return UnableToLegalize
;
830 Observer
.changingInstr(MI
);
831 narrowScalarSrc(MI
, NarrowTy
, 1);
832 Observer
.changedInstr(MI
);
834 case TargetOpcode::G_PTRTOINT
:
836 return UnableToLegalize
;
838 Observer
.changingInstr(MI
);
839 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_ZEXT
);
840 Observer
.changedInstr(MI
);
842 case TargetOpcode::G_PHI
: {
843 unsigned NumParts
= SizeOp0
/ NarrowSize
;
844 SmallVector
<Register
, 2> DstRegs
;
845 SmallVector
<SmallVector
<Register
, 2>, 2> SrcRegs
;
846 DstRegs
.resize(NumParts
);
847 SrcRegs
.resize(MI
.getNumOperands() / 2);
848 Observer
.changingInstr(MI
);
849 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
+= 2) {
850 MachineBasicBlock
&OpMBB
= *MI
.getOperand(i
+ 1).getMBB();
851 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
852 extractParts(MI
.getOperand(i
).getReg(), NarrowTy
, NumParts
,
855 MachineBasicBlock
&MBB
= *MI
.getParent();
856 MIRBuilder
.setInsertPt(MBB
, MI
);
857 for (unsigned i
= 0; i
< NumParts
; ++i
) {
858 DstRegs
[i
] = MRI
.createGenericVirtualRegister(NarrowTy
);
859 MachineInstrBuilder MIB
=
860 MIRBuilder
.buildInstr(TargetOpcode::G_PHI
).addDef(DstRegs
[i
]);
861 for (unsigned j
= 1; j
< MI
.getNumOperands(); j
+= 2)
862 MIB
.addUse(SrcRegs
[j
/ 2][i
]).add(MI
.getOperand(j
+ 1));
864 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
865 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
866 Observer
.changedInstr(MI
);
867 MI
.eraseFromParent();
870 case TargetOpcode::G_EXTRACT_VECTOR_ELT
:
871 case TargetOpcode::G_INSERT_VECTOR_ELT
: {
873 return UnableToLegalize
;
875 int OpIdx
= MI
.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT
? 2 : 3;
876 Observer
.changingInstr(MI
);
877 narrowScalarSrc(MI
, NarrowTy
, OpIdx
);
878 Observer
.changedInstr(MI
);
881 case TargetOpcode::G_ICMP
: {
882 uint64_t SrcSize
= MRI
.getType(MI
.getOperand(2).getReg()).getSizeInBits();
883 if (NarrowSize
* 2 != SrcSize
)
884 return UnableToLegalize
;
886 Observer
.changingInstr(MI
);
887 Register LHSL
= MRI
.createGenericVirtualRegister(NarrowTy
);
888 Register LHSH
= MRI
.createGenericVirtualRegister(NarrowTy
);
889 MIRBuilder
.buildUnmerge({LHSL
, LHSH
}, MI
.getOperand(2).getReg());
891 Register RHSL
= MRI
.createGenericVirtualRegister(NarrowTy
);
892 Register RHSH
= MRI
.createGenericVirtualRegister(NarrowTy
);
893 MIRBuilder
.buildUnmerge({RHSL
, RHSH
}, MI
.getOperand(3).getReg());
895 CmpInst::Predicate Pred
=
896 static_cast<CmpInst::Predicate
>(MI
.getOperand(1).getPredicate());
897 LLT ResTy
= MRI
.getType(MI
.getOperand(0).getReg());
899 if (Pred
== CmpInst::ICMP_EQ
|| Pred
== CmpInst::ICMP_NE
) {
900 MachineInstrBuilder XorL
= MIRBuilder
.buildXor(NarrowTy
, LHSL
, RHSL
);
901 MachineInstrBuilder XorH
= MIRBuilder
.buildXor(NarrowTy
, LHSH
, RHSH
);
902 MachineInstrBuilder Or
= MIRBuilder
.buildOr(NarrowTy
, XorL
, XorH
);
903 MachineInstrBuilder Zero
= MIRBuilder
.buildConstant(NarrowTy
, 0);
904 MIRBuilder
.buildICmp(Pred
, MI
.getOperand(0).getReg(), Or
, Zero
);
906 MachineInstrBuilder CmpH
= MIRBuilder
.buildICmp(Pred
, ResTy
, LHSH
, RHSH
);
907 MachineInstrBuilder CmpHEQ
=
908 MIRBuilder
.buildICmp(CmpInst::Predicate::ICMP_EQ
, ResTy
, LHSH
, RHSH
);
909 MachineInstrBuilder CmpLU
= MIRBuilder
.buildICmp(
910 ICmpInst::getUnsignedPredicate(Pred
), ResTy
, LHSL
, RHSL
);
911 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), CmpHEQ
, CmpLU
, CmpH
);
913 Observer
.changedInstr(MI
);
914 MI
.eraseFromParent();
917 case TargetOpcode::G_SEXT_INREG
: {
919 return UnableToLegalize
;
921 if (!MI
.getOperand(2).isImm())
922 return UnableToLegalize
;
923 int64_t SizeInBits
= MI
.getOperand(2).getImm();
925 // So long as the new type has more bits than the bits we're extending we
926 // don't need to break it apart.
927 if (NarrowTy
.getScalarSizeInBits() >= SizeInBits
) {
928 Observer
.changingInstr(MI
);
929 // We don't lose any non-extension bits by truncating the src and
930 // sign-extending the dst.
931 MachineOperand
&MO1
= MI
.getOperand(1);
932 auto TruncMIB
= MIRBuilder
.buildTrunc(NarrowTy
, MO1
.getReg());
933 MO1
.setReg(TruncMIB
->getOperand(0).getReg());
935 MachineOperand
&MO2
= MI
.getOperand(0);
936 Register DstExt
= MRI
.createGenericVirtualRegister(NarrowTy
);
937 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
938 MIRBuilder
.buildInstr(TargetOpcode::G_SEXT
, {MO2
.getReg()}, {DstExt
});
940 Observer
.changedInstr(MI
);
944 // Break it apart. Components below the extension point are unmodified. The
945 // component containing the extension point becomes a narrower SEXT_INREG.
946 // Components above it are ashr'd from the component containing the
948 if (SizeOp0
% NarrowSize
!= 0)
949 return UnableToLegalize
;
950 int NumParts
= SizeOp0
/ NarrowSize
;
952 // List the registers where the destination will be scattered.
953 SmallVector
<Register
, 2> DstRegs
;
954 // List the registers where the source will be split.
955 SmallVector
<Register
, 2> SrcRegs
;
957 // Create all the temporary registers.
958 for (int i
= 0; i
< NumParts
; ++i
) {
959 Register SrcReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
961 SrcRegs
.push_back(SrcReg
);
964 // Explode the big arguments into smaller chunks.
965 MIRBuilder
.buildUnmerge(SrcRegs
, MI
.getOperand(1).getReg());
967 Register AshrCstReg
=
968 MIRBuilder
.buildConstant(NarrowTy
, NarrowTy
.getScalarSizeInBits() - 1)
971 Register FullExtensionReg
= 0;
972 Register PartialExtensionReg
= 0;
974 // Do the operation on each small part.
975 for (int i
= 0; i
< NumParts
; ++i
) {
976 if ((i
+ 1) * NarrowTy
.getScalarSizeInBits() < SizeInBits
)
977 DstRegs
.push_back(SrcRegs
[i
]);
978 else if (i
* NarrowTy
.getScalarSizeInBits() > SizeInBits
) {
979 assert(PartialExtensionReg
&&
980 "Expected to visit partial extension before full");
981 if (FullExtensionReg
) {
982 DstRegs
.push_back(FullExtensionReg
);
985 DstRegs
.push_back(MIRBuilder
986 .buildInstr(TargetOpcode::G_ASHR
, {NarrowTy
},
987 {PartialExtensionReg
, AshrCstReg
})
990 FullExtensionReg
= DstRegs
.back();
995 TargetOpcode::G_SEXT_INREG
, {NarrowTy
},
996 {SrcRegs
[i
], SizeInBits
% NarrowTy
.getScalarSizeInBits()})
999 PartialExtensionReg
= DstRegs
.back();
1003 // Gather the destination registers into the final destination.
1004 Register DstReg
= MI
.getOperand(0).getReg();
1005 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
1006 MI
.eraseFromParent();
1012 void LegalizerHelper::widenScalarSrc(MachineInstr
&MI
, LLT WideTy
,
1013 unsigned OpIdx
, unsigned ExtOpcode
) {
1014 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1015 auto ExtB
= MIRBuilder
.buildInstr(ExtOpcode
, {WideTy
}, {MO
.getReg()});
1016 MO
.setReg(ExtB
->getOperand(0).getReg());
1019 void LegalizerHelper::narrowScalarSrc(MachineInstr
&MI
, LLT NarrowTy
,
1021 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1022 auto ExtB
= MIRBuilder
.buildInstr(TargetOpcode::G_TRUNC
, {NarrowTy
},
1024 MO
.setReg(ExtB
->getOperand(0).getReg());
1027 void LegalizerHelper::widenScalarDst(MachineInstr
&MI
, LLT WideTy
,
1028 unsigned OpIdx
, unsigned TruncOpcode
) {
1029 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1030 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1031 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1032 MIRBuilder
.buildInstr(TruncOpcode
, {MO
.getReg()}, {DstExt
});
1036 void LegalizerHelper::narrowScalarDst(MachineInstr
&MI
, LLT NarrowTy
,
1037 unsigned OpIdx
, unsigned ExtOpcode
) {
1038 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1039 Register DstTrunc
= MRI
.createGenericVirtualRegister(NarrowTy
);
1040 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1041 MIRBuilder
.buildInstr(ExtOpcode
, {MO
.getReg()}, {DstTrunc
});
1042 MO
.setReg(DstTrunc
);
1045 void LegalizerHelper::moreElementsVectorDst(MachineInstr
&MI
, LLT WideTy
,
1047 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1048 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1049 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1050 MIRBuilder
.buildExtract(MO
.getReg(), DstExt
, 0);
1054 void LegalizerHelper::moreElementsVectorSrc(MachineInstr
&MI
, LLT MoreTy
,
1056 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
1058 LLT OldTy
= MRI
.getType(MO
.getReg());
1059 unsigned OldElts
= OldTy
.getNumElements();
1060 unsigned NewElts
= MoreTy
.getNumElements();
1062 unsigned NumParts
= NewElts
/ OldElts
;
1064 // Use concat_vectors if the result is a multiple of the number of elements.
1065 if (NumParts
* OldElts
== NewElts
) {
1066 SmallVector
<Register
, 8> Parts
;
1067 Parts
.push_back(MO
.getReg());
1069 Register ImpDef
= MIRBuilder
.buildUndef(OldTy
).getReg(0);
1070 for (unsigned I
= 1; I
!= NumParts
; ++I
)
1071 Parts
.push_back(ImpDef
);
1073 auto Concat
= MIRBuilder
.buildConcatVectors(MoreTy
, Parts
);
1074 MO
.setReg(Concat
.getReg(0));
1078 Register MoreReg
= MRI
.createGenericVirtualRegister(MoreTy
);
1079 Register ImpDef
= MIRBuilder
.buildUndef(MoreTy
).getReg(0);
1080 MIRBuilder
.buildInsert(MoreReg
, ImpDef
, MO
.getReg(), 0);
1084 LegalizerHelper::LegalizeResult
1085 LegalizerHelper::widenScalarMergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
1088 return UnableToLegalize
;
1090 Register DstReg
= MI
.getOperand(0).getReg();
1091 LLT DstTy
= MRI
.getType(DstReg
);
1092 if (DstTy
.isVector())
1093 return UnableToLegalize
;
1095 Register Src1
= MI
.getOperand(1).getReg();
1096 LLT SrcTy
= MRI
.getType(Src1
);
1097 const int DstSize
= DstTy
.getSizeInBits();
1098 const int SrcSize
= SrcTy
.getSizeInBits();
1099 const int WideSize
= WideTy
.getSizeInBits();
1100 const int NumMerge
= (DstSize
+ WideSize
- 1) / WideSize
;
1102 unsigned NumOps
= MI
.getNumOperands();
1103 unsigned NumSrc
= MI
.getNumOperands() - 1;
1104 unsigned PartSize
= DstTy
.getSizeInBits() / NumSrc
;
1106 if (WideSize
>= DstSize
) {
1107 // Directly pack the bits in the target type.
1108 Register ResultReg
= MIRBuilder
.buildZExt(WideTy
, Src1
).getReg(0);
1110 for (unsigned I
= 2; I
!= NumOps
; ++I
) {
1111 const unsigned Offset
= (I
- 1) * PartSize
;
1113 Register SrcReg
= MI
.getOperand(I
).getReg();
1114 assert(MRI
.getType(SrcReg
) == LLT::scalar(PartSize
));
1116 auto ZextInput
= MIRBuilder
.buildZExt(WideTy
, SrcReg
);
1118 Register NextResult
= I
+ 1 == NumOps
&& WideTy
== DstTy
? DstReg
:
1119 MRI
.createGenericVirtualRegister(WideTy
);
1121 auto ShiftAmt
= MIRBuilder
.buildConstant(WideTy
, Offset
);
1122 auto Shl
= MIRBuilder
.buildShl(WideTy
, ZextInput
, ShiftAmt
);
1123 MIRBuilder
.buildOr(NextResult
, ResultReg
, Shl
);
1124 ResultReg
= NextResult
;
1127 if (WideSize
> DstSize
)
1128 MIRBuilder
.buildTrunc(DstReg
, ResultReg
);
1129 else if (DstTy
.isPointer())
1130 MIRBuilder
.buildIntToPtr(DstReg
, ResultReg
);
1132 MI
.eraseFromParent();
1136 // Unmerge the original values to the GCD type, and recombine to the next
1137 // multiple greater than the original type.
1139 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1140 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1141 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1142 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1143 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1144 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1145 // %12:_(s12) = G_MERGE_VALUES %10, %11
1147 // Padding with undef if necessary:
1149 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1150 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1151 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1152 // %7:_(s2) = G_IMPLICIT_DEF
1153 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1154 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1155 // %10:_(s12) = G_MERGE_VALUES %8, %9
1157 const int GCD
= greatestCommonDivisor(SrcSize
, WideSize
);
1158 LLT GCDTy
= LLT::scalar(GCD
);
1160 SmallVector
<Register
, 8> Parts
;
1161 SmallVector
<Register
, 8> NewMergeRegs
;
1162 SmallVector
<Register
, 8> Unmerges
;
1163 LLT WideDstTy
= LLT::scalar(NumMerge
* WideSize
);
1165 // Decompose the original operands if they don't evenly divide.
1166 for (int I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
1167 Register SrcReg
= MI
.getOperand(I
).getReg();
1168 if (GCD
== SrcSize
) {
1169 Unmerges
.push_back(SrcReg
);
1171 auto Unmerge
= MIRBuilder
.buildUnmerge(GCDTy
, SrcReg
);
1172 for (int J
= 0, JE
= Unmerge
->getNumOperands() - 1; J
!= JE
; ++J
)
1173 Unmerges
.push_back(Unmerge
.getReg(J
));
1177 // Pad with undef to the next size that is a multiple of the requested size.
1178 if (static_cast<int>(Unmerges
.size()) != NumMerge
* WideSize
) {
1179 Register UndefReg
= MIRBuilder
.buildUndef(GCDTy
).getReg(0);
1180 for (int I
= Unmerges
.size(); I
!= NumMerge
* WideSize
; ++I
)
1181 Unmerges
.push_back(UndefReg
);
1184 const int PartsPerGCD
= WideSize
/ GCD
;
1186 // Build merges of each piece.
1187 ArrayRef
<Register
> Slicer(Unmerges
);
1188 for (int I
= 0; I
!= NumMerge
; ++I
, Slicer
= Slicer
.drop_front(PartsPerGCD
)) {
1189 auto Merge
= MIRBuilder
.buildMerge(WideTy
, Slicer
.take_front(PartsPerGCD
));
1190 NewMergeRegs
.push_back(Merge
.getReg(0));
1193 // A truncate may be necessary if the requested type doesn't evenly divide the
1194 // original result type.
1195 if (DstTy
.getSizeInBits() == WideDstTy
.getSizeInBits()) {
1196 MIRBuilder
.buildMerge(DstReg
, NewMergeRegs
);
1198 auto FinalMerge
= MIRBuilder
.buildMerge(WideDstTy
, NewMergeRegs
);
1199 MIRBuilder
.buildTrunc(DstReg
, FinalMerge
.getReg(0));
1202 MI
.eraseFromParent();
1206 LegalizerHelper::LegalizeResult
1207 LegalizerHelper::widenScalarUnmergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
1210 return UnableToLegalize
;
1212 unsigned NumDst
= MI
.getNumOperands() - 1;
1213 Register SrcReg
= MI
.getOperand(NumDst
).getReg();
1214 LLT SrcTy
= MRI
.getType(SrcReg
);
1215 if (!SrcTy
.isScalar())
1216 return UnableToLegalize
;
1218 Register Dst0Reg
= MI
.getOperand(0).getReg();
1219 LLT DstTy
= MRI
.getType(Dst0Reg
);
1220 if (!DstTy
.isScalar())
1221 return UnableToLegalize
;
1223 unsigned NewSrcSize
= NumDst
* WideTy
.getSizeInBits();
1224 LLT NewSrcTy
= LLT::scalar(NewSrcSize
);
1225 unsigned SizeDiff
= WideTy
.getSizeInBits() - DstTy
.getSizeInBits();
1227 auto WideSrc
= MIRBuilder
.buildZExt(NewSrcTy
, SrcReg
);
1229 for (unsigned I
= 1; I
!= NumDst
; ++I
) {
1230 auto ShiftAmt
= MIRBuilder
.buildConstant(NewSrcTy
, SizeDiff
* I
);
1231 auto Shl
= MIRBuilder
.buildShl(NewSrcTy
, WideSrc
, ShiftAmt
);
1232 WideSrc
= MIRBuilder
.buildOr(NewSrcTy
, WideSrc
, Shl
);
1235 Observer
.changingInstr(MI
);
1237 MI
.getOperand(NumDst
).setReg(WideSrc
->getOperand(0).getReg());
1238 for (unsigned I
= 0; I
!= NumDst
; ++I
)
1239 widenScalarDst(MI
, WideTy
, I
);
1241 Observer
.changedInstr(MI
);
1246 LegalizerHelper::LegalizeResult
1247 LegalizerHelper::widenScalarExtract(MachineInstr
&MI
, unsigned TypeIdx
,
1249 Register DstReg
= MI
.getOperand(0).getReg();
1250 Register SrcReg
= MI
.getOperand(1).getReg();
1251 LLT SrcTy
= MRI
.getType(SrcReg
);
1253 LLT DstTy
= MRI
.getType(DstReg
);
1254 unsigned Offset
= MI
.getOperand(2).getImm();
1257 if (SrcTy
.isVector() || DstTy
.isVector())
1258 return UnableToLegalize
;
1261 if (SrcTy
.isPointer()) {
1262 // Extracts from pointers can be handled only if they are really just
1264 const DataLayout
&DL
= MIRBuilder
.getDataLayout();
1265 if (DL
.isNonIntegralAddressSpace(SrcTy
.getAddressSpace()))
1266 return UnableToLegalize
;
1268 LLT SrcAsIntTy
= LLT::scalar(SrcTy
.getSizeInBits());
1269 Src
= MIRBuilder
.buildPtrToInt(SrcAsIntTy
, Src
);
1273 if (DstTy
.isPointer())
1274 return UnableToLegalize
;
1277 // Avoid a shift in the degenerate case.
1278 MIRBuilder
.buildTrunc(DstReg
,
1279 MIRBuilder
.buildAnyExtOrTrunc(WideTy
, Src
));
1280 MI
.eraseFromParent();
1284 // Do a shift in the source type.
1285 LLT ShiftTy
= SrcTy
;
1286 if (WideTy
.getSizeInBits() > SrcTy
.getSizeInBits()) {
1287 Src
= MIRBuilder
.buildAnyExt(WideTy
, Src
);
1289 } else if (WideTy
.getSizeInBits() > SrcTy
.getSizeInBits())
1290 return UnableToLegalize
;
1292 auto LShr
= MIRBuilder
.buildLShr(
1293 ShiftTy
, Src
, MIRBuilder
.buildConstant(ShiftTy
, Offset
));
1294 MIRBuilder
.buildTrunc(DstReg
, LShr
);
1295 MI
.eraseFromParent();
1299 if (SrcTy
.isScalar()) {
1300 Observer
.changingInstr(MI
);
1301 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1302 Observer
.changedInstr(MI
);
1306 if (!SrcTy
.isVector())
1307 return UnableToLegalize
;
1309 if (DstTy
!= SrcTy
.getElementType())
1310 return UnableToLegalize
;
1312 if (Offset
% SrcTy
.getScalarSizeInBits() != 0)
1313 return UnableToLegalize
;
1315 Observer
.changingInstr(MI
);
1316 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1318 MI
.getOperand(2).setImm((WideTy
.getSizeInBits() / SrcTy
.getSizeInBits()) *
1320 widenScalarDst(MI
, WideTy
.getScalarType(), 0);
1321 Observer
.changedInstr(MI
);
1325 LegalizerHelper::LegalizeResult
1326 LegalizerHelper::widenScalarInsert(MachineInstr
&MI
, unsigned TypeIdx
,
1329 return UnableToLegalize
;
1330 Observer
.changingInstr(MI
);
1331 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1332 widenScalarDst(MI
, WideTy
);
1333 Observer
.changedInstr(MI
);
1337 LegalizerHelper::LegalizeResult
1338 LegalizerHelper::widenScalar(MachineInstr
&MI
, unsigned TypeIdx
, LLT WideTy
) {
1339 MIRBuilder
.setInstr(MI
);
1341 switch (MI
.getOpcode()) {
1343 return UnableToLegalize
;
1344 case TargetOpcode::G_EXTRACT
:
1345 return widenScalarExtract(MI
, TypeIdx
, WideTy
);
1346 case TargetOpcode::G_INSERT
:
1347 return widenScalarInsert(MI
, TypeIdx
, WideTy
);
1348 case TargetOpcode::G_MERGE_VALUES
:
1349 return widenScalarMergeValues(MI
, TypeIdx
, WideTy
);
1350 case TargetOpcode::G_UNMERGE_VALUES
:
1351 return widenScalarUnmergeValues(MI
, TypeIdx
, WideTy
);
1352 case TargetOpcode::G_UADDO
:
1353 case TargetOpcode::G_USUBO
: {
1355 return UnableToLegalize
; // TODO
1356 auto LHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, {WideTy
},
1357 {MI
.getOperand(2).getReg()});
1358 auto RHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, {WideTy
},
1359 {MI
.getOperand(3).getReg()});
1360 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_UADDO
1361 ? TargetOpcode::G_ADD
1362 : TargetOpcode::G_SUB
;
1363 // Do the arithmetic in the larger type.
1364 auto NewOp
= MIRBuilder
.buildInstr(Opcode
, {WideTy
}, {LHSZext
, RHSZext
});
1365 LLT OrigTy
= MRI
.getType(MI
.getOperand(0).getReg());
1366 APInt Mask
= APInt::getAllOnesValue(OrigTy
.getSizeInBits());
1367 auto AndOp
= MIRBuilder
.buildInstr(
1368 TargetOpcode::G_AND
, {WideTy
},
1369 {NewOp
, MIRBuilder
.buildConstant(WideTy
, Mask
.getZExtValue())});
1370 // There is no overflow if the AndOp is the same as NewOp.
1371 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, MI
.getOperand(1).getReg(), NewOp
,
1373 // Now trunc the NewOp to the original result.
1374 MIRBuilder
.buildTrunc(MI
.getOperand(0).getReg(), NewOp
);
1375 MI
.eraseFromParent();
1378 case TargetOpcode::G_CTTZ
:
1379 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
1380 case TargetOpcode::G_CTLZ
:
1381 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
1382 case TargetOpcode::G_CTPOP
: {
1384 Observer
.changingInstr(MI
);
1385 widenScalarDst(MI
, WideTy
, 0);
1386 Observer
.changedInstr(MI
);
1390 Register SrcReg
= MI
.getOperand(1).getReg();
1392 // First ZEXT the input.
1393 auto MIBSrc
= MIRBuilder
.buildZExt(WideTy
, SrcReg
);
1394 LLT CurTy
= MRI
.getType(SrcReg
);
1395 if (MI
.getOpcode() == TargetOpcode::G_CTTZ
) {
1396 // The count is the same in the larger type except if the original
1397 // value was zero. This can be handled by setting the bit just off
1398 // the top of the original type.
1400 APInt::getOneBitSet(WideTy
.getSizeInBits(), CurTy
.getSizeInBits());
1401 MIBSrc
= MIRBuilder
.buildOr(
1402 WideTy
, MIBSrc
, MIRBuilder
.buildConstant(WideTy
, TopBit
));
1405 // Perform the operation at the larger size.
1406 auto MIBNewOp
= MIRBuilder
.buildInstr(MI
.getOpcode(), {WideTy
}, {MIBSrc
});
1407 // This is already the correct result for CTPOP and CTTZs
1408 if (MI
.getOpcode() == TargetOpcode::G_CTLZ
||
1409 MI
.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF
) {
1410 // The correct result is NewOp - (Difference in widety and current ty).
1411 unsigned SizeDiff
= WideTy
.getSizeInBits() - CurTy
.getSizeInBits();
1412 MIBNewOp
= MIRBuilder
.buildInstr(
1413 TargetOpcode::G_SUB
, {WideTy
},
1414 {MIBNewOp
, MIRBuilder
.buildConstant(WideTy
, SizeDiff
)});
1417 MIRBuilder
.buildZExtOrTrunc(MI
.getOperand(0), MIBNewOp
);
1418 MI
.eraseFromParent();
1421 case TargetOpcode::G_BSWAP
: {
1422 Observer
.changingInstr(MI
);
1423 Register DstReg
= MI
.getOperand(0).getReg();
1425 Register ShrReg
= MRI
.createGenericVirtualRegister(WideTy
);
1426 Register DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
1427 Register ShiftAmtReg
= MRI
.createGenericVirtualRegister(WideTy
);
1428 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1430 MI
.getOperand(0).setReg(DstExt
);
1432 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
1434 LLT Ty
= MRI
.getType(DstReg
);
1435 unsigned DiffBits
= WideTy
.getScalarSizeInBits() - Ty
.getScalarSizeInBits();
1436 MIRBuilder
.buildConstant(ShiftAmtReg
, DiffBits
);
1437 MIRBuilder
.buildInstr(TargetOpcode::G_LSHR
)
1440 .addUse(ShiftAmtReg
);
1442 MIRBuilder
.buildTrunc(DstReg
, ShrReg
);
1443 Observer
.changedInstr(MI
);
1446 case TargetOpcode::G_ADD
:
1447 case TargetOpcode::G_AND
:
1448 case TargetOpcode::G_MUL
:
1449 case TargetOpcode::G_OR
:
1450 case TargetOpcode::G_XOR
:
1451 case TargetOpcode::G_SUB
:
1452 // Perform operation at larger width (any extension is fines here, high bits
1453 // don't affect the result) and then truncate the result back to the
1455 Observer
.changingInstr(MI
);
1456 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1457 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
1458 widenScalarDst(MI
, WideTy
);
1459 Observer
.changedInstr(MI
);
1462 case TargetOpcode::G_SHL
:
1463 Observer
.changingInstr(MI
);
1466 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1467 widenScalarDst(MI
, WideTy
);
1469 assert(TypeIdx
== 1);
1470 // The "number of bits to shift" operand must preserve its value as an
1471 // unsigned integer:
1472 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1475 Observer
.changedInstr(MI
);
1478 case TargetOpcode::G_SDIV
:
1479 case TargetOpcode::G_SREM
:
1480 case TargetOpcode::G_SMIN
:
1481 case TargetOpcode::G_SMAX
:
1482 Observer
.changingInstr(MI
);
1483 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
1484 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1485 widenScalarDst(MI
, WideTy
);
1486 Observer
.changedInstr(MI
);
1489 case TargetOpcode::G_ASHR
:
1490 case TargetOpcode::G_LSHR
:
1491 Observer
.changingInstr(MI
);
1494 unsigned CvtOp
= MI
.getOpcode() == TargetOpcode::G_ASHR
?
1495 TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT
;
1497 widenScalarSrc(MI
, WideTy
, 1, CvtOp
);
1498 widenScalarDst(MI
, WideTy
);
1500 assert(TypeIdx
== 1);
1501 // The "number of bits to shift" operand must preserve its value as an
1502 // unsigned integer:
1503 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1506 Observer
.changedInstr(MI
);
1508 case TargetOpcode::G_UDIV
:
1509 case TargetOpcode::G_UREM
:
1510 case TargetOpcode::G_UMIN
:
1511 case TargetOpcode::G_UMAX
:
1512 Observer
.changingInstr(MI
);
1513 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1514 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1515 widenScalarDst(MI
, WideTy
);
1516 Observer
.changedInstr(MI
);
1519 case TargetOpcode::G_SELECT
:
1520 Observer
.changingInstr(MI
);
1522 // Perform operation at larger width (any extension is fine here, high
1523 // bits don't affect the result) and then truncate the result back to the
1525 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
1526 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_ANYEXT
);
1527 widenScalarDst(MI
, WideTy
);
1529 bool IsVec
= MRI
.getType(MI
.getOperand(1).getReg()).isVector();
1530 // Explicit extension is required here since high bits affect the result.
1531 widenScalarSrc(MI
, WideTy
, 1, MIRBuilder
.getBoolExtOp(IsVec
, false));
1533 Observer
.changedInstr(MI
);
1536 case TargetOpcode::G_FPTOSI
:
1537 case TargetOpcode::G_FPTOUI
:
1539 return UnableToLegalize
;
1540 Observer
.changingInstr(MI
);
1541 widenScalarDst(MI
, WideTy
);
1542 Observer
.changedInstr(MI
);
1545 case TargetOpcode::G_SITOFP
:
1547 return UnableToLegalize
;
1548 Observer
.changingInstr(MI
);
1549 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
1550 Observer
.changedInstr(MI
);
1553 case TargetOpcode::G_UITOFP
:
1555 return UnableToLegalize
;
1556 Observer
.changingInstr(MI
);
1557 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1558 Observer
.changedInstr(MI
);
1561 case TargetOpcode::G_LOAD
:
1562 case TargetOpcode::G_SEXTLOAD
:
1563 case TargetOpcode::G_ZEXTLOAD
:
1564 Observer
.changingInstr(MI
);
1565 widenScalarDst(MI
, WideTy
);
1566 Observer
.changedInstr(MI
);
1569 case TargetOpcode::G_STORE
: {
1571 return UnableToLegalize
;
1573 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
1574 if (!isPowerOf2_32(Ty
.getSizeInBits()))
1575 return UnableToLegalize
;
1577 Observer
.changingInstr(MI
);
1579 unsigned ExtType
= Ty
.getScalarSizeInBits() == 1 ?
1580 TargetOpcode::G_ZEXT
: TargetOpcode::G_ANYEXT
;
1581 widenScalarSrc(MI
, WideTy
, 0, ExtType
);
1583 Observer
.changedInstr(MI
);
1586 case TargetOpcode::G_CONSTANT
: {
1587 MachineOperand
&SrcMO
= MI
.getOperand(1);
1588 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1589 const APInt
&Val
= SrcMO
.getCImm()->getValue().sext(WideTy
.getSizeInBits());
1590 Observer
.changingInstr(MI
);
1591 SrcMO
.setCImm(ConstantInt::get(Ctx
, Val
));
1593 widenScalarDst(MI
, WideTy
);
1594 Observer
.changedInstr(MI
);
1597 case TargetOpcode::G_FCONSTANT
: {
1598 MachineOperand
&SrcMO
= MI
.getOperand(1);
1599 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1600 APFloat Val
= SrcMO
.getFPImm()->getValueAPF();
1602 switch (WideTy
.getSizeInBits()) {
1604 Val
.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven
,
1608 Val
.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven
,
1612 return UnableToLegalize
;
1615 assert(!LosesInfo
&& "extend should always be lossless");
1617 Observer
.changingInstr(MI
);
1618 SrcMO
.setFPImm(ConstantFP::get(Ctx
, Val
));
1620 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
1621 Observer
.changedInstr(MI
);
1624 case TargetOpcode::G_IMPLICIT_DEF
: {
1625 Observer
.changingInstr(MI
);
1626 widenScalarDst(MI
, WideTy
);
1627 Observer
.changedInstr(MI
);
1630 case TargetOpcode::G_BRCOND
:
1631 Observer
.changingInstr(MI
);
1632 widenScalarSrc(MI
, WideTy
, 0, MIRBuilder
.getBoolExtOp(false, false));
1633 Observer
.changedInstr(MI
);
1636 case TargetOpcode::G_FCMP
:
1637 Observer
.changingInstr(MI
);
1639 widenScalarDst(MI
, WideTy
);
1641 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_FPEXT
);
1642 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_FPEXT
);
1644 Observer
.changedInstr(MI
);
1647 case TargetOpcode::G_ICMP
:
1648 Observer
.changingInstr(MI
);
1650 widenScalarDst(MI
, WideTy
);
1652 unsigned ExtOpcode
= CmpInst::isSigned(static_cast<CmpInst::Predicate
>(
1653 MI
.getOperand(1).getPredicate()))
1654 ? TargetOpcode::G_SEXT
1655 : TargetOpcode::G_ZEXT
;
1656 widenScalarSrc(MI
, WideTy
, 2, ExtOpcode
);
1657 widenScalarSrc(MI
, WideTy
, 3, ExtOpcode
);
1659 Observer
.changedInstr(MI
);
1662 case TargetOpcode::G_GEP
:
1663 assert(TypeIdx
== 1 && "unable to legalize pointer of GEP");
1664 Observer
.changingInstr(MI
);
1665 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1666 Observer
.changedInstr(MI
);
1669 case TargetOpcode::G_PHI
: {
1670 assert(TypeIdx
== 0 && "Expecting only Idx 0");
1672 Observer
.changingInstr(MI
);
1673 for (unsigned I
= 1; I
< MI
.getNumOperands(); I
+= 2) {
1674 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
1675 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
1676 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_ANYEXT
);
1679 MachineBasicBlock
&MBB
= *MI
.getParent();
1680 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
1681 widenScalarDst(MI
, WideTy
);
1682 Observer
.changedInstr(MI
);
1685 case TargetOpcode::G_EXTRACT_VECTOR_ELT
: {
1687 Register VecReg
= MI
.getOperand(1).getReg();
1688 LLT VecTy
= MRI
.getType(VecReg
);
1689 Observer
.changingInstr(MI
);
1691 widenScalarSrc(MI
, LLT::vector(VecTy
.getNumElements(),
1692 WideTy
.getSizeInBits()),
1693 1, TargetOpcode::G_SEXT
);
1695 widenScalarDst(MI
, WideTy
, 0);
1696 Observer
.changedInstr(MI
);
1701 return UnableToLegalize
;
1702 Observer
.changingInstr(MI
);
1703 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1704 Observer
.changedInstr(MI
);
1707 case TargetOpcode::G_FADD
:
1708 case TargetOpcode::G_FMUL
:
1709 case TargetOpcode::G_FSUB
:
1710 case TargetOpcode::G_FMA
:
1711 case TargetOpcode::G_FNEG
:
1712 case TargetOpcode::G_FABS
:
1713 case TargetOpcode::G_FCANONICALIZE
:
1714 case TargetOpcode::G_FMINNUM
:
1715 case TargetOpcode::G_FMAXNUM
:
1716 case TargetOpcode::G_FMINNUM_IEEE
:
1717 case TargetOpcode::G_FMAXNUM_IEEE
:
1718 case TargetOpcode::G_FMINIMUM
:
1719 case TargetOpcode::G_FMAXIMUM
:
1720 case TargetOpcode::G_FDIV
:
1721 case TargetOpcode::G_FREM
:
1722 case TargetOpcode::G_FCEIL
:
1723 case TargetOpcode::G_FFLOOR
:
1724 case TargetOpcode::G_FCOS
:
1725 case TargetOpcode::G_FSIN
:
1726 case TargetOpcode::G_FLOG10
:
1727 case TargetOpcode::G_FLOG
:
1728 case TargetOpcode::G_FLOG2
:
1729 case TargetOpcode::G_FRINT
:
1730 case TargetOpcode::G_FNEARBYINT
:
1731 case TargetOpcode::G_FSQRT
:
1732 case TargetOpcode::G_FEXP
:
1733 case TargetOpcode::G_FEXP2
:
1734 case TargetOpcode::G_FPOW
:
1735 case TargetOpcode::G_INTRINSIC_TRUNC
:
1736 case TargetOpcode::G_INTRINSIC_ROUND
:
1737 assert(TypeIdx
== 0);
1738 Observer
.changingInstr(MI
);
1740 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
)
1741 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_FPEXT
);
1743 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
1744 Observer
.changedInstr(MI
);
1746 case TargetOpcode::G_INTTOPTR
:
1748 return UnableToLegalize
;
1750 Observer
.changingInstr(MI
);
1751 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1752 Observer
.changedInstr(MI
);
1754 case TargetOpcode::G_PTRTOINT
:
1756 return UnableToLegalize
;
1758 Observer
.changingInstr(MI
);
1759 widenScalarDst(MI
, WideTy
, 0);
1760 Observer
.changedInstr(MI
);
1762 case TargetOpcode::G_BUILD_VECTOR
: {
1763 Observer
.changingInstr(MI
);
1765 const LLT WideEltTy
= TypeIdx
== 1 ? WideTy
: WideTy
.getElementType();
1766 for (int I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
)
1767 widenScalarSrc(MI
, WideEltTy
, I
, TargetOpcode::G_ANYEXT
);
1769 // Avoid changing the result vector type if the source element type was
1772 auto &TII
= *MI
.getMF()->getSubtarget().getInstrInfo();
1773 MI
.setDesc(TII
.get(TargetOpcode::G_BUILD_VECTOR_TRUNC
));
1775 widenScalarDst(MI
, WideTy
, 0);
1778 Observer
.changedInstr(MI
);
1781 case TargetOpcode::G_SEXT_INREG
:
1783 return UnableToLegalize
;
1785 Observer
.changingInstr(MI
);
1786 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1787 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_TRUNC
);
1788 Observer
.changedInstr(MI
);
1793 LegalizerHelper::LegalizeResult
1794 LegalizerHelper::lower(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
1795 using namespace TargetOpcode
;
1796 MIRBuilder
.setInstr(MI
);
1798 switch(MI
.getOpcode()) {
1800 return UnableToLegalize
;
1801 case TargetOpcode::G_SREM
:
1802 case TargetOpcode::G_UREM
: {
1803 Register QuotReg
= MRI
.createGenericVirtualRegister(Ty
);
1804 MIRBuilder
.buildInstr(MI
.getOpcode() == G_SREM
? G_SDIV
: G_UDIV
)
1806 .addUse(MI
.getOperand(1).getReg())
1807 .addUse(MI
.getOperand(2).getReg());
1809 Register ProdReg
= MRI
.createGenericVirtualRegister(Ty
);
1810 MIRBuilder
.buildMul(ProdReg
, QuotReg
, MI
.getOperand(2).getReg());
1811 MIRBuilder
.buildSub(MI
.getOperand(0).getReg(), MI
.getOperand(1).getReg(),
1813 MI
.eraseFromParent();
1816 case TargetOpcode::G_SMULO
:
1817 case TargetOpcode::G_UMULO
: {
1818 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1820 Register Res
= MI
.getOperand(0).getReg();
1821 Register Overflow
= MI
.getOperand(1).getReg();
1822 Register LHS
= MI
.getOperand(2).getReg();
1823 Register RHS
= MI
.getOperand(3).getReg();
1825 MIRBuilder
.buildMul(Res
, LHS
, RHS
);
1827 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_SMULO
1828 ? TargetOpcode::G_SMULH
1829 : TargetOpcode::G_UMULH
;
1831 Register HiPart
= MRI
.createGenericVirtualRegister(Ty
);
1832 MIRBuilder
.buildInstr(Opcode
)
1837 Register Zero
= MRI
.createGenericVirtualRegister(Ty
);
1838 MIRBuilder
.buildConstant(Zero
, 0);
1840 // For *signed* multiply, overflow is detected by checking:
1841 // (hi != (lo >> bitwidth-1))
1842 if (Opcode
== TargetOpcode::G_SMULH
) {
1843 Register Shifted
= MRI
.createGenericVirtualRegister(Ty
);
1844 Register ShiftAmt
= MRI
.createGenericVirtualRegister(Ty
);
1845 MIRBuilder
.buildConstant(ShiftAmt
, Ty
.getSizeInBits() - 1);
1846 MIRBuilder
.buildInstr(TargetOpcode::G_ASHR
)
1850 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Shifted
);
1852 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Zero
);
1854 MI
.eraseFromParent();
1857 case TargetOpcode::G_FNEG
: {
1858 // TODO: Handle vector types once we are able to
1861 return UnableToLegalize
;
1862 Register Res
= MI
.getOperand(0).getReg();
1864 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1865 switch (Ty
.getSizeInBits()) {
1867 ZeroTy
= Type::getHalfTy(Ctx
);
1870 ZeroTy
= Type::getFloatTy(Ctx
);
1873 ZeroTy
= Type::getDoubleTy(Ctx
);
1876 ZeroTy
= Type::getFP128Ty(Ctx
);
1879 llvm_unreachable("unexpected floating-point type");
1881 ConstantFP
&ZeroForNegation
=
1882 *cast
<ConstantFP
>(ConstantFP::getZeroValueForNegation(ZeroTy
));
1883 auto Zero
= MIRBuilder
.buildFConstant(Ty
, ZeroForNegation
);
1884 Register SubByReg
= MI
.getOperand(1).getReg();
1885 Register ZeroReg
= Zero
->getOperand(0).getReg();
1886 MIRBuilder
.buildInstr(TargetOpcode::G_FSUB
, {Res
}, {ZeroReg
, SubByReg
},
1888 MI
.eraseFromParent();
1891 case TargetOpcode::G_FSUB
: {
1892 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1893 // First, check if G_FNEG is marked as Lower. If so, we may
1894 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1895 if (LI
.getAction({G_FNEG
, {Ty
}}).Action
== Lower
)
1896 return UnableToLegalize
;
1897 Register Res
= MI
.getOperand(0).getReg();
1898 Register LHS
= MI
.getOperand(1).getReg();
1899 Register RHS
= MI
.getOperand(2).getReg();
1900 Register Neg
= MRI
.createGenericVirtualRegister(Ty
);
1901 MIRBuilder
.buildInstr(TargetOpcode::G_FNEG
).addDef(Neg
).addUse(RHS
);
1902 MIRBuilder
.buildInstr(TargetOpcode::G_FADD
, {Res
}, {LHS
, Neg
}, MI
.getFlags());
1903 MI
.eraseFromParent();
1906 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS
: {
1907 Register OldValRes
= MI
.getOperand(0).getReg();
1908 Register SuccessRes
= MI
.getOperand(1).getReg();
1909 Register Addr
= MI
.getOperand(2).getReg();
1910 Register CmpVal
= MI
.getOperand(3).getReg();
1911 Register NewVal
= MI
.getOperand(4).getReg();
1912 MIRBuilder
.buildAtomicCmpXchg(OldValRes
, Addr
, CmpVal
, NewVal
,
1913 **MI
.memoperands_begin());
1914 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, SuccessRes
, OldValRes
, CmpVal
);
1915 MI
.eraseFromParent();
1918 case TargetOpcode::G_LOAD
:
1919 case TargetOpcode::G_SEXTLOAD
:
1920 case TargetOpcode::G_ZEXTLOAD
: {
1921 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1922 Register DstReg
= MI
.getOperand(0).getReg();
1923 Register PtrReg
= MI
.getOperand(1).getReg();
1924 LLT DstTy
= MRI
.getType(DstReg
);
1925 auto &MMO
= **MI
.memoperands_begin();
1927 if (DstTy
.getSizeInBits() == MMO
.getSizeInBits()) {
1928 if (MI
.getOpcode() == TargetOpcode::G_LOAD
) {
1929 // This load needs splitting into power of 2 sized loads.
1930 if (DstTy
.isVector())
1931 return UnableToLegalize
;
1932 if (isPowerOf2_32(DstTy
.getSizeInBits()))
1933 return UnableToLegalize
; // Don't know what we're being asked to do.
1935 // Our strategy here is to generate anyextending loads for the smaller
1936 // types up to next power-2 result type, and then combine the two larger
1937 // result values together, before truncating back down to the non-pow-2
1939 // E.g. v1 = i24 load =>
1940 // v2 = i32 load (2 byte)
1941 // v3 = i32 load (1 byte)
1942 // v4 = i32 shl v3, 16
1943 // v5 = i32 or v4, v2
1944 // v1 = i24 trunc v5
1945 // By doing this we generate the correct truncate which should get
1946 // combined away as an artifact with a matching extend.
1947 uint64_t LargeSplitSize
= PowerOf2Floor(DstTy
.getSizeInBits());
1948 uint64_t SmallSplitSize
= DstTy
.getSizeInBits() - LargeSplitSize
;
1950 MachineFunction
&MF
= MIRBuilder
.getMF();
1951 MachineMemOperand
*LargeMMO
=
1952 MF
.getMachineMemOperand(&MMO
, 0, LargeSplitSize
/ 8);
1953 MachineMemOperand
*SmallMMO
= MF
.getMachineMemOperand(
1954 &MMO
, LargeSplitSize
/ 8, SmallSplitSize
/ 8);
1956 LLT PtrTy
= MRI
.getType(PtrReg
);
1957 unsigned AnyExtSize
= NextPowerOf2(DstTy
.getSizeInBits());
1958 LLT AnyExtTy
= LLT::scalar(AnyExtSize
);
1959 Register LargeLdReg
= MRI
.createGenericVirtualRegister(AnyExtTy
);
1960 Register SmallLdReg
= MRI
.createGenericVirtualRegister(AnyExtTy
);
1962 MIRBuilder
.buildLoad(LargeLdReg
, PtrReg
, *LargeMMO
);
1965 MIRBuilder
.buildConstant(LLT::scalar(64), LargeSplitSize
/ 8);
1966 Register GEPReg
= MRI
.createGenericVirtualRegister(PtrTy
);
1967 auto SmallPtr
= MIRBuilder
.buildGEP(GEPReg
, PtrReg
, OffsetCst
.getReg(0));
1968 auto SmallLoad
= MIRBuilder
.buildLoad(SmallLdReg
, SmallPtr
.getReg(0),
1971 auto ShiftAmt
= MIRBuilder
.buildConstant(AnyExtTy
, LargeSplitSize
);
1972 auto Shift
= MIRBuilder
.buildShl(AnyExtTy
, SmallLoad
, ShiftAmt
);
1973 auto Or
= MIRBuilder
.buildOr(AnyExtTy
, Shift
, LargeLoad
);
1974 MIRBuilder
.buildTrunc(DstReg
, {Or
.getReg(0)});
1975 MI
.eraseFromParent();
1978 MIRBuilder
.buildLoad(DstReg
, PtrReg
, MMO
);
1979 MI
.eraseFromParent();
1983 if (DstTy
.isScalar()) {
1985 MRI
.createGenericVirtualRegister(LLT::scalar(MMO
.getSizeInBits()));
1986 MIRBuilder
.buildLoad(TmpReg
, PtrReg
, MMO
);
1987 switch (MI
.getOpcode()) {
1989 llvm_unreachable("Unexpected opcode");
1990 case TargetOpcode::G_LOAD
:
1991 MIRBuilder
.buildAnyExt(DstReg
, TmpReg
);
1993 case TargetOpcode::G_SEXTLOAD
:
1994 MIRBuilder
.buildSExt(DstReg
, TmpReg
);
1996 case TargetOpcode::G_ZEXTLOAD
:
1997 MIRBuilder
.buildZExt(DstReg
, TmpReg
);
2000 MI
.eraseFromParent();
2004 return UnableToLegalize
;
2006 case TargetOpcode::G_STORE
: {
2007 // Lower a non-power of 2 store into multiple pow-2 stores.
2008 // E.g. split an i24 store into an i16 store + i8 store.
2009 // We do this by first extending the stored value to the next largest power
2010 // of 2 type, and then using truncating stores to store the components.
2011 // By doing this, likewise with G_LOAD, generate an extend that can be
2012 // artifact-combined away instead of leaving behind extracts.
2013 Register SrcReg
= MI
.getOperand(0).getReg();
2014 Register PtrReg
= MI
.getOperand(1).getReg();
2015 LLT SrcTy
= MRI
.getType(SrcReg
);
2016 MachineMemOperand
&MMO
= **MI
.memoperands_begin();
2017 if (SrcTy
.getSizeInBits() != MMO
.getSizeInBits())
2018 return UnableToLegalize
;
2019 if (SrcTy
.isVector())
2020 return UnableToLegalize
;
2021 if (isPowerOf2_32(SrcTy
.getSizeInBits()))
2022 return UnableToLegalize
; // Don't know what we're being asked to do.
2024 // Extend to the next pow-2.
2025 const LLT ExtendTy
= LLT::scalar(NextPowerOf2(SrcTy
.getSizeInBits()));
2026 auto ExtVal
= MIRBuilder
.buildAnyExt(ExtendTy
, SrcReg
);
2028 // Obtain the smaller value by shifting away the larger value.
2029 uint64_t LargeSplitSize
= PowerOf2Floor(SrcTy
.getSizeInBits());
2030 uint64_t SmallSplitSize
= SrcTy
.getSizeInBits() - LargeSplitSize
;
2031 auto ShiftAmt
= MIRBuilder
.buildConstant(ExtendTy
, LargeSplitSize
);
2032 auto SmallVal
= MIRBuilder
.buildLShr(ExtendTy
, ExtVal
, ShiftAmt
);
2034 // Generate the GEP and truncating stores.
2035 LLT PtrTy
= MRI
.getType(PtrReg
);
2037 MIRBuilder
.buildConstant(LLT::scalar(64), LargeSplitSize
/ 8);
2038 Register GEPReg
= MRI
.createGenericVirtualRegister(PtrTy
);
2039 auto SmallPtr
= MIRBuilder
.buildGEP(GEPReg
, PtrReg
, OffsetCst
.getReg(0));
2041 MachineFunction
&MF
= MIRBuilder
.getMF();
2042 MachineMemOperand
*LargeMMO
=
2043 MF
.getMachineMemOperand(&MMO
, 0, LargeSplitSize
/ 8);
2044 MachineMemOperand
*SmallMMO
=
2045 MF
.getMachineMemOperand(&MMO
, LargeSplitSize
/ 8, SmallSplitSize
/ 8);
2046 MIRBuilder
.buildStore(ExtVal
.getReg(0), PtrReg
, *LargeMMO
);
2047 MIRBuilder
.buildStore(SmallVal
.getReg(0), SmallPtr
.getReg(0), *SmallMMO
);
2048 MI
.eraseFromParent();
2051 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
2052 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
2053 case TargetOpcode::G_CTLZ
:
2054 case TargetOpcode::G_CTTZ
:
2055 case TargetOpcode::G_CTPOP
:
2056 return lowerBitCount(MI
, TypeIdx
, Ty
);
2058 Register Res
= MI
.getOperand(0).getReg();
2059 Register CarryOut
= MI
.getOperand(1).getReg();
2060 Register LHS
= MI
.getOperand(2).getReg();
2061 Register RHS
= MI
.getOperand(3).getReg();
2063 MIRBuilder
.buildAdd(Res
, LHS
, RHS
);
2064 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, CarryOut
, Res
, RHS
);
2066 MI
.eraseFromParent();
2070 Register Res
= MI
.getOperand(0).getReg();
2071 Register CarryOut
= MI
.getOperand(1).getReg();
2072 Register LHS
= MI
.getOperand(2).getReg();
2073 Register RHS
= MI
.getOperand(3).getReg();
2074 Register CarryIn
= MI
.getOperand(4).getReg();
2076 Register TmpRes
= MRI
.createGenericVirtualRegister(Ty
);
2077 Register ZExtCarryIn
= MRI
.createGenericVirtualRegister(Ty
);
2079 MIRBuilder
.buildAdd(TmpRes
, LHS
, RHS
);
2080 MIRBuilder
.buildZExt(ZExtCarryIn
, CarryIn
);
2081 MIRBuilder
.buildAdd(Res
, TmpRes
, ZExtCarryIn
);
2082 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, CarryOut
, Res
, LHS
);
2084 MI
.eraseFromParent();
2088 Register Res
= MI
.getOperand(0).getReg();
2089 Register BorrowOut
= MI
.getOperand(1).getReg();
2090 Register LHS
= MI
.getOperand(2).getReg();
2091 Register RHS
= MI
.getOperand(3).getReg();
2093 MIRBuilder
.buildSub(Res
, LHS
, RHS
);
2094 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, BorrowOut
, LHS
, RHS
);
2096 MI
.eraseFromParent();
2100 Register Res
= MI
.getOperand(0).getReg();
2101 Register BorrowOut
= MI
.getOperand(1).getReg();
2102 Register LHS
= MI
.getOperand(2).getReg();
2103 Register RHS
= MI
.getOperand(3).getReg();
2104 Register BorrowIn
= MI
.getOperand(4).getReg();
2106 Register TmpRes
= MRI
.createGenericVirtualRegister(Ty
);
2107 Register ZExtBorrowIn
= MRI
.createGenericVirtualRegister(Ty
);
2108 Register LHS_EQ_RHS
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
2109 Register LHS_ULT_RHS
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
2111 MIRBuilder
.buildSub(TmpRes
, LHS
, RHS
);
2112 MIRBuilder
.buildZExt(ZExtBorrowIn
, BorrowIn
);
2113 MIRBuilder
.buildSub(Res
, TmpRes
, ZExtBorrowIn
);
2114 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LHS_EQ_RHS
, LHS
, RHS
);
2115 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, LHS_ULT_RHS
, LHS
, RHS
);
2116 MIRBuilder
.buildSelect(BorrowOut
, LHS_EQ_RHS
, BorrowIn
, LHS_ULT_RHS
);
2118 MI
.eraseFromParent();
2122 return lowerUITOFP(MI
, TypeIdx
, Ty
);
2124 return lowerSITOFP(MI
, TypeIdx
, Ty
);
2129 return lowerMinMax(MI
, TypeIdx
, Ty
);
2131 return lowerFCopySign(MI
, TypeIdx
, Ty
);
2134 return lowerFMinNumMaxNum(MI
);
2135 case G_UNMERGE_VALUES
:
2136 return lowerUnmergeValues(MI
);
2137 case TargetOpcode::G_SEXT_INREG
: {
2138 assert(MI
.getOperand(2).isImm() && "Expected immediate");
2139 int64_t SizeInBits
= MI
.getOperand(2).getImm();
2141 Register DstReg
= MI
.getOperand(0).getReg();
2142 Register SrcReg
= MI
.getOperand(1).getReg();
2143 LLT DstTy
= MRI
.getType(DstReg
);
2144 Register TmpRes
= MRI
.createGenericVirtualRegister(DstTy
);
2146 auto MIBSz
= MIRBuilder
.buildConstant(DstTy
, DstTy
.getScalarSizeInBits() - SizeInBits
);
2147 MIRBuilder
.buildInstr(TargetOpcode::G_SHL
, {TmpRes
}, {SrcReg
, MIBSz
->getOperand(0).getReg()});
2148 MIRBuilder
.buildInstr(TargetOpcode::G_ASHR
, {DstReg
}, {TmpRes
, MIBSz
->getOperand(0).getReg()});
2149 MI
.eraseFromParent();
2152 case G_SHUFFLE_VECTOR
:
2153 return lowerShuffleVector(MI
);
2157 LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorImplicitDef(
2158 MachineInstr
&MI
, unsigned TypeIdx
, LLT NarrowTy
) {
2159 SmallVector
<Register
, 2> DstRegs
;
2161 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
2162 Register DstReg
= MI
.getOperand(0).getReg();
2163 unsigned Size
= MRI
.getType(DstReg
).getSizeInBits();
2164 int NumParts
= Size
/ NarrowSize
;
2165 // FIXME: Don't know how to handle the situation where the small vectors
2166 // aren't all the same size yet.
2167 if (Size
% NarrowSize
!= 0)
2168 return UnableToLegalize
;
2170 for (int i
= 0; i
< NumParts
; ++i
) {
2171 Register TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2172 MIRBuilder
.buildUndef(TmpReg
);
2173 DstRegs
.push_back(TmpReg
);
2176 if (NarrowTy
.isVector())
2177 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2179 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2181 MI
.eraseFromParent();
// Split an elementwise vector operation (unary/binary/ternary — NumOps is
// derived from the operand count) into NarrowTy-sized pieces. Two paths:
//  1. Uneven breakdown where exactly one scalar element is left over:
//     extract/op/insert each even piece into an accumulator, then handle
//     the trailing element-sized piece separately.
//  2. Even breakdown: extractParts each source operand, emit one narrow
//     instruction per part, then concat/build_vector the results.
// Instruction flags (e.g. fast-math) are propagated via MI.getFlags().
// NOTE(review): this extract is whitespace-mangled and lossy — e.g. the
// `if (NumOps == 1)` guard implied by the dangling `else if (NumOps == 2)`
// below, several closing braces, and `return Legalized;` lines are missing
// from this view. Do not edit logic from this copy.
2185 LegalizerHelper::LegalizeResult
2186 LegalizerHelper::fewerElementsVectorBasic(MachineInstr
&MI
, unsigned TypeIdx
,
2188 const unsigned Opc
= MI
.getOpcode();
2189 const unsigned NumOps
= MI
.getNumOperands() - 1;
2190 const unsigned NarrowSize
= NarrowTy
.getSizeInBits();
2191 const Register DstReg
= MI
.getOperand(0).getReg();
2192 const unsigned Flags
= MI
.getFlags();
2193 const LLT DstTy
= MRI
.getType(DstReg
);
2194 const unsigned Size
= DstTy
.getSizeInBits();
2195 const int NumParts
= Size
/ NarrowSize
;
2196 const LLT EltTy
= DstTy
.getElementType();
2197 const unsigned EltSize
= EltTy
.getSizeInBits();
2198 const unsigned BitsForNumParts
= NarrowSize
* NumParts
;
2200 // Check if we have any leftovers. If we do, then only handle the case where
2201 // the leftover is one element.
2202 if (BitsForNumParts
!= Size
&& BitsForNumParts
+ EltSize
!= Size
)
2203 return UnableToLegalize
;
// Path 1: uneven breakdown with a single leftover element.
2205 if (BitsForNumParts
!= Size
) {
2206 Register AccumDstReg
= MRI
.createGenericVirtualRegister(DstTy
);
2207 MIRBuilder
.buildUndef(AccumDstReg
);
2209 // Handle the pieces which evenly divide into the requested type with
2210 // extract/op/insert sequence.
2211 for (unsigned Offset
= 0; Offset
< BitsForNumParts
; Offset
+= NarrowSize
) {
2212 SmallVector
<SrcOp
, 4> SrcOps
;
2213 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
2214 Register PartOpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2215 MIRBuilder
.buildExtract(PartOpReg
, MI
.getOperand(I
).getReg(), Offset
);
2216 SrcOps
.push_back(PartOpReg
);
2219 Register PartDstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2220 MIRBuilder
.buildInstr(Opc
, {PartDstReg
}, SrcOps
, Flags
);
2222 Register PartInsertReg
= MRI
.createGenericVirtualRegister(DstTy
);
2223 MIRBuilder
.buildInsert(PartInsertReg
, AccumDstReg
, PartDstReg
, Offset
);
2224 AccumDstReg
= PartInsertReg
;
2227 // Handle the remaining element sized leftover piece.
2228 SmallVector
<SrcOp
, 4> SrcOps
;
2229 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
2230 Register PartOpReg
= MRI
.createGenericVirtualRegister(EltTy
);
2231 MIRBuilder
.buildExtract(PartOpReg
, MI
.getOperand(I
).getReg(),
2233 SrcOps
.push_back(PartOpReg
);
2236 Register PartDstReg
= MRI
.createGenericVirtualRegister(EltTy
);
2237 MIRBuilder
.buildInstr(Opc
, {PartDstReg
}, SrcOps
, Flags
);
// The leftover piece is inserted at the end of the evenly-covered bits.
2238 MIRBuilder
.buildInsert(DstReg
, AccumDstReg
, PartDstReg
, BitsForNumParts
);
2239 MI
.eraseFromParent();
// Path 2: even breakdown — split each present source operand into parts.
2244 SmallVector
<Register
, 2> DstRegs
, Src0Regs
, Src1Regs
, Src2Regs
;
2246 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src0Regs
);
2249 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src1Regs
);
2252 extractParts(MI
.getOperand(3).getReg(), NarrowTy
, NumParts
, Src2Regs
);
2254 for (int i
= 0; i
< NumParts
; ++i
) {
2255 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
// Emit one narrow op per part, arity chosen by NumOps.
2258 MIRBuilder
.buildInstr(Opc
, {DstReg
}, {Src0Regs
[i
]}, Flags
);
2259 else if (NumOps
== 2) {
2260 MIRBuilder
.buildInstr(Opc
, {DstReg
}, {Src0Regs
[i
], Src1Regs
[i
]}, Flags
);
2261 } else if (NumOps
== 3) {
2262 MIRBuilder
.buildInstr(Opc
, {DstReg
},
2263 {Src0Regs
[i
], Src1Regs
[i
], Src2Regs
[i
]}, Flags
);
2266 DstRegs
.push_back(DstReg
);
2269 if (NarrowTy
.isVector())
2270 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2272 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2274 MI
.eraseFromParent();
2278 // Handle splitting vector operations which need to have the same number of
2279 // elements in each type index, but each type index may have a different element
2282 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2283 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2284 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2286 // Also handles some irregular breakdown cases, e.g.
2287 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2288 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2289 // s64 = G_SHL s64, s32
// Strategy: compute the result-type breakdown once (getNarrowTypeBreakDown),
// split every source operand into matching part/leftover registers, build
// one un-inserted instruction per part, wire each operand's parts into the
// new instructions in order (regular parts first, leftovers after), then
// insert the instructions and stitch the results back with insertParts.
// NOTE(review): this extract is whitespace-mangled and lossy — declarations
// such as LeftoverTy0/LeftoverTyI and several braces/returns are missing
// from this view; consult the canonical upstream file before editing.
2290 LegalizerHelper::LegalizeResult
2291 LegalizerHelper::fewerElementsVectorMultiEltType(
2292 MachineInstr
&MI
, unsigned TypeIdx
, LLT NarrowTyArg
) {
2294 return UnableToLegalize
;
2296 const LLT NarrowTy0
= NarrowTyArg
;
2297 const unsigned NewNumElts
=
2298 NarrowTy0
.isVector() ? NarrowTy0
.getNumElements() : 1;
2300 const Register DstReg
= MI
.getOperand(0).getReg();
2301 LLT DstTy
= MRI
.getType(DstReg
);
2304 // All of the operands need to have the same number of elements, so if we can
2305 // determine a type breakdown for the result type, we can for all of the
2307 int NumParts
= getNarrowTypeBreakDown(DstTy
, NarrowTy0
, LeftoverTy0
).first
;
2309 return UnableToLegalize
;
2311 SmallVector
<MachineInstrBuilder
, 4> NewInsts
;
2313 SmallVector
<Register
, 4> DstRegs
, LeftoverDstRegs
;
2314 SmallVector
<Register
, 4> PartRegs
, LeftoverRegs
;
// Walk every source operand; each must break down the same way.
2316 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
2318 Register SrcReg
= MI
.getOperand(I
).getReg();
2319 LLT SrcTyI
= MRI
.getType(SrcReg
);
// Same element count as the result pieces, element type from this operand.
2320 LLT NarrowTyI
= LLT::scalarOrVector(NewNumElts
, SrcTyI
.getScalarType());
2323 // Split this operand into the requested typed registers, and any leftover
2324 // required to reproduce the original type.
2325 if (!extractParts(SrcReg
, SrcTyI
, NarrowTyI
, LeftoverTyI
, PartRegs
,
2327 return UnableToLegalize
;
2330 // For the first operand, create an instruction for each part and setup
2332 for (Register PartReg
: PartRegs
) {
2333 Register PartDstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2334 NewInsts
.push_back(MIRBuilder
.buildInstrNoInsert(MI
.getOpcode())
2337 DstRegs
.push_back(PartDstReg
);
2340 for (Register LeftoverReg
: LeftoverRegs
) {
2341 Register PartDstReg
= MRI
.createGenericVirtualRegister(LeftoverTy0
);
2342 NewInsts
.push_back(MIRBuilder
.buildInstrNoInsert(MI
.getOpcode())
2344 .addUse(LeftoverReg
));
2345 LeftoverDstRegs
.push_back(PartDstReg
);
2348 assert(NewInsts
.size() == PartRegs
.size() + LeftoverRegs
.size());
2350 // Add the newly created operand splits to the existing instructions. The
2351 // odd-sized pieces are ordered after the requested NarrowTyArg sized
2353 unsigned InstCount
= 0;
2354 for (unsigned J
= 0, JE
= PartRegs
.size(); J
!= JE
; ++J
)
2355 NewInsts
[InstCount
++].addUse(PartRegs
[J
]);
2356 for (unsigned J
= 0, JE
= LeftoverRegs
.size(); J
!= JE
; ++J
)
2357 NewInsts
[InstCount
++].addUse(LeftoverRegs
[J
]);
// Reset per-operand scratch before processing the next operand.
2361 LeftoverRegs
.clear();
2364 // Insert the newly built operations and rebuild the result register.
2365 for (auto &MIB
: NewInsts
)
2366 MIRBuilder
.insertInstr(MIB
);
2368 insertParts(DstReg
, DstTy
, NarrowTy0
, DstRegs
, LeftoverTy0
, LeftoverDstRegs
);
2370 MI
.eraseFromParent();
// Split a single-source conversion/cast (e.g. extends, truncates,
// G_ADDRSPACE_CAST — dispatched from fewerElementsVector) into narrow
// pieces: derive a matching source narrow type, extractParts the source,
// emit one cast per part (propagating MI flags), and rebuild the result
// via concat_vectors / build_vector.
// NOTE(review): this extract is whitespace-mangled and lossy — the
// declarations of NumParts/NarrowTy1, the `else` between the two breakdown
// arms, closing braces and `return Legalized;` are missing from this view.
2374 LegalizerHelper::LegalizeResult
2375 LegalizerHelper::fewerElementsVectorCasts(MachineInstr
&MI
, unsigned TypeIdx
,
2378 return UnableToLegalize
;
2380 Register DstReg
= MI
.getOperand(0).getReg();
2381 Register SrcReg
= MI
.getOperand(1).getReg();
2382 LLT DstTy
= MRI
.getType(DstReg
);
2383 LLT SrcTy
= MRI
.getType(SrcReg
);
2385 LLT NarrowTy0
= NarrowTy
;
// Vector narrow type: split evenly or give up; scalar: one part per element.
2389 if (NarrowTy
.isVector()) {
2390 // Uneven breakdown not handled.
2391 NumParts
= DstTy
.getNumElements() / NarrowTy
.getNumElements();
2392 if (NumParts
* NarrowTy
.getNumElements() != DstTy
.getNumElements())
2393 return UnableToLegalize
;
2395 NarrowTy1
= LLT::vector(NumParts
, SrcTy
.getElementType().getSizeInBits());
2397 NumParts
= DstTy
.getNumElements();
2398 NarrowTy1
= SrcTy
.getElementType();
2401 SmallVector
<Register
, 4> SrcRegs
, DstRegs
;
2402 extractParts(SrcReg
, NarrowTy1
, NumParts
, SrcRegs
);
2404 for (unsigned I
= 0; I
< NumParts
; ++I
) {
2405 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2406 MachineInstr
*NewInst
= MIRBuilder
.buildInstr(MI
.getOpcode())
2408 .addUse(SrcRegs
[I
]);
// Preserve flags (e.g. fast-math) on each narrow cast.
2410 NewInst
->setFlags(MI
.getFlags());
2411 DstRegs
.push_back(DstReg
);
2414 if (NarrowTy
.isVector())
2415 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2417 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2419 MI
.eraseFromParent();
// Split a G_ICMP/G_FCMP over vectors into narrow compares. The two type
// indices (result and source) may have different element sizes, so the
// breakdown computes a paired (NarrowTy0 result, NarrowTy1 source) type
// depending on which TypeIdx is being narrowed, emits one compare per
// part (buildICmp or buildFCmp, propagating flags on the FCmp), and
// rebuilds the result with concat_vectors / build_vector.
// NOTE(review): this extract is whitespace-mangled and lossy — the
// `if (TypeIdx == 0)` / `else` framing around the two breakdown arms,
// the NumParts declaration, braces and `return Legalized;` are missing
// from this view; consult the canonical upstream file before editing.
2423 LegalizerHelper::LegalizeResult
2424 LegalizerHelper::fewerElementsVectorCmp(MachineInstr
&MI
, unsigned TypeIdx
,
2426 Register DstReg
= MI
.getOperand(0).getReg();
2427 Register Src0Reg
= MI
.getOperand(2).getReg();
2428 LLT DstTy
= MRI
.getType(DstReg
);
2429 LLT SrcTy
= MRI
.getType(Src0Reg
);
2432 LLT NarrowTy0
, NarrowTy1
;
// Arm 1 (result index narrowed): NarrowTy0 is the requested type; derive
// a source narrow type with matching element count.
2435 unsigned NewElts
= NarrowTy
.isVector() ? NarrowTy
.getNumElements() : 1;
2436 unsigned OldElts
= DstTy
.getNumElements();
2438 NarrowTy0
= NarrowTy
;
2439 NumParts
= NarrowTy
.isVector() ? (OldElts
/ NewElts
) : DstTy
.getNumElements();
2440 NarrowTy1
= NarrowTy
.isVector() ?
2441 LLT::vector(NarrowTy
.getNumElements(), SrcTy
.getScalarSizeInBits()) :
2442 SrcTy
.getElementType();
// Arm 2 (source index narrowed): NarrowTy1 is the requested type; derive
// a result narrow type with the destination's scalar size.
2445 unsigned NewElts
= NarrowTy
.isVector() ? NarrowTy
.getNumElements() : 1;
2446 unsigned OldElts
= SrcTy
.getNumElements();
2448 NumParts
= NarrowTy
.isVector() ? (OldElts
/ NewElts
) :
2449 NarrowTy
.getNumElements();
2450 NarrowTy0
= LLT::vector(NarrowTy
.getNumElements(),
2451 DstTy
.getScalarSizeInBits());
2452 NarrowTy1
= NarrowTy
;
2455 // FIXME: Don't know how to handle the situation where the small vectors
2456 // aren't all the same size yet.
2457 if (NarrowTy1
.isVector() &&
2458 NarrowTy1
.getNumElements() * NumParts
!= DstTy
.getNumElements())
2459 return UnableToLegalize
;
2461 CmpInst::Predicate Pred
2462 = static_cast<CmpInst::Predicate
>(MI
.getOperand(1).getPredicate());
2464 SmallVector
<Register
, 2> Src1Regs
, Src2Regs
, DstRegs
;
2465 extractParts(MI
.getOperand(2).getReg(), NarrowTy1
, NumParts
, Src1Regs
);
2466 extractParts(MI
.getOperand(3).getReg(), NarrowTy1
, NumParts
, Src2Regs
);
2468 for (unsigned I
= 0; I
< NumParts
; ++I
) {
2469 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2470 DstRegs
.push_back(DstReg
);
2472 if (MI
.getOpcode() == TargetOpcode::G_ICMP
)
2473 MIRBuilder
.buildICmp(Pred
, DstReg
, Src1Regs
[I
], Src2Regs
[I
]);
// FCmp path keeps the original instruction's flags.
2475 MachineInstr
*NewCmp
2476 = MIRBuilder
.buildFCmp(Pred
, DstReg
, Src1Regs
[I
], Src2Regs
[I
]);
2477 NewCmp
->setFlags(MI
.getFlags());
2481 if (NarrowTy1
.isVector())
2482 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2484 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2486 MI
.eraseFromParent();
// Split a G_SELECT over vectors. Two cases by TypeIdx: narrowing the
// result (then the condition, if itself a vector, must be broken down to
// match), or narrowing a vector condition (then the result pieces are its
// element type). Each part gets its own G_SELECT; a scalar condition is
// shared across all parts. Results are rebuilt via concat/build_vector.
// NOTE(review): this extract is whitespace-mangled and lossy — the
// `if (TypeIdx == 0)`/`else` framing, several `else` keywords, braces and
// `return Legalized;` are missing from this view.
2490 LegalizerHelper::LegalizeResult
2491 LegalizerHelper::fewerElementsVectorSelect(MachineInstr
&MI
, unsigned TypeIdx
,
2493 Register DstReg
= MI
.getOperand(0).getReg();
2494 Register CondReg
= MI
.getOperand(1).getReg();
2496 unsigned NumParts
= 0;
2497 LLT NarrowTy0
, NarrowTy1
;
2499 LLT DstTy
= MRI
.getType(DstReg
);
2500 LLT CondTy
= MRI
.getType(CondReg
);
2501 unsigned Size
= DstTy
.getSizeInBits();
2503 assert(TypeIdx
== 0 || CondTy
.isVector());
// Arm 1: narrow the result type.
2506 NarrowTy0
= NarrowTy
;
2509 unsigned NarrowSize
= NarrowTy0
.getSizeInBits();
2510 // FIXME: Don't know how to handle the situation where the small vectors
2511 // aren't all the same size yet.
2512 if (Size
% NarrowSize
!= 0)
2513 return UnableToLegalize
;
2515 NumParts
= Size
/ NarrowSize
;
2517 // Need to break down the condition type
2518 if (CondTy
.isVector()) {
2519 if (CondTy
.getNumElements() == NumParts
)
2520 NarrowTy1
= CondTy
.getElementType();
2522 NarrowTy1
= LLT::vector(CondTy
.getNumElements() / NumParts
,
2523 CondTy
.getScalarSizeInBits());
// Arm 2: narrow the (vector) condition type.
2526 NumParts
= CondTy
.getNumElements();
2527 if (NarrowTy
.isVector()) {
2528 // TODO: Handle uneven breakdown.
2529 if (NumParts
* NarrowTy
.getNumElements() != CondTy
.getNumElements())
2530 return UnableToLegalize
;
2532 return UnableToLegalize
;
2534 NarrowTy0
= DstTy
.getElementType();
2535 NarrowTy1
= NarrowTy
;
2539 SmallVector
<Register
, 2> DstRegs
, Src0Regs
, Src1Regs
, Src2Regs
;
// The condition is only split when it is a vector; a scalar condition is
// reused unsplit for every part (see the buildSelect below).
2540 if (CondTy
.isVector())
2541 extractParts(MI
.getOperand(1).getReg(), NarrowTy1
, NumParts
, Src0Regs
);
2543 extractParts(MI
.getOperand(2).getReg(), NarrowTy0
, NumParts
, Src1Regs
);
2544 extractParts(MI
.getOperand(3).getReg(), NarrowTy0
, NumParts
, Src2Regs
);
2546 for (unsigned i
= 0; i
< NumParts
; ++i
) {
2547 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
2548 MIRBuilder
.buildSelect(DstReg
, CondTy
.isVector() ? Src0Regs
[i
] : CondReg
,
2549 Src1Regs
[i
], Src2Regs
[i
]);
2550 DstRegs
.push_back(DstReg
);
2553 if (NarrowTy0
.isVector())
2554 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
2556 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2558 MI
.eraseFromParent();
// Split a G_PHI whose type needs narrowing. Order of operations matters:
// first create one narrow G_PHI per part (plus leftover-typed phis) at the
// top of the result block and stitch their defs back together after the
// last PHI (getFirstNonPHI); then, for each predecessor edge (operands
// come in value/MBB pairs, hence I += 2), insert the extracts for the
// incoming value before that block's terminator and append the pieces as
// uses on the new phis.
// NOTE(review): this extract is whitespace-mangled and lossy — LeftoverTy
// and Unused declarations, the incoming-MBB operands added alongside each
// use, braces and `return Legalized;` are missing from this view.
2562 LegalizerHelper::LegalizeResult
2563 LegalizerHelper::fewerElementsVectorPhi(MachineInstr
&MI
, unsigned TypeIdx
,
2565 const Register DstReg
= MI
.getOperand(0).getReg();
2566 LLT PhiTy
= MRI
.getType(DstReg
);
2569 // All of the operands need to have the same number of elements, so if we can
2570 // determine a type breakdown for the result type, we can for all of the
2572 int NumParts
, NumLeftover
;
2573 std::tie(NumParts
, NumLeftover
)
2574 = getNarrowTypeBreakDown(PhiTy
, NarrowTy
, LeftoverTy
);
2576 return UnableToLegalize
;
2578 SmallVector
<Register
, 4> DstRegs
, LeftoverDstRegs
;
2579 SmallVector
<MachineInstrBuilder
, 4> NewInsts
;
2581 const int TotalNumParts
= NumParts
+ NumLeftover
;
2583 // Insert the new phis in the result block first.
2584 for (int I
= 0; I
!= TotalNumParts
; ++I
) {
// Regular parts use NarrowTy; trailing parts use the leftover type.
2585 LLT Ty
= I
< NumParts
? NarrowTy
: LeftoverTy
;
2586 Register PartDstReg
= MRI
.createGenericVirtualRegister(Ty
);
2587 NewInsts
.push_back(MIRBuilder
.buildInstr(TargetOpcode::G_PHI
)
2588 .addDef(PartDstReg
));
2590 DstRegs
.push_back(PartDstReg
);
2592 LeftoverDstRegs
.push_back(PartDstReg
);
// Rebuild the original wide value immediately after the PHI group.
2595 MachineBasicBlock
*MBB
= MI
.getParent();
2596 MIRBuilder
.setInsertPt(*MBB
, MBB
->getFirstNonPHI());
2597 insertParts(DstReg
, PhiTy
, NarrowTy
, DstRegs
, LeftoverTy
, LeftoverDstRegs
);
2599 SmallVector
<Register
, 4> PartRegs
, LeftoverRegs
;
2601 // Insert code to extract the incoming values in each predecessor block.
2602 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; I
+= 2) {
2604 LeftoverRegs
.clear();
2606 Register SrcReg
= MI
.getOperand(I
).getReg();
2607 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
// Extraction code must dominate the edge: insert before the terminator.
2608 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
2611 if (!extractParts(SrcReg
, PhiTy
, NarrowTy
, Unused
, PartRegs
,
2613 return UnableToLegalize
;
2615 // Add the newly created operand splits to the existing instructions. The
2616 // odd-sized pieces are ordered after the requested NarrowTyArg sized
2618 for (int J
= 0; J
!= TotalNumParts
; ++J
) {
2619 MachineInstrBuilder MIB
= NewInsts
[J
];
2620 MIB
.addUse(J
< NumParts
? PartRegs
[J
] : LeftoverRegs
[J
- NumParts
]);
2625 MI
.eraseFromParent();
// Narrow a G_UNMERGE_VALUES: first unmerge the source into GCD-typed
// pieces (getGCDType of source and NarrowTy), then emit one smaller
// unmerge per piece, distributing PartsPerUnmerge of the original defs to
// each. Bails out when the requested type equals the destination type or
// the GCD equals the destination type (would be a no-op copy — see TODOs).
// NOTE(review): this extract is whitespace-mangled and lossy — some braces
// and `return Legalized;` lines are missing from this view.
2629 LegalizerHelper::LegalizeResult
2630 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr
&MI
,
2634 return UnableToLegalize
;
// The source register is the last operand; all earlier operands are defs.
2636 const int NumDst
= MI
.getNumOperands() - 1;
2637 const Register SrcReg
= MI
.getOperand(NumDst
).getReg();
2638 LLT SrcTy
= MRI
.getType(SrcReg
);
2640 LLT DstTy
= MRI
.getType(MI
.getOperand(0).getReg());
2642 // TODO: Create sequence of extracts.
2643 if (DstTy
== NarrowTy
)
2644 return UnableToLegalize
;
2646 LLT GCDTy
= getGCDType(SrcTy
, NarrowTy
);
2647 if (DstTy
== GCDTy
) {
2648 // This would just be a copy of the same unmerge.
2649 // TODO: Create extracts, pad with undef and create intermediate merges.
2650 return UnableToLegalize
;
2653 auto Unmerge
= MIRBuilder
.buildUnmerge(GCDTy
, SrcReg
);
2654 const int NumUnmerge
= Unmerge
->getNumOperands() - 1;
2655 const int PartsPerUnmerge
= NumDst
/ NumUnmerge
;
2657 for (int I
= 0; I
!= NumUnmerge
; ++I
) {
2658 auto MIB
= MIRBuilder
.buildInstr(TargetOpcode::G_UNMERGE_VALUES
);
// Reuse the original destination registers on the new small unmerges.
2660 for (int J
= 0; J
!= PartsPerUnmerge
; ++J
)
2661 MIB
.addDef(MI
.getOperand(I
* PartsPerUnmerge
+ J
).getReg());
2662 MIB
.addUse(Unmerge
.getReg(I
));
2665 MI
.eraseFromParent();
// Narrow a non-atomic G_LOAD/G_STORE to NarrowTy-sized memory accesses
// (plus one leftover-typed access for uneven breakdowns). The shared
// splitTypePieces lambda walks the value in PartTy-sized strides,
// materializing an address (materializeGEP) and a sub-MMO
// (getMachineMemOperand with byte offset/size) per piece, then emitting a
// load into a fresh register or a store of the corresponding value part.
// Loads finish by reassembling the wide value with insertParts.
// NOTE(review): this extract is whitespace-mangled and lossy — the
// NumParts/LeftoverTy declarations, the store-path extractParts guard's
// `else`, the IsLoad branches' framing, braces and `return Legalized;`
// are missing from this view; consult the canonical upstream file.
2669 LegalizerHelper::LegalizeResult
2670 LegalizerHelper::reduceLoadStoreWidth(MachineInstr
&MI
, unsigned TypeIdx
,
2672 // FIXME: Don't know how to handle secondary types yet.
2674 return UnableToLegalize
;
2676 MachineMemOperand
*MMO
= *MI
.memoperands_begin();
2678 // This implementation doesn't work for atomics. Give up instead of doing
2679 // something invalid.
2680 if (MMO
->getOrdering() != AtomicOrdering::NotAtomic
||
2681 MMO
->getFailureOrdering() != AtomicOrdering::NotAtomic
)
2682 return UnableToLegalize
;
2684 bool IsLoad
= MI
.getOpcode() == TargetOpcode::G_LOAD
;
2685 Register ValReg
= MI
.getOperand(0).getReg();
2686 Register AddrReg
= MI
.getOperand(1).getReg();
2687 LLT ValTy
= MRI
.getType(ValReg
);
2690 int NumLeftover
= -1;
2692 SmallVector
<Register
, 8> NarrowRegs
, NarrowLeftoverRegs
;
// Load path: only the part counts are needed up front; registers are
// created by splitTypePieces. Store path: the value must be split now.
2694 std::tie(NumParts
, NumLeftover
) = getNarrowTypeBreakDown(ValTy
, NarrowTy
, LeftoverTy
);
2696 if (extractParts(ValReg
, ValTy
, NarrowTy
, LeftoverTy
, NarrowRegs
,
2697 NarrowLeftoverRegs
)) {
2698 NumParts
= NarrowRegs
.size();
2699 NumLeftover
= NarrowLeftoverRegs
.size();
2704 return UnableToLegalize
;
2706 const LLT OffsetTy
= LLT::scalar(MRI
.getType(AddrReg
).getScalarSizeInBits());
2708 unsigned TotalSize
= ValTy
.getSizeInBits();
2710 // Split the load/store into PartTy sized pieces starting at Offset. If this
2711 // is a load, return the new registers in ValRegs. For a store, each elements
2712 // of ValRegs should be PartTy. Returns the next offset that needs to be
2714 auto splitTypePieces
= [=](LLT PartTy
, SmallVectorImpl
<Register
> &ValRegs
,
2715 unsigned Offset
) -> unsigned {
2716 MachineFunction
&MF
= MIRBuilder
.getMF();
2717 unsigned PartSize
= PartTy
.getSizeInBits();
2718 for (unsigned Idx
= 0, E
= NumParts
; Idx
!= E
&& Offset
< TotalSize
;
2719 Offset
+= PartSize
, ++Idx
) {
2720 unsigned ByteSize
= PartSize
/ 8;
2721 unsigned ByteOffset
= Offset
/ 8;
2722 Register NewAddrReg
;
2724 MIRBuilder
.materializeGEP(NewAddrReg
, AddrReg
, OffsetTy
, ByteOffset
);
// Sub-MMO covering only this piece of the original access.
2726 MachineMemOperand
*NewMMO
=
2727 MF
.getMachineMemOperand(MMO
, ByteOffset
, ByteSize
);
2730 Register Dst
= MRI
.createGenericVirtualRegister(PartTy
);
2731 ValRegs
.push_back(Dst
);
2732 MIRBuilder
.buildLoad(Dst
, NewAddrReg
, *NewMMO
);
2734 MIRBuilder
.buildStore(ValRegs
[Idx
], NewAddrReg
, *NewMMO
);
// Even-sized parts first, then the leftover piece at the handled offset.
2741 unsigned HandledOffset
= splitTypePieces(NarrowTy
, NarrowRegs
, 0);
2743 // Handle the rest of the register if this isn't an even type breakdown.
2744 if (LeftoverTy
.isValid())
2745 splitTypePieces(LeftoverTy
, NarrowLeftoverRegs
, HandledOffset
);
2748 insertParts(ValReg
, ValTy
, NarrowTy
, NarrowRegs
,
2749 LeftoverTy
, NarrowLeftoverRegs
);
2752 MI
.eraseFromParent();
// Top-level dispatcher for the fewer-elements (vector narrowing) action:
// sets the builder's insert point at MI, then routes by opcode to the
// specialized helpers above. Unhandled opcodes fall through to
// UnableToLegalize.
// NOTE(review): this extract is whitespace-mangled and lossy — most of the
// switch's case labels (the runs of opcodes between those shown, and the
// G_ICMP/G_FCMP, G_SELECT, G_PHI, G_LOAD/G_STORE labels implied by the
// surviving `return` statements) are missing from this view.
2756 LegalizerHelper::LegalizeResult
2757 LegalizerHelper::fewerElementsVector(MachineInstr
&MI
, unsigned TypeIdx
,
2759 using namespace TargetOpcode
;
2761 MIRBuilder
.setInstr(MI
);
2762 switch (MI
.getOpcode()) {
2763 case G_IMPLICIT_DEF
:
2764 return fewerElementsVectorImplicitDef(MI
, TypeIdx
, NarrowTy
);
2778 case G_FCANONICALIZE
:
2792 case G_INTRINSIC_ROUND
:
2793 case G_INTRINSIC_TRUNC
:
2805 case G_FMINNUM_IEEE
:
2806 case G_FMAXNUM_IEEE
:
2809 return fewerElementsVectorBasic(MI
, TypeIdx
, NarrowTy
);
2814 case G_CTLZ_ZERO_UNDEF
:
2816 case G_CTTZ_ZERO_UNDEF
:
2819 return fewerElementsVectorMultiEltType(MI
, TypeIdx
, NarrowTy
);
2831 case G_ADDRSPACE_CAST
:
2832 return fewerElementsVectorCasts(MI
, TypeIdx
, NarrowTy
);
2835 return fewerElementsVectorCmp(MI
, TypeIdx
, NarrowTy
);
2837 return fewerElementsVectorSelect(MI
, TypeIdx
, NarrowTy
);
2839 return fewerElementsVectorPhi(MI
, TypeIdx
, NarrowTy
);
2840 case G_UNMERGE_VALUES
:
2841 return fewerElementsVectorUnmergeValues(MI
, TypeIdx
, NarrowTy
);
2844 return reduceLoadStoreWidth(MI
, TypeIdx
, NarrowTy
);
2846 return UnableToLegalize
;
// Narrow a G_SHL/G_LSHR/G_ASHR with a compile-time-constant amount Amt to
// operations on HalfTy halves (InL/InH via unmerge). Each opcode has four
// regimes: Amt == 0 (re-merge unchanged), Amt > VTBits (result is all
// zeros / sign bits), Amt > NVTBits (single cross-half shift by
// Amt - NVTBits), Amt == NVTBits (pure half swap), and the general case
// combining shl/lshr of the two halves with an OR. Final result is merged
// back into the original destination register.
// NOTE(review): this extract is whitespace-mangled and lossy — the NVT
// declaration, several `else` keywords, the OrLHS definition in the G_SHL
// general case, braces and `return Legalized;` lines are missing from this
// view; consult the canonical upstream file before editing logic.
2850 LegalizerHelper::LegalizeResult
2851 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr
&MI
, const APInt
&Amt
,
2852 const LLT HalfTy
, const LLT AmtTy
) {
2854 Register InL
= MRI
.createGenericVirtualRegister(HalfTy
);
2855 Register InH
= MRI
.createGenericVirtualRegister(HalfTy
);
2856 MIRBuilder
.buildUnmerge({InL
, InH
}, MI
.getOperand(1).getReg());
// Shift by zero: just re-merge the halves.
2858 if (Amt
.isNullValue()) {
2859 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {InL
, InH
});
2860 MI
.eraseFromParent();
2865 unsigned NVTBits
= HalfTy
.getSizeInBits();
2866 unsigned VTBits
= 2 * NVTBits
;
2868 SrcOp
Lo(Register(0)), Hi(Register(0));
// --- G_SHL ---
2869 if (MI
.getOpcode() == TargetOpcode::G_SHL
) {
2870 if (Amt
.ugt(VTBits
)) {
2871 Lo
= Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2872 } else if (Amt
.ugt(NVTBits
)) {
2873 Lo
= MIRBuilder
.buildConstant(NVT
, 0);
2874 Hi
= MIRBuilder
.buildShl(NVT
, InL
,
2875 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
2876 } else if (Amt
== NVTBits
) {
2877 Lo
= MIRBuilder
.buildConstant(NVT
, 0);
// General case: low half shifts; high half combines shifted high bits
// with the bits carried over from the low half.
2880 Lo
= MIRBuilder
.buildShl(NVT
, InL
, MIRBuilder
.buildConstant(AmtTy
, Amt
));
2882 MIRBuilder
.buildShl(NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, Amt
));
2883 auto OrRHS
= MIRBuilder
.buildLShr(
2884 NVT
, InL
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
2885 Hi
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
// --- G_LSHR ---
2887 } else if (MI
.getOpcode() == TargetOpcode::G_LSHR
) {
2888 if (Amt
.ugt(VTBits
)) {
2889 Lo
= Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2890 } else if (Amt
.ugt(NVTBits
)) {
2891 Lo
= MIRBuilder
.buildLShr(NVT
, InH
,
2892 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
2893 Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2894 } else if (Amt
== NVTBits
) {
2896 Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2898 auto ShiftAmtConst
= MIRBuilder
.buildConstant(AmtTy
, Amt
);
2900 auto OrLHS
= MIRBuilder
.buildLShr(NVT
, InL
, ShiftAmtConst
);
2901 auto OrRHS
= MIRBuilder
.buildShl(
2902 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
2904 Lo
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
2905 Hi
= MIRBuilder
.buildLShr(NVT
, InH
, ShiftAmtConst
);
// --- G_ASHR (remaining opcode): high half replicates the sign bit ---
2908 if (Amt
.ugt(VTBits
)) {
2909 Hi
= Lo
= MIRBuilder
.buildAShr(
2910 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
2911 } else if (Amt
.ugt(NVTBits
)) {
2912 Lo
= MIRBuilder
.buildAShr(NVT
, InH
,
2913 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
2914 Hi
= MIRBuilder
.buildAShr(NVT
, InH
,
2915 MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
2916 } else if (Amt
== NVTBits
) {
2918 Hi
= MIRBuilder
.buildAShr(NVT
, InH
,
2919 MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
2921 auto ShiftAmtConst
= MIRBuilder
.buildConstant(AmtTy
, Amt
);
2923 auto OrLHS
= MIRBuilder
.buildLShr(NVT
, InL
, ShiftAmtConst
);
2924 auto OrRHS
= MIRBuilder
.buildShl(
2925 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
2927 Lo
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
2928 Hi
= MIRBuilder
.buildAShr(NVT
, InH
, ShiftAmtConst
);
// Reassemble the wide result from the computed halves.
2932 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {Lo
.getReg(), Hi
.getReg()});
2933 MI
.eraseFromParent();
2938 // TODO: Optimize if constant shift amount.
2939 LegalizerHelper::LegalizeResult
2940 LegalizerHelper::narrowScalarShift(MachineInstr
&MI
, unsigned TypeIdx
,
2943 Observer
.changingInstr(MI
);
2944 narrowScalarSrc(MI
, RequestedTy
, 2);
2945 Observer
.changedInstr(MI
);
2949 Register DstReg
= MI
.getOperand(0).getReg();
2950 LLT DstTy
= MRI
.getType(DstReg
);
2951 if (DstTy
.isVector())
2952 return UnableToLegalize
;
2954 Register Amt
= MI
.getOperand(2).getReg();
2955 LLT ShiftAmtTy
= MRI
.getType(Amt
);
2956 const unsigned DstEltSize
= DstTy
.getScalarSizeInBits();
2957 if (DstEltSize
% 2 != 0)
2958 return UnableToLegalize
;
2960 // Ignore the input type. We can only go to exactly half the size of the
2961 // input. If that isn't small enough, the resulting pieces will be further
2963 const unsigned NewBitSize
= DstEltSize
/ 2;
2964 const LLT HalfTy
= LLT::scalar(NewBitSize
);
2965 const LLT CondTy
= LLT::scalar(1);
2967 if (const MachineInstr
*KShiftAmt
=
2968 getOpcodeDef(TargetOpcode::G_CONSTANT
, Amt
, MRI
)) {
2969 return narrowScalarShiftByConstant(
2970 MI
, KShiftAmt
->getOperand(1).getCImm()->getValue(), HalfTy
, ShiftAmtTy
);
2973 // TODO: Expand with known bits.
2975 // Handle the fully general expansion by an unknown amount.
2976 auto NewBits
= MIRBuilder
.buildConstant(ShiftAmtTy
, NewBitSize
);
2978 Register InL
= MRI
.createGenericVirtualRegister(HalfTy
);
2979 Register InH
= MRI
.createGenericVirtualRegister(HalfTy
);
2980 MIRBuilder
.buildUnmerge({InL
, InH
}, MI
.getOperand(1).getReg());
2982 auto AmtExcess
= MIRBuilder
.buildSub(ShiftAmtTy
, Amt
, NewBits
);
2983 auto AmtLack
= MIRBuilder
.buildSub(ShiftAmtTy
, NewBits
, Amt
);
2985 auto Zero
= MIRBuilder
.buildConstant(ShiftAmtTy
, 0);
2986 auto IsShort
= MIRBuilder
.buildICmp(ICmpInst::ICMP_ULT
, CondTy
, Amt
, NewBits
);
2987 auto IsZero
= MIRBuilder
.buildICmp(ICmpInst::ICMP_EQ
, CondTy
, Amt
, Zero
);
2989 Register ResultRegs
[2];
2990 switch (MI
.getOpcode()) {
2991 case TargetOpcode::G_SHL
: {
2992 // Short: ShAmt < NewBitSize
2993 auto LoS
= MIRBuilder
.buildShl(HalfTy
, InH
, Amt
);
2995 auto OrLHS
= MIRBuilder
.buildShl(HalfTy
, InH
, Amt
);
2996 auto OrRHS
= MIRBuilder
.buildLShr(HalfTy
, InL
, AmtLack
);
2997 auto HiS
= MIRBuilder
.buildOr(HalfTy
, OrLHS
, OrRHS
);
2999 // Long: ShAmt >= NewBitSize
3000 auto LoL
= MIRBuilder
.buildConstant(HalfTy
, 0); // Lo part is zero.
3001 auto HiL
= MIRBuilder
.buildShl(HalfTy
, InL
, AmtExcess
); // Hi from Lo part.
3003 auto Lo
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
);
3004 auto Hi
= MIRBuilder
.buildSelect(
3005 HalfTy
, IsZero
, InH
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
));
3007 ResultRegs
[0] = Lo
.getReg(0);
3008 ResultRegs
[1] = Hi
.getReg(0);
3011 case TargetOpcode::G_LSHR
: {
3012 // Short: ShAmt < NewBitSize
3013 auto HiS
= MIRBuilder
.buildLShr(HalfTy
, InH
, Amt
);
3015 auto OrLHS
= MIRBuilder
.buildLShr(HalfTy
, InL
, Amt
);
3016 auto OrRHS
= MIRBuilder
.buildShl(HalfTy
, InH
, AmtLack
);
3017 auto LoS
= MIRBuilder
.buildOr(HalfTy
, OrLHS
, OrRHS
);
3019 // Long: ShAmt >= NewBitSize
3020 auto HiL
= MIRBuilder
.buildConstant(HalfTy
, 0); // Hi part is zero.
3021 auto LoL
= MIRBuilder
.buildLShr(HalfTy
, InH
, AmtExcess
); // Lo from Hi part.
3023 auto Lo
= MIRBuilder
.buildSelect(
3024 HalfTy
, IsZero
, InL
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
));
3025 auto Hi
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
);
3027 ResultRegs
[0] = Lo
.getReg(0);
3028 ResultRegs
[1] = Hi
.getReg(0);
3031 case TargetOpcode::G_ASHR
: {
3032 // Short: ShAmt < NewBitSize
3033 auto HiS
= MIRBuilder
.buildAShr(HalfTy
, InH
, Amt
);
3035 auto OrLHS
= MIRBuilder
.buildLShr(HalfTy
, InL
, Amt
);
3036 auto OrRHS
= MIRBuilder
.buildLShr(HalfTy
, InH
, AmtLack
);
3037 auto LoS
= MIRBuilder
.buildOr(HalfTy
, OrLHS
, OrRHS
);
3039 // Long: ShAmt >= NewBitSize
3042 auto HiL
= MIRBuilder
.buildAShr(
3043 HalfTy
, InH
, MIRBuilder
.buildConstant(ShiftAmtTy
, NewBitSize
- 1));
3045 auto LoL
= MIRBuilder
.buildAShr(HalfTy
, InH
, AmtExcess
); // Lo from Hi part.
3047 auto Lo
= MIRBuilder
.buildSelect(
3048 HalfTy
, IsZero
, InL
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
));
3050 auto Hi
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
);
3052 ResultRegs
[0] = Lo
.getReg(0);
3053 ResultRegs
[1] = Hi
.getReg(0);
3057 llvm_unreachable("not a shift");
3060 MIRBuilder
.buildMerge(DstReg
, ResultRegs
);
3061 MI
.eraseFromParent();
3065 LegalizerHelper::LegalizeResult
3066 LegalizerHelper::moreElementsVectorPhi(MachineInstr
&MI
, unsigned TypeIdx
,
3068 assert(TypeIdx
== 0 && "Expecting only Idx 0");
3070 Observer
.changingInstr(MI
);
3071 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; I
+= 2) {
3072 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
3073 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
3074 moreElementsVectorSrc(MI
, MoreTy
, I
);
3077 MachineBasicBlock
&MBB
= *MI
.getParent();
3078 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
3079 moreElementsVectorDst(MI
, MoreTy
, 0);
3080 Observer
.changedInstr(MI
);
3084 LegalizerHelper::LegalizeResult
3085 LegalizerHelper::moreElementsVector(MachineInstr
&MI
, unsigned TypeIdx
,
3087 MIRBuilder
.setInstr(MI
);
3088 unsigned Opc
= MI
.getOpcode();
3090 case TargetOpcode::G_IMPLICIT_DEF
:
3091 case TargetOpcode::G_LOAD
: {
3093 return UnableToLegalize
;
3094 Observer
.changingInstr(MI
);
3095 moreElementsVectorDst(MI
, MoreTy
, 0);
3096 Observer
.changedInstr(MI
);
3099 case TargetOpcode::G_STORE
:
3101 return UnableToLegalize
;
3102 Observer
.changingInstr(MI
);
3103 moreElementsVectorSrc(MI
, MoreTy
, 0);
3104 Observer
.changedInstr(MI
);
3106 case TargetOpcode::G_AND
:
3107 case TargetOpcode::G_OR
:
3108 case TargetOpcode::G_XOR
:
3109 case TargetOpcode::G_SMIN
:
3110 case TargetOpcode::G_SMAX
:
3111 case TargetOpcode::G_UMIN
:
3112 case TargetOpcode::G_UMAX
: {
3113 Observer
.changingInstr(MI
);
3114 moreElementsVectorSrc(MI
, MoreTy
, 1);
3115 moreElementsVectorSrc(MI
, MoreTy
, 2);
3116 moreElementsVectorDst(MI
, MoreTy
, 0);
3117 Observer
.changedInstr(MI
);
3120 case TargetOpcode::G_EXTRACT
:
3122 return UnableToLegalize
;
3123 Observer
.changingInstr(MI
);
3124 moreElementsVectorSrc(MI
, MoreTy
, 1);
3125 Observer
.changedInstr(MI
);
3127 case TargetOpcode::G_INSERT
:
3129 return UnableToLegalize
;
3130 Observer
.changingInstr(MI
);
3131 moreElementsVectorSrc(MI
, MoreTy
, 1);
3132 moreElementsVectorDst(MI
, MoreTy
, 0);
3133 Observer
.changedInstr(MI
);
3135 case TargetOpcode::G_SELECT
:
3137 return UnableToLegalize
;
3138 if (MRI
.getType(MI
.getOperand(1).getReg()).isVector())
3139 return UnableToLegalize
;
3141 Observer
.changingInstr(MI
);
3142 moreElementsVectorSrc(MI
, MoreTy
, 2);
3143 moreElementsVectorSrc(MI
, MoreTy
, 3);
3144 moreElementsVectorDst(MI
, MoreTy
, 0);
3145 Observer
.changedInstr(MI
);
3147 case TargetOpcode::G_PHI
:
3148 return moreElementsVectorPhi(MI
, TypeIdx
, MoreTy
);
3150 return UnableToLegalize
;
3154 void LegalizerHelper::multiplyRegisters(SmallVectorImpl
<Register
> &DstRegs
,
3155 ArrayRef
<Register
> Src1Regs
,
3156 ArrayRef
<Register
> Src2Regs
,
3158 MachineIRBuilder
&B
= MIRBuilder
;
3159 unsigned SrcParts
= Src1Regs
.size();
3160 unsigned DstParts
= DstRegs
.size();
3162 unsigned DstIdx
= 0; // Low bits of the result.
3163 Register FactorSum
=
3164 B
.buildMul(NarrowTy
, Src1Regs
[DstIdx
], Src2Regs
[DstIdx
]).getReg(0);
3165 DstRegs
[DstIdx
] = FactorSum
;
3167 unsigned CarrySumPrevDstIdx
;
3168 SmallVector
<Register
, 4> Factors
;
3170 for (DstIdx
= 1; DstIdx
< DstParts
; DstIdx
++) {
3171 // Collect low parts of muls for DstIdx.
3172 for (unsigned i
= DstIdx
+ 1 < SrcParts
? 0 : DstIdx
- SrcParts
+ 1;
3173 i
<= std::min(DstIdx
, SrcParts
- 1); ++i
) {
3174 MachineInstrBuilder Mul
=
3175 B
.buildMul(NarrowTy
, Src1Regs
[DstIdx
- i
], Src2Regs
[i
]);
3176 Factors
.push_back(Mul
.getReg(0));
3178 // Collect high parts of muls from previous DstIdx.
3179 for (unsigned i
= DstIdx
< SrcParts
? 0 : DstIdx
- SrcParts
;
3180 i
<= std::min(DstIdx
- 1, SrcParts
- 1); ++i
) {
3181 MachineInstrBuilder Umulh
=
3182 B
.buildUMulH(NarrowTy
, Src1Regs
[DstIdx
- 1 - i
], Src2Regs
[i
]);
3183 Factors
.push_back(Umulh
.getReg(0));
3185 // Add CarrySum from additons calculated for previous DstIdx.
3187 Factors
.push_back(CarrySumPrevDstIdx
);
3191 // Add all factors and accumulate all carries into CarrySum.
3192 if (DstIdx
!= DstParts
- 1) {
3193 MachineInstrBuilder Uaddo
=
3194 B
.buildUAddo(NarrowTy
, LLT::scalar(1), Factors
[0], Factors
[1]);
3195 FactorSum
= Uaddo
.getReg(0);
3196 CarrySum
= B
.buildZExt(NarrowTy
, Uaddo
.getReg(1)).getReg(0);
3197 for (unsigned i
= 2; i
< Factors
.size(); ++i
) {
3198 MachineInstrBuilder Uaddo
=
3199 B
.buildUAddo(NarrowTy
, LLT::scalar(1), FactorSum
, Factors
[i
]);
3200 FactorSum
= Uaddo
.getReg(0);
3201 MachineInstrBuilder Carry
= B
.buildZExt(NarrowTy
, Uaddo
.getReg(1));
3202 CarrySum
= B
.buildAdd(NarrowTy
, CarrySum
, Carry
).getReg(0);
3205 // Since value for the next index is not calculated, neither is CarrySum.
3206 FactorSum
= B
.buildAdd(NarrowTy
, Factors
[0], Factors
[1]).getReg(0);
3207 for (unsigned i
= 2; i
< Factors
.size(); ++i
)
3208 FactorSum
= B
.buildAdd(NarrowTy
, FactorSum
, Factors
[i
]).getReg(0);
3211 CarrySumPrevDstIdx
= CarrySum
;
3212 DstRegs
[DstIdx
] = FactorSum
;
3217 LegalizerHelper::LegalizeResult
3218 LegalizerHelper::narrowScalarMul(MachineInstr
&MI
, LLT NarrowTy
) {
3219 Register DstReg
= MI
.getOperand(0).getReg();
3220 Register Src1
= MI
.getOperand(1).getReg();
3221 Register Src2
= MI
.getOperand(2).getReg();
3223 LLT Ty
= MRI
.getType(DstReg
);
3225 return UnableToLegalize
;
3227 unsigned SrcSize
= MRI
.getType(Src1
).getSizeInBits();
3228 unsigned DstSize
= Ty
.getSizeInBits();
3229 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
3230 if (DstSize
% NarrowSize
!= 0 || SrcSize
% NarrowSize
!= 0)
3231 return UnableToLegalize
;
3233 unsigned NumDstParts
= DstSize
/ NarrowSize
;
3234 unsigned NumSrcParts
= SrcSize
/ NarrowSize
;
3235 bool IsMulHigh
= MI
.getOpcode() == TargetOpcode::G_UMULH
;
3236 unsigned DstTmpParts
= NumDstParts
* (IsMulHigh
? 2 : 1);
3238 SmallVector
<Register
, 2> Src1Parts
, Src2Parts
, DstTmpRegs
;
3239 extractParts(Src1
, NarrowTy
, NumSrcParts
, Src1Parts
);
3240 extractParts(Src2
, NarrowTy
, NumSrcParts
, Src2Parts
);
3241 DstTmpRegs
.resize(DstTmpParts
);
3242 multiplyRegisters(DstTmpRegs
, Src1Parts
, Src2Parts
, NarrowTy
);
3244 // Take only high half of registers if this is high mul.
3245 ArrayRef
<Register
> DstRegs(
3246 IsMulHigh
? &DstTmpRegs
[DstTmpParts
/ 2] : &DstTmpRegs
[0], NumDstParts
);
3247 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
3248 MI
.eraseFromParent();
3252 LegalizerHelper::LegalizeResult
3253 LegalizerHelper::narrowScalarExtract(MachineInstr
&MI
, unsigned TypeIdx
,
3256 return UnableToLegalize
;
3258 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
3260 int64_t SizeOp1
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
3261 // FIXME: add support for when SizeOp1 isn't an exact multiple of
3263 if (SizeOp1
% NarrowSize
!= 0)
3264 return UnableToLegalize
;
3265 int NumParts
= SizeOp1
/ NarrowSize
;
3267 SmallVector
<Register
, 2> SrcRegs
, DstRegs
;
3268 SmallVector
<uint64_t, 2> Indexes
;
3269 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
);
3271 Register OpReg
= MI
.getOperand(0).getReg();
3272 uint64_t OpStart
= MI
.getOperand(2).getImm();
3273 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
3274 for (int i
= 0; i
< NumParts
; ++i
) {
3275 unsigned SrcStart
= i
* NarrowSize
;
3277 if (SrcStart
+ NarrowSize
<= OpStart
|| SrcStart
>= OpStart
+ OpSize
) {
3278 // No part of the extract uses this subregister, ignore it.
3280 } else if (SrcStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
3281 // The entire subregister is extracted, forward the value.
3282 DstRegs
.push_back(SrcRegs
[i
]);
3286 // OpSegStart is where this destination segment would start in OpReg if it
3287 // extended infinitely in both directions.
3288 int64_t ExtractOffset
;
3290 if (OpStart
< SrcStart
) {
3292 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- SrcStart
);
3294 ExtractOffset
= OpStart
- SrcStart
;
3295 SegSize
= std::min(SrcStart
+ NarrowSize
- OpStart
, OpSize
);
3298 Register SegReg
= SrcRegs
[i
];
3299 if (ExtractOffset
!= 0 || SegSize
!= NarrowSize
) {
3300 // A genuine extract is needed.
3301 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
3302 MIRBuilder
.buildExtract(SegReg
, SrcRegs
[i
], ExtractOffset
);
3305 DstRegs
.push_back(SegReg
);
3308 Register DstReg
= MI
.getOperand(0).getReg();
3309 if(MRI
.getType(DstReg
).isVector())
3310 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
3312 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
3313 MI
.eraseFromParent();
3317 LegalizerHelper::LegalizeResult
3318 LegalizerHelper::narrowScalarInsert(MachineInstr
&MI
, unsigned TypeIdx
,
3320 // FIXME: Don't know how to handle secondary types yet.
3322 return UnableToLegalize
;
3324 uint64_t SizeOp0
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
3325 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
3327 // FIXME: add support for when SizeOp0 isn't an exact multiple of
3329 if (SizeOp0
% NarrowSize
!= 0)
3330 return UnableToLegalize
;
3332 int NumParts
= SizeOp0
/ NarrowSize
;
3334 SmallVector
<Register
, 2> SrcRegs
, DstRegs
;
3335 SmallVector
<uint64_t, 2> Indexes
;
3336 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
);
3338 Register OpReg
= MI
.getOperand(2).getReg();
3339 uint64_t OpStart
= MI
.getOperand(3).getImm();
3340 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
3341 for (int i
= 0; i
< NumParts
; ++i
) {
3342 unsigned DstStart
= i
* NarrowSize
;
3344 if (DstStart
+ NarrowSize
<= OpStart
|| DstStart
>= OpStart
+ OpSize
) {
3345 // No part of the insert affects this subregister, forward the original.
3346 DstRegs
.push_back(SrcRegs
[i
]);
3348 } else if (DstStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
3349 // The entire subregister is defined by this insert, forward the new
3351 DstRegs
.push_back(OpReg
);
3355 // OpSegStart is where this destination segment would start in OpReg if it
3356 // extended infinitely in both directions.
3357 int64_t ExtractOffset
, InsertOffset
;
3359 if (OpStart
< DstStart
) {
3361 ExtractOffset
= DstStart
- OpStart
;
3362 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- DstStart
);
3364 InsertOffset
= OpStart
- DstStart
;
3367 std::min(NarrowSize
- InsertOffset
, OpStart
+ OpSize
- DstStart
);
3370 Register SegReg
= OpReg
;
3371 if (ExtractOffset
!= 0 || SegSize
!= OpSize
) {
3372 // A genuine extract is needed.
3373 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
3374 MIRBuilder
.buildExtract(SegReg
, OpReg
, ExtractOffset
);
3377 Register DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
3378 MIRBuilder
.buildInsert(DstReg
, SrcRegs
[i
], SegReg
, InsertOffset
);
3379 DstRegs
.push_back(DstReg
);
3382 assert(DstRegs
.size() == (unsigned)NumParts
&& "not all parts covered");
3383 Register DstReg
= MI
.getOperand(0).getReg();
3384 if(MRI
.getType(DstReg
).isVector())
3385 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
3387 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
3388 MI
.eraseFromParent();
3392 LegalizerHelper::LegalizeResult
3393 LegalizerHelper::narrowScalarBasic(MachineInstr
&MI
, unsigned TypeIdx
,
3395 Register DstReg
= MI
.getOperand(0).getReg();
3396 LLT DstTy
= MRI
.getType(DstReg
);
3398 assert(MI
.getNumOperands() == 3 && TypeIdx
== 0);
3400 SmallVector
<Register
, 4> DstRegs
, DstLeftoverRegs
;
3401 SmallVector
<Register
, 4> Src0Regs
, Src0LeftoverRegs
;
3402 SmallVector
<Register
, 4> Src1Regs
, Src1LeftoverRegs
;
3404 if (!extractParts(MI
.getOperand(1).getReg(), DstTy
, NarrowTy
, LeftoverTy
,
3405 Src0Regs
, Src0LeftoverRegs
))
3406 return UnableToLegalize
;
3409 if (!extractParts(MI
.getOperand(2).getReg(), DstTy
, NarrowTy
, Unused
,
3410 Src1Regs
, Src1LeftoverRegs
))
3411 llvm_unreachable("inconsistent extractParts result");
3413 for (unsigned I
= 0, E
= Src1Regs
.size(); I
!= E
; ++I
) {
3414 auto Inst
= MIRBuilder
.buildInstr(MI
.getOpcode(), {NarrowTy
},
3415 {Src0Regs
[I
], Src1Regs
[I
]});
3416 DstRegs
.push_back(Inst
->getOperand(0).getReg());
3419 for (unsigned I
= 0, E
= Src1LeftoverRegs
.size(); I
!= E
; ++I
) {
3420 auto Inst
= MIRBuilder
.buildInstr(
3422 {LeftoverTy
}, {Src0LeftoverRegs
[I
], Src1LeftoverRegs
[I
]});
3423 DstLeftoverRegs
.push_back(Inst
->getOperand(0).getReg());
3426 insertParts(DstReg
, DstTy
, NarrowTy
, DstRegs
,
3427 LeftoverTy
, DstLeftoverRegs
);
3429 MI
.eraseFromParent();
3433 LegalizerHelper::LegalizeResult
3434 LegalizerHelper::narrowScalarSelect(MachineInstr
&MI
, unsigned TypeIdx
,
3437 return UnableToLegalize
;
3439 Register CondReg
= MI
.getOperand(1).getReg();
3440 LLT CondTy
= MRI
.getType(CondReg
);
3441 if (CondTy
.isVector()) // TODO: Handle vselect
3442 return UnableToLegalize
;
3444 Register DstReg
= MI
.getOperand(0).getReg();
3445 LLT DstTy
= MRI
.getType(DstReg
);
3447 SmallVector
<Register
, 4> DstRegs
, DstLeftoverRegs
;
3448 SmallVector
<Register
, 4> Src1Regs
, Src1LeftoverRegs
;
3449 SmallVector
<Register
, 4> Src2Regs
, Src2LeftoverRegs
;
3451 if (!extractParts(MI
.getOperand(2).getReg(), DstTy
, NarrowTy
, LeftoverTy
,
3452 Src1Regs
, Src1LeftoverRegs
))
3453 return UnableToLegalize
;
3456 if (!extractParts(MI
.getOperand(3).getReg(), DstTy
, NarrowTy
, Unused
,
3457 Src2Regs
, Src2LeftoverRegs
))
3458 llvm_unreachable("inconsistent extractParts result");
3460 for (unsigned I
= 0, E
= Src1Regs
.size(); I
!= E
; ++I
) {
3461 auto Select
= MIRBuilder
.buildSelect(NarrowTy
,
3462 CondReg
, Src1Regs
[I
], Src2Regs
[I
]);
3463 DstRegs
.push_back(Select
->getOperand(0).getReg());
3466 for (unsigned I
= 0, E
= Src1LeftoverRegs
.size(); I
!= E
; ++I
) {
3467 auto Select
= MIRBuilder
.buildSelect(
3468 LeftoverTy
, CondReg
, Src1LeftoverRegs
[I
], Src2LeftoverRegs
[I
]);
3469 DstLeftoverRegs
.push_back(Select
->getOperand(0).getReg());
3472 insertParts(DstReg
, DstTy
, NarrowTy
, DstRegs
,
3473 LeftoverTy
, DstLeftoverRegs
);
3475 MI
.eraseFromParent();
3479 LegalizerHelper::LegalizeResult
3480 LegalizerHelper::lowerBitCount(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3481 unsigned Opc
= MI
.getOpcode();
3482 auto &TII
= *MI
.getMF()->getSubtarget().getInstrInfo();
3483 auto isSupported
= [this](const LegalityQuery
&Q
) {
3484 auto QAction
= LI
.getAction(Q
).Action
;
3485 return QAction
== Legal
|| QAction
== Libcall
|| QAction
== Custom
;
3489 return UnableToLegalize
;
3490 case TargetOpcode::G_CTLZ_ZERO_UNDEF
: {
3491 // This trivially expands to CTLZ.
3492 Observer
.changingInstr(MI
);
3493 MI
.setDesc(TII
.get(TargetOpcode::G_CTLZ
));
3494 Observer
.changedInstr(MI
);
3497 case TargetOpcode::G_CTLZ
: {
3498 Register SrcReg
= MI
.getOperand(1).getReg();
3499 unsigned Len
= Ty
.getSizeInBits();
3500 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF
, {Ty
, Ty
}})) {
3501 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
3502 auto MIBCtlzZU
= MIRBuilder
.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF
,
3504 auto MIBZero
= MIRBuilder
.buildConstant(Ty
, 0);
3505 auto MIBLen
= MIRBuilder
.buildConstant(Ty
, Len
);
3506 auto MIBICmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LLT::scalar(1),
3508 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), MIBICmp
, MIBLen
,
3510 MI
.eraseFromParent();
3513 // for now, we do this:
3514 // NewLen = NextPowerOf2(Len);
3515 // x = x | (x >> 1);
3516 // x = x | (x >> 2);
3518 // x = x | (x >>16);
3519 // x = x | (x >>32); // for 64-bit input
3521 // return Len - popcount(x);
3523 // Ref: "Hacker's Delight" by Henry Warren
3524 Register Op
= SrcReg
;
3525 unsigned NewLen
= PowerOf2Ceil(Len
);
3526 for (unsigned i
= 0; (1U << i
) <= (NewLen
/ 2); ++i
) {
3527 auto MIBShiftAmt
= MIRBuilder
.buildConstant(Ty
, 1ULL << i
);
3528 auto MIBOp
= MIRBuilder
.buildInstr(
3529 TargetOpcode::G_OR
, {Ty
},
3530 {Op
, MIRBuilder
.buildInstr(TargetOpcode::G_LSHR
, {Ty
},
3531 {Op
, MIBShiftAmt
})});
3532 Op
= MIBOp
->getOperand(0).getReg();
3534 auto MIBPop
= MIRBuilder
.buildInstr(TargetOpcode::G_CTPOP
, {Ty
}, {Op
});
3535 MIRBuilder
.buildInstr(TargetOpcode::G_SUB
, {MI
.getOperand(0).getReg()},
3536 {MIRBuilder
.buildConstant(Ty
, Len
), MIBPop
});
3537 MI
.eraseFromParent();
3540 case TargetOpcode::G_CTTZ_ZERO_UNDEF
: {
3541 // This trivially expands to CTTZ.
3542 Observer
.changingInstr(MI
);
3543 MI
.setDesc(TII
.get(TargetOpcode::G_CTTZ
));
3544 Observer
.changedInstr(MI
);
3547 case TargetOpcode::G_CTTZ
: {
3548 Register SrcReg
= MI
.getOperand(1).getReg();
3549 unsigned Len
= Ty
.getSizeInBits();
3550 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF
, {Ty
, Ty
}})) {
3551 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
3553 auto MIBCttzZU
= MIRBuilder
.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF
,
3555 auto MIBZero
= MIRBuilder
.buildConstant(Ty
, 0);
3556 auto MIBLen
= MIRBuilder
.buildConstant(Ty
, Len
);
3557 auto MIBICmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LLT::scalar(1),
3559 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), MIBICmp
, MIBLen
,
3561 MI
.eraseFromParent();
3564 // for now, we use: { return popcount(~x & (x - 1)); }
3565 // unless the target has ctlz but not ctpop, in which case we use:
3566 // { return 32 - nlz(~x & (x-1)); }
3567 // Ref: "Hacker's Delight" by Henry Warren
3568 auto MIBCstNeg1
= MIRBuilder
.buildConstant(Ty
, -1);
3570 MIRBuilder
.buildInstr(TargetOpcode::G_XOR
, {Ty
}, {SrcReg
, MIBCstNeg1
});
3571 auto MIBTmp
= MIRBuilder
.buildInstr(
3572 TargetOpcode::G_AND
, {Ty
},
3573 {MIBNot
, MIRBuilder
.buildInstr(TargetOpcode::G_ADD
, {Ty
},
3574 {SrcReg
, MIBCstNeg1
})});
3575 if (!isSupported({TargetOpcode::G_CTPOP
, {Ty
, Ty
}}) &&
3576 isSupported({TargetOpcode::G_CTLZ
, {Ty
, Ty
}})) {
3577 auto MIBCstLen
= MIRBuilder
.buildConstant(Ty
, Len
);
3578 MIRBuilder
.buildInstr(
3579 TargetOpcode::G_SUB
, {MI
.getOperand(0).getReg()},
3581 MIRBuilder
.buildInstr(TargetOpcode::G_CTLZ
, {Ty
}, {MIBTmp
})});
3582 MI
.eraseFromParent();
3585 MI
.setDesc(TII
.get(TargetOpcode::G_CTPOP
));
3586 MI
.getOperand(1).setReg(MIBTmp
->getOperand(0).getReg());
3592 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3594 LegalizerHelper::LegalizeResult
3595 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr
&MI
) {
3596 Register Dst
= MI
.getOperand(0).getReg();
3597 Register Src
= MI
.getOperand(1).getReg();
3598 const LLT S64
= LLT::scalar(64);
3599 const LLT S32
= LLT::scalar(32);
3600 const LLT S1
= LLT::scalar(1);
3602 assert(MRI
.getType(Src
) == S64
&& MRI
.getType(Dst
) == S32
);
3604 // unsigned cul2f(ulong u) {
3605 // uint lz = clz(u);
3606 // uint e = (u != 0) ? 127U + 63U - lz : 0;
3607 // u = (u << lz) & 0x7fffffffffffffffUL;
3608 // ulong t = u & 0xffffffffffUL;
3609 // uint v = (e << 23) | (uint)(u >> 40);
3610 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3611 // return as_float(v + r);
3614 auto Zero32
= MIRBuilder
.buildConstant(S32
, 0);
3615 auto Zero64
= MIRBuilder
.buildConstant(S64
, 0);
3617 auto LZ
= MIRBuilder
.buildCTLZ_ZERO_UNDEF(S32
, Src
);
3619 auto K
= MIRBuilder
.buildConstant(S32
, 127U + 63U);
3620 auto Sub
= MIRBuilder
.buildSub(S32
, K
, LZ
);
3622 auto NotZero
= MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, S1
, Src
, Zero64
);
3623 auto E
= MIRBuilder
.buildSelect(S32
, NotZero
, Sub
, Zero32
);
3625 auto Mask0
= MIRBuilder
.buildConstant(S64
, (-1ULL) >> 1);
3626 auto ShlLZ
= MIRBuilder
.buildShl(S64
, Src
, LZ
);
3628 auto U
= MIRBuilder
.buildAnd(S64
, ShlLZ
, Mask0
);
3630 auto Mask1
= MIRBuilder
.buildConstant(S64
, 0xffffffffffULL
);
3631 auto T
= MIRBuilder
.buildAnd(S64
, U
, Mask1
);
3633 auto UShl
= MIRBuilder
.buildLShr(S64
, U
, MIRBuilder
.buildConstant(S64
, 40));
3634 auto ShlE
= MIRBuilder
.buildShl(S32
, E
, MIRBuilder
.buildConstant(S32
, 23));
3635 auto V
= MIRBuilder
.buildOr(S32
, ShlE
, MIRBuilder
.buildTrunc(S32
, UShl
));
3637 auto C
= MIRBuilder
.buildConstant(S64
, 0x8000000000ULL
);
3638 auto RCmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_UGT
, S1
, T
, C
);
3639 auto TCmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, S1
, T
, C
);
3640 auto One
= MIRBuilder
.buildConstant(S32
, 1);
3642 auto VTrunc1
= MIRBuilder
.buildAnd(S32
, V
, One
);
3643 auto Select0
= MIRBuilder
.buildSelect(S32
, TCmp
, VTrunc1
, Zero32
);
3644 auto R
= MIRBuilder
.buildSelect(S32
, RCmp
, One
, Select0
);
3645 MIRBuilder
.buildAdd(Dst
, V
, R
);
3650 LegalizerHelper::LegalizeResult
3651 LegalizerHelper::lowerUITOFP(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3652 Register Dst
= MI
.getOperand(0).getReg();
3653 Register Src
= MI
.getOperand(1).getReg();
3654 LLT DstTy
= MRI
.getType(Dst
);
3655 LLT SrcTy
= MRI
.getType(Src
);
3657 if (SrcTy
!= LLT::scalar(64))
3658 return UnableToLegalize
;
3660 if (DstTy
== LLT::scalar(32)) {
3661 // TODO: SelectionDAG has several alternative expansions to port which may
3662 // be more reasonble depending on the available instructions. If a target
3663 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3664 // intermediate type, this is probably worse.
3665 return lowerU64ToF32BitOps(MI
);
3668 return UnableToLegalize
;
3671 LegalizerHelper::LegalizeResult
3672 LegalizerHelper::lowerSITOFP(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3673 Register Dst
= MI
.getOperand(0).getReg();
3674 Register Src
= MI
.getOperand(1).getReg();
3675 LLT DstTy
= MRI
.getType(Dst
);
3676 LLT SrcTy
= MRI
.getType(Src
);
3678 const LLT S64
= LLT::scalar(64);
3679 const LLT S32
= LLT::scalar(32);
3680 const LLT S1
= LLT::scalar(1);
3683 return UnableToLegalize
;
3686 // signed cl2f(long l) {
3687 // long s = l >> 63;
3688 // float r = cul2f((l + s) ^ s);
3689 // return s ? -r : r;
3692 auto SignBit
= MIRBuilder
.buildConstant(S64
, 63);
3693 auto S
= MIRBuilder
.buildAShr(S64
, L
, SignBit
);
3695 auto LPlusS
= MIRBuilder
.buildAdd(S64
, L
, S
);
3696 auto Xor
= MIRBuilder
.buildXor(S64
, LPlusS
, S
);
3697 auto R
= MIRBuilder
.buildUITOFP(S32
, Xor
);
3699 auto RNeg
= MIRBuilder
.buildFNeg(S32
, R
);
3700 auto SignNotZero
= MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, S1
, S
,
3701 MIRBuilder
.buildConstant(S64
, 0));
3702 MIRBuilder
.buildSelect(Dst
, SignNotZero
, RNeg
, R
);
3706 return UnableToLegalize
;
3709 static CmpInst::Predicate
minMaxToCompare(unsigned Opc
) {
3711 case TargetOpcode::G_SMIN
:
3712 return CmpInst::ICMP_SLT
;
3713 case TargetOpcode::G_SMAX
:
3714 return CmpInst::ICMP_SGT
;
3715 case TargetOpcode::G_UMIN
:
3716 return CmpInst::ICMP_ULT
;
3717 case TargetOpcode::G_UMAX
:
3718 return CmpInst::ICMP_UGT
;
3720 llvm_unreachable("not in integer min/max");
3724 LegalizerHelper::LegalizeResult
3725 LegalizerHelper::lowerMinMax(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3726 Register Dst
= MI
.getOperand(0).getReg();
3727 Register Src0
= MI
.getOperand(1).getReg();
3728 Register Src1
= MI
.getOperand(2).getReg();
3730 const CmpInst::Predicate Pred
= minMaxToCompare(MI
.getOpcode());
3731 LLT CmpType
= MRI
.getType(Dst
).changeElementSize(1);
3733 auto Cmp
= MIRBuilder
.buildICmp(Pred
, CmpType
, Src0
, Src1
);
3734 MIRBuilder
.buildSelect(Dst
, Cmp
, Src0
, Src1
);
3736 MI
.eraseFromParent();
3740 LegalizerHelper::LegalizeResult
3741 LegalizerHelper::lowerFCopySign(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
3742 Register Dst
= MI
.getOperand(0).getReg();
3743 Register Src0
= MI
.getOperand(1).getReg();
3744 Register Src1
= MI
.getOperand(2).getReg();
3746 const LLT Src0Ty
= MRI
.getType(Src0
);
3747 const LLT Src1Ty
= MRI
.getType(Src1
);
3749 const int Src0Size
= Src0Ty
.getScalarSizeInBits();
3750 const int Src1Size
= Src1Ty
.getScalarSizeInBits();
3752 auto SignBitMask
= MIRBuilder
.buildConstant(
3753 Src0Ty
, APInt::getSignMask(Src0Size
));
3755 auto NotSignBitMask
= MIRBuilder
.buildConstant(
3756 Src0Ty
, APInt::getLowBitsSet(Src0Size
, Src0Size
- 1));
3758 auto And0
= MIRBuilder
.buildAnd(Src0Ty
, Src0
, NotSignBitMask
);
3761 if (Src0Ty
== Src1Ty
) {
3762 auto And1
= MIRBuilder
.buildAnd(Src1Ty
, Src0
, SignBitMask
);
3763 Or
= MIRBuilder
.buildOr(Dst
, And0
, And1
);
3764 } else if (Src0Size
> Src1Size
) {
3765 auto ShiftAmt
= MIRBuilder
.buildConstant(Src0Ty
, Src0Size
- Src1Size
);
3766 auto Zext
= MIRBuilder
.buildZExt(Src0Ty
, Src1
);
3767 auto Shift
= MIRBuilder
.buildShl(Src0Ty
, Zext
, ShiftAmt
);
3768 auto And1
= MIRBuilder
.buildAnd(Src0Ty
, Shift
, SignBitMask
);
3769 Or
= MIRBuilder
.buildOr(Dst
, And0
, And1
);
3771 auto ShiftAmt
= MIRBuilder
.buildConstant(Src1Ty
, Src1Size
- Src0Size
);
3772 auto Shift
= MIRBuilder
.buildLShr(Src1Ty
, Src1
, ShiftAmt
);
3773 auto Trunc
= MIRBuilder
.buildTrunc(Src0Ty
, Shift
);
3774 auto And1
= MIRBuilder
.buildAnd(Src0Ty
, Trunc
, SignBitMask
);
3775 Or
= MIRBuilder
.buildOr(Dst
, And0
, And1
);
3778 // Be careful about setting nsz/nnan/ninf on every instruction, since the
3779 // constants are a nan and -0.0, but the final result should preserve
3781 if (unsigned Flags
= MI
.getFlags())
3782 Or
->setFlags(Flags
);
3784 MI
.eraseFromParent();
3788 LegalizerHelper::LegalizeResult
3789 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr
&MI
) {
3790 unsigned NewOp
= MI
.getOpcode() == TargetOpcode::G_FMINNUM
?
3791 TargetOpcode::G_FMINNUM_IEEE
: TargetOpcode::G_FMAXNUM_IEEE
;
3793 Register Dst
= MI
.getOperand(0).getReg();
3794 Register Src0
= MI
.getOperand(1).getReg();
3795 Register Src1
= MI
.getOperand(2).getReg();
3796 LLT Ty
= MRI
.getType(Dst
);
3798 if (!MI
.getFlag(MachineInstr::FmNoNans
)) {
3799 // Insert canonicalizes if it's possible we need to quiet to get correct
3802 // Note this must be done here, and not as an optimization combine in the
3803 // absence of a dedicate quiet-snan instruction as we're using an
3804 // omni-purpose G_FCANONICALIZE.
3805 if (!isKnownNeverSNaN(Src0
, MRI
))
3806 Src0
= MIRBuilder
.buildFCanonicalize(Ty
, Src0
, MI
.getFlags()).getReg(0);
3808 if (!isKnownNeverSNaN(Src1
, MRI
))
3809 Src1
= MIRBuilder
.buildFCanonicalize(Ty
, Src1
, MI
.getFlags()).getReg(0);
3812 // If there are no nans, it's safe to simply replace this with the non-IEEE
3814 MIRBuilder
.buildInstr(NewOp
, {Dst
}, {Src0
, Src1
}, MI
.getFlags());
3815 MI
.eraseFromParent();
3819 LegalizerHelper::LegalizeResult
3820 LegalizerHelper::lowerUnmergeValues(MachineInstr
&MI
) {
3821 const unsigned NumDst
= MI
.getNumOperands() - 1;
3822 const Register SrcReg
= MI
.getOperand(NumDst
).getReg();
3823 LLT SrcTy
= MRI
.getType(SrcReg
);
3825 Register Dst0Reg
= MI
.getOperand(0).getReg();
3826 LLT DstTy
= MRI
.getType(Dst0Reg
);
3829 // Expand scalarizing unmerge as bitcast to integer and shift.
3830 if (!DstTy
.isVector() && SrcTy
.isVector() &&
3831 SrcTy
.getElementType() == DstTy
) {
3832 LLT IntTy
= LLT::scalar(SrcTy
.getSizeInBits());
3833 Register Cast
= MIRBuilder
.buildBitcast(IntTy
, SrcReg
).getReg(0);
3835 MIRBuilder
.buildTrunc(Dst0Reg
, Cast
);
3837 const unsigned DstSize
= DstTy
.getSizeInBits();
3838 unsigned Offset
= DstSize
;
3839 for (unsigned I
= 1; I
!= NumDst
; ++I
, Offset
+= DstSize
) {
3840 auto ShiftAmt
= MIRBuilder
.buildConstant(IntTy
, Offset
);
3841 auto Shift
= MIRBuilder
.buildLShr(IntTy
, Cast
, ShiftAmt
);
3842 MIRBuilder
.buildTrunc(MI
.getOperand(I
), Shift
);
3845 MI
.eraseFromParent();
3849 return UnableToLegalize
;
3852 LegalizerHelper::LegalizeResult
3853 LegalizerHelper::lowerShuffleVector(MachineInstr
&MI
) {
3854 Register DstReg
= MI
.getOperand(0).getReg();
3855 Register Src0Reg
= MI
.getOperand(1).getReg();
3856 Register Src1Reg
= MI
.getOperand(2).getReg();
3857 LLT Src0Ty
= MRI
.getType(Src0Reg
);
3858 LLT DstTy
= MRI
.getType(DstReg
);
3859 LLT IdxTy
= LLT::scalar(32);
3861 const Constant
*ShufMask
= MI
.getOperand(3).getShuffleMask();
3863 SmallVector
<int, 32> Mask
;
3864 ShuffleVectorInst::getShuffleMask(ShufMask
, Mask
);
3866 if (DstTy
.isScalar()) {
3867 if (Src0Ty
.isVector())
3868 return UnableToLegalize
;
3870 // This is just a SELECT.
3871 assert(Mask
.size() == 1 && "Expected a single mask element");
3873 if (Mask
[0] < 0 || Mask
[0] > 1)
3874 Val
= MIRBuilder
.buildUndef(DstTy
).getReg(0);
3876 Val
= Mask
[0] == 0 ? Src0Reg
: Src1Reg
;
3877 MIRBuilder
.buildCopy(DstReg
, Val
);
3878 MI
.eraseFromParent();
3883 SmallVector
<Register
, 32> BuildVec
;
3884 LLT EltTy
= DstTy
.getElementType();
3886 for (int Idx
: Mask
) {
3888 if (!Undef
.isValid())
3889 Undef
= MIRBuilder
.buildUndef(EltTy
).getReg(0);
3890 BuildVec
.push_back(Undef
);
3894 if (Src0Ty
.isScalar()) {
3895 BuildVec
.push_back(Idx
== 0 ? Src0Reg
: Src1Reg
);
3897 int NumElts
= Src0Ty
.getNumElements();
3898 Register SrcVec
= Idx
< NumElts
? Src0Reg
: Src1Reg
;
3899 int ExtractIdx
= Idx
< NumElts
? Idx
: Idx
- NumElts
;
3900 auto IdxK
= MIRBuilder
.buildConstant(IdxTy
, ExtractIdx
);
3901 auto Extract
= MIRBuilder
.buildExtractVectorElement(EltTy
, SrcVec
, IdxK
);
3902 BuildVec
.push_back(Extract
.getReg(0));
3906 MIRBuilder
.buildBuildVector(DstReg
, BuildVec
);
3907 MI
.eraseFromParent();