//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//
16 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
17 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
27 #define DEBUG_TYPE "legalizer"
30 using namespace LegalizeActions
;
32 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
)
33 : MRI(MF
.getRegInfo()), LI(*MF
.getSubtarget().getLegalizerInfo()) {
37 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
, const LegalizerInfo
&LI
)
38 : MRI(MF
.getRegInfo()), LI(LI
) {
41 LegalizerHelper::LegalizeResult
42 LegalizerHelper::legalizeInstrStep(MachineInstr
&MI
) {
43 LLVM_DEBUG(dbgs() << "Legalizing: "; MI
.print(dbgs()));
45 auto Step
= LI
.getAction(MI
, MRI
);
46 switch (Step
.Action
) {
48 LLVM_DEBUG(dbgs() << ".. Already legal\n");
51 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
54 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
55 return narrowScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
57 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
58 return widenScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
60 LLVM_DEBUG(dbgs() << ".. Lower\n");
61 return lower(MI
, Step
.TypeIdx
, Step
.NewType
);
63 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
64 return fewerElementsVector(MI
, Step
.TypeIdx
, Step
.NewType
);
66 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
67 return LI
.legalizeCustom(MI
, MRI
, MIRBuilder
) ? Legalized
70 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
71 return UnableToLegalize
;
75 void LegalizerHelper::extractParts(unsigned Reg
, LLT Ty
, int NumParts
,
76 SmallVectorImpl
<unsigned> &VRegs
) {
77 for (int i
= 0; i
< NumParts
; ++i
)
78 VRegs
.push_back(MRI
.createGenericVirtualRegister(Ty
));
79 MIRBuilder
.buildUnmerge(VRegs
, Reg
);
82 static RTLIB::Libcall
getRTLibDesc(unsigned Opcode
, unsigned Size
) {
84 case TargetOpcode::G_SDIV
:
85 assert(Size
== 32 && "Unsupported size");
86 return RTLIB::SDIV_I32
;
87 case TargetOpcode::G_UDIV
:
88 assert(Size
== 32 && "Unsupported size");
89 return RTLIB::UDIV_I32
;
90 case TargetOpcode::G_SREM
:
91 assert(Size
== 32 && "Unsupported size");
92 return RTLIB::SREM_I32
;
93 case TargetOpcode::G_UREM
:
94 assert(Size
== 32 && "Unsupported size");
95 return RTLIB::UREM_I32
;
96 case TargetOpcode::G_FADD
:
97 assert((Size
== 32 || Size
== 64) && "Unsupported size");
98 return Size
== 64 ? RTLIB::ADD_F64
: RTLIB::ADD_F32
;
99 case TargetOpcode::G_FSUB
:
100 assert((Size
== 32 || Size
== 64) && "Unsupported size");
101 return Size
== 64 ? RTLIB::SUB_F64
: RTLIB::SUB_F32
;
102 case TargetOpcode::G_FMUL
:
103 assert((Size
== 32 || Size
== 64) && "Unsupported size");
104 return Size
== 64 ? RTLIB::MUL_F64
: RTLIB::MUL_F32
;
105 case TargetOpcode::G_FDIV
:
106 assert((Size
== 32 || Size
== 64) && "Unsupported size");
107 return Size
== 64 ? RTLIB::DIV_F64
: RTLIB::DIV_F32
;
108 case TargetOpcode::G_FREM
:
109 return Size
== 64 ? RTLIB::REM_F64
: RTLIB::REM_F32
;
110 case TargetOpcode::G_FPOW
:
111 return Size
== 64 ? RTLIB::POW_F64
: RTLIB::POW_F32
;
112 case TargetOpcode::G_FMA
:
113 assert((Size
== 32 || Size
== 64) && "Unsupported size");
114 return Size
== 64 ? RTLIB::FMA_F64
: RTLIB::FMA_F32
;
116 llvm_unreachable("Unknown libcall function");
119 LegalizerHelper::LegalizeResult
120 llvm::createLibcall(MachineIRBuilder
&MIRBuilder
, RTLIB::Libcall Libcall
,
121 const CallLowering::ArgInfo
&Result
,
122 ArrayRef
<CallLowering::ArgInfo
> Args
) {
123 auto &CLI
= *MIRBuilder
.getMF().getSubtarget().getCallLowering();
124 auto &TLI
= *MIRBuilder
.getMF().getSubtarget().getTargetLowering();
125 const char *Name
= TLI
.getLibcallName(Libcall
);
127 MIRBuilder
.getMF().getFrameInfo().setHasCalls(true);
128 if (!CLI
.lowerCall(MIRBuilder
, TLI
.getLibcallCallingConv(Libcall
),
129 MachineOperand::CreateES(Name
), Result
, Args
))
130 return LegalizerHelper::UnableToLegalize
;
132 return LegalizerHelper::Legalized
;
135 // Useful for libcalls where all operands have the same type.
136 static LegalizerHelper::LegalizeResult
137 simpleLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, unsigned Size
,
139 auto Libcall
= getRTLibDesc(MI
.getOpcode(), Size
);
141 SmallVector
<CallLowering::ArgInfo
, 3> Args
;
142 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
++)
143 Args
.push_back({MI
.getOperand(i
).getReg(), OpType
});
144 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), OpType
},
148 static RTLIB::Libcall
getConvRTLibDesc(unsigned Opcode
, Type
*ToType
,
150 auto ToMVT
= MVT::getVT(ToType
);
151 auto FromMVT
= MVT::getVT(FromType
);
154 case TargetOpcode::G_FPEXT
:
155 return RTLIB::getFPEXT(FromMVT
, ToMVT
);
156 case TargetOpcode::G_FPTRUNC
:
157 return RTLIB::getFPROUND(FromMVT
, ToMVT
);
158 case TargetOpcode::G_FPTOSI
:
159 return RTLIB::getFPTOSINT(FromMVT
, ToMVT
);
160 case TargetOpcode::G_FPTOUI
:
161 return RTLIB::getFPTOUINT(FromMVT
, ToMVT
);
162 case TargetOpcode::G_SITOFP
:
163 return RTLIB::getSINTTOFP(FromMVT
, ToMVT
);
164 case TargetOpcode::G_UITOFP
:
165 return RTLIB::getUINTTOFP(FromMVT
, ToMVT
);
167 llvm_unreachable("Unsupported libcall function");
170 static LegalizerHelper::LegalizeResult
171 conversionLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, Type
*ToType
,
173 RTLIB::Libcall Libcall
= getConvRTLibDesc(MI
.getOpcode(), ToType
, FromType
);
174 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), ToType
},
175 {{MI
.getOperand(1).getReg(), FromType
}});
178 LegalizerHelper::LegalizeResult
179 LegalizerHelper::libcall(MachineInstr
&MI
) {
180 LLT LLTy
= MRI
.getType(MI
.getOperand(0).getReg());
181 unsigned Size
= LLTy
.getSizeInBits();
182 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
184 MIRBuilder
.setInstr(MI
);
186 switch (MI
.getOpcode()) {
188 return UnableToLegalize
;
189 case TargetOpcode::G_SDIV
:
190 case TargetOpcode::G_UDIV
:
191 case TargetOpcode::G_SREM
:
192 case TargetOpcode::G_UREM
: {
193 Type
*HLTy
= Type::getInt32Ty(Ctx
);
194 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
195 if (Status
!= Legalized
)
199 case TargetOpcode::G_FADD
:
200 case TargetOpcode::G_FSUB
:
201 case TargetOpcode::G_FMUL
:
202 case TargetOpcode::G_FDIV
:
203 case TargetOpcode::G_FMA
:
204 case TargetOpcode::G_FPOW
:
205 case TargetOpcode::G_FREM
: {
206 Type
*HLTy
= Size
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
);
207 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
208 if (Status
!= Legalized
)
212 case TargetOpcode::G_FPEXT
: {
213 // FIXME: Support other floating point types (half, fp128 etc)
214 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
215 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
216 if (ToSize
!= 64 || FromSize
!= 32)
217 return UnableToLegalize
;
218 LegalizeResult Status
= conversionLibcall(
219 MI
, MIRBuilder
, Type::getDoubleTy(Ctx
), Type::getFloatTy(Ctx
));
220 if (Status
!= Legalized
)
224 case TargetOpcode::G_FPTRUNC
: {
225 // FIXME: Support other floating point types (half, fp128 etc)
226 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
227 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
228 if (ToSize
!= 32 || FromSize
!= 64)
229 return UnableToLegalize
;
230 LegalizeResult Status
= conversionLibcall(
231 MI
, MIRBuilder
, Type::getFloatTy(Ctx
), Type::getDoubleTy(Ctx
));
232 if (Status
!= Legalized
)
236 case TargetOpcode::G_FPTOSI
:
237 case TargetOpcode::G_FPTOUI
: {
238 // FIXME: Support other types
239 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
240 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
241 if (ToSize
!= 32 || (FromSize
!= 32 && FromSize
!= 64))
242 return UnableToLegalize
;
243 LegalizeResult Status
= conversionLibcall(
244 MI
, MIRBuilder
, Type::getInt32Ty(Ctx
),
245 FromSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
));
246 if (Status
!= Legalized
)
250 case TargetOpcode::G_SITOFP
:
251 case TargetOpcode::G_UITOFP
: {
252 // FIXME: Support other types
253 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
254 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
255 if (FromSize
!= 32 || (ToSize
!= 32 && ToSize
!= 64))
256 return UnableToLegalize
;
257 LegalizeResult Status
= conversionLibcall(
259 ToSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
),
260 Type::getInt32Ty(Ctx
));
261 if (Status
!= Legalized
)
267 MI
.eraseFromParent();
271 LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalar(MachineInstr
&MI
,
274 // FIXME: Don't know how to handle secondary types yet.
275 if (TypeIdx
!= 0 && MI
.getOpcode() != TargetOpcode::G_EXTRACT
)
276 return UnableToLegalize
;
278 MIRBuilder
.setInstr(MI
);
280 uint64_t SizeOp0
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
281 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
283 switch (MI
.getOpcode()) {
285 return UnableToLegalize
;
286 case TargetOpcode::G_IMPLICIT_DEF
: {
287 // FIXME: add support for when SizeOp0 isn't an exact multiple of
289 if (SizeOp0
% NarrowSize
!= 0)
290 return UnableToLegalize
;
291 int NumParts
= SizeOp0
/ NarrowSize
;
293 SmallVector
<unsigned, 2> DstRegs
;
294 for (int i
= 0; i
< NumParts
; ++i
)
296 MIRBuilder
.buildUndef(NarrowTy
)->getOperand(0).getReg());
297 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
298 MI
.eraseFromParent();
301 case TargetOpcode::G_ADD
: {
302 // FIXME: add support for when SizeOp0 isn't an exact multiple of
304 if (SizeOp0
% NarrowSize
!= 0)
305 return UnableToLegalize
;
306 // Expand in terms of carry-setting/consuming G_ADDE instructions.
307 int NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
309 SmallVector
<unsigned, 2> Src1Regs
, Src2Regs
, DstRegs
;
310 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
311 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
313 unsigned CarryIn
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
314 MIRBuilder
.buildConstant(CarryIn
, 0);
316 for (int i
= 0; i
< NumParts
; ++i
) {
317 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
318 unsigned CarryOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
320 MIRBuilder
.buildUAdde(DstReg
, CarryOut
, Src1Regs
[i
],
321 Src2Regs
[i
], CarryIn
);
323 DstRegs
.push_back(DstReg
);
326 unsigned DstReg
= MI
.getOperand(0).getReg();
327 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
328 MI
.eraseFromParent();
331 case TargetOpcode::G_EXTRACT
: {
333 return UnableToLegalize
;
335 int64_t SizeOp1
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
336 // FIXME: add support for when SizeOp1 isn't an exact multiple of
338 if (SizeOp1
% NarrowSize
!= 0)
339 return UnableToLegalize
;
340 int NumParts
= SizeOp1
/ NarrowSize
;
342 SmallVector
<unsigned, 2> SrcRegs
, DstRegs
;
343 SmallVector
<uint64_t, 2> Indexes
;
344 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
);
346 unsigned OpReg
= MI
.getOperand(0).getReg();
347 uint64_t OpStart
= MI
.getOperand(2).getImm();
348 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
349 for (int i
= 0; i
< NumParts
; ++i
) {
350 unsigned SrcStart
= i
* NarrowSize
;
352 if (SrcStart
+ NarrowSize
<= OpStart
|| SrcStart
>= OpStart
+ OpSize
) {
353 // No part of the extract uses this subregister, ignore it.
355 } else if (SrcStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
356 // The entire subregister is extracted, forward the value.
357 DstRegs
.push_back(SrcRegs
[i
]);
361 // OpSegStart is where this destination segment would start in OpReg if it
362 // extended infinitely in both directions.
363 int64_t ExtractOffset
;
365 if (OpStart
< SrcStart
) {
367 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- SrcStart
);
369 ExtractOffset
= OpStart
- SrcStart
;
370 SegSize
= std::min(SrcStart
+ NarrowSize
- OpStart
, OpSize
);
373 unsigned SegReg
= SrcRegs
[i
];
374 if (ExtractOffset
!= 0 || SegSize
!= NarrowSize
) {
375 // A genuine extract is needed.
376 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
377 MIRBuilder
.buildExtract(SegReg
, SrcRegs
[i
], ExtractOffset
);
380 DstRegs
.push_back(SegReg
);
383 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
384 MI
.eraseFromParent();
387 case TargetOpcode::G_INSERT
: {
388 // FIXME: add support for when SizeOp0 isn't an exact multiple of
390 if (SizeOp0
% NarrowSize
!= 0)
391 return UnableToLegalize
;
393 int NumParts
= SizeOp0
/ NarrowSize
;
395 SmallVector
<unsigned, 2> SrcRegs
, DstRegs
;
396 SmallVector
<uint64_t, 2> Indexes
;
397 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
);
399 unsigned OpReg
= MI
.getOperand(2).getReg();
400 uint64_t OpStart
= MI
.getOperand(3).getImm();
401 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
402 for (int i
= 0; i
< NumParts
; ++i
) {
403 unsigned DstStart
= i
* NarrowSize
;
405 if (DstStart
+ NarrowSize
<= OpStart
|| DstStart
>= OpStart
+ OpSize
) {
406 // No part of the insert affects this subregister, forward the original.
407 DstRegs
.push_back(SrcRegs
[i
]);
409 } else if (DstStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
410 // The entire subregister is defined by this insert, forward the new
412 DstRegs
.push_back(OpReg
);
416 // OpSegStart is where this destination segment would start in OpReg if it
417 // extended infinitely in both directions.
418 int64_t ExtractOffset
, InsertOffset
;
420 if (OpStart
< DstStart
) {
422 ExtractOffset
= DstStart
- OpStart
;
423 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- DstStart
);
425 InsertOffset
= OpStart
- DstStart
;
428 std::min(NarrowSize
- InsertOffset
, OpStart
+ OpSize
- DstStart
);
431 unsigned SegReg
= OpReg
;
432 if (ExtractOffset
!= 0 || SegSize
!= OpSize
) {
433 // A genuine extract is needed.
434 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
435 MIRBuilder
.buildExtract(SegReg
, OpReg
, ExtractOffset
);
438 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
439 MIRBuilder
.buildInsert(DstReg
, SrcRegs
[i
], SegReg
, InsertOffset
);
440 DstRegs
.push_back(DstReg
);
443 assert(DstRegs
.size() == (unsigned)NumParts
&& "not all parts covered");
444 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
445 MI
.eraseFromParent();
448 case TargetOpcode::G_LOAD
: {
449 // FIXME: add support for when SizeOp0 isn't an exact multiple of
451 if (SizeOp0
% NarrowSize
!= 0)
452 return UnableToLegalize
;
454 const auto &MMO
= **MI
.memoperands_begin();
455 // This implementation doesn't work for atomics. Give up instead of doing
456 // something invalid.
457 if (MMO
.getOrdering() != AtomicOrdering::NotAtomic
||
458 MMO
.getFailureOrdering() != AtomicOrdering::NotAtomic
)
459 return UnableToLegalize
;
461 int NumParts
= SizeOp0
/ NarrowSize
;
462 LLT OffsetTy
= LLT::scalar(
463 MRI
.getType(MI
.getOperand(1).getReg()).getScalarSizeInBits());
465 SmallVector
<unsigned, 2> DstRegs
;
466 for (int i
= 0; i
< NumParts
; ++i
) {
467 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
469 unsigned Adjustment
= i
* NarrowSize
/ 8;
471 MachineMemOperand
*SplitMMO
= MIRBuilder
.getMF().getMachineMemOperand(
472 MMO
.getPointerInfo().getWithOffset(Adjustment
), MMO
.getFlags(),
473 NarrowSize
/ 8, i
== 0 ? MMO
.getAlignment() : NarrowSize
/ 8,
474 MMO
.getAAInfo(), MMO
.getRanges(), MMO
.getSyncScopeID(),
475 MMO
.getOrdering(), MMO
.getFailureOrdering());
477 MIRBuilder
.materializeGEP(SrcReg
, MI
.getOperand(1).getReg(), OffsetTy
,
480 MIRBuilder
.buildLoad(DstReg
, SrcReg
, *SplitMMO
);
482 DstRegs
.push_back(DstReg
);
484 unsigned DstReg
= MI
.getOperand(0).getReg();
485 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
486 MI
.eraseFromParent();
489 case TargetOpcode::G_STORE
: {
490 // FIXME: add support for when SizeOp0 isn't an exact multiple of
492 if (SizeOp0
% NarrowSize
!= 0)
493 return UnableToLegalize
;
495 const auto &MMO
= **MI
.memoperands_begin();
496 // This implementation doesn't work for atomics. Give up instead of doing
497 // something invalid.
498 if (MMO
.getOrdering() != AtomicOrdering::NotAtomic
||
499 MMO
.getFailureOrdering() != AtomicOrdering::NotAtomic
)
500 return UnableToLegalize
;
502 int NumParts
= SizeOp0
/ NarrowSize
;
503 LLT OffsetTy
= LLT::scalar(
504 MRI
.getType(MI
.getOperand(1).getReg()).getScalarSizeInBits());
506 SmallVector
<unsigned, 2> SrcRegs
;
507 extractParts(MI
.getOperand(0).getReg(), NarrowTy
, NumParts
, SrcRegs
);
509 for (int i
= 0; i
< NumParts
; ++i
) {
511 unsigned Adjustment
= i
* NarrowSize
/ 8;
513 MachineMemOperand
*SplitMMO
= MIRBuilder
.getMF().getMachineMemOperand(
514 MMO
.getPointerInfo().getWithOffset(Adjustment
), MMO
.getFlags(),
515 NarrowSize
/ 8, i
== 0 ? MMO
.getAlignment() : NarrowSize
/ 8,
516 MMO
.getAAInfo(), MMO
.getRanges(), MMO
.getSyncScopeID(),
517 MMO
.getOrdering(), MMO
.getFailureOrdering());
519 MIRBuilder
.materializeGEP(DstReg
, MI
.getOperand(1).getReg(), OffsetTy
,
522 MIRBuilder
.buildStore(SrcRegs
[i
], DstReg
, *SplitMMO
);
524 MI
.eraseFromParent();
527 case TargetOpcode::G_CONSTANT
: {
528 // FIXME: add support for when SizeOp0 isn't an exact multiple of
530 if (SizeOp0
% NarrowSize
!= 0)
531 return UnableToLegalize
;
532 int NumParts
= SizeOp0
/ NarrowSize
;
533 const APInt
&Cst
= MI
.getOperand(1).getCImm()->getValue();
534 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
536 SmallVector
<unsigned, 2> DstRegs
;
537 for (int i
= 0; i
< NumParts
; ++i
) {
538 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
540 ConstantInt::get(Ctx
, Cst
.lshr(NarrowSize
* i
).trunc(NarrowSize
));
541 MIRBuilder
.buildConstant(DstReg
, *CI
);
542 DstRegs
.push_back(DstReg
);
544 unsigned DstReg
= MI
.getOperand(0).getReg();
545 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
546 MI
.eraseFromParent();
549 case TargetOpcode::G_OR
: {
550 // Legalize bitwise operation:
551 // A = BinOp<Ty> B, C
553 // B1, ..., BN = G_UNMERGE_VALUES B
554 // C1, ..., CN = G_UNMERGE_VALUES C
555 // A1 = BinOp<Ty/N> B1, C2
557 // AN = BinOp<Ty/N> BN, CN
558 // A = G_MERGE_VALUES A1, ..., AN
560 // FIXME: add support for when SizeOp0 isn't an exact multiple of
562 if (SizeOp0
% NarrowSize
!= 0)
563 return UnableToLegalize
;
564 int NumParts
= SizeOp0
/ NarrowSize
;
566 // List the registers where the destination will be scattered.
567 SmallVector
<unsigned, 2> DstRegs
;
568 // List the registers where the first argument will be split.
569 SmallVector
<unsigned, 2> SrcsReg1
;
570 // List the registers where the second argument will be split.
571 SmallVector
<unsigned, 2> SrcsReg2
;
572 // Create all the temporary registers.
573 for (int i
= 0; i
< NumParts
; ++i
) {
574 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
575 unsigned SrcReg1
= MRI
.createGenericVirtualRegister(NarrowTy
);
576 unsigned SrcReg2
= MRI
.createGenericVirtualRegister(NarrowTy
);
578 DstRegs
.push_back(DstReg
);
579 SrcsReg1
.push_back(SrcReg1
);
580 SrcsReg2
.push_back(SrcReg2
);
582 // Explode the big arguments into smaller chunks.
583 MIRBuilder
.buildUnmerge(SrcsReg1
, MI
.getOperand(1).getReg());
584 MIRBuilder
.buildUnmerge(SrcsReg2
, MI
.getOperand(2).getReg());
586 // Do the operation on each small part.
587 for (int i
= 0; i
< NumParts
; ++i
)
588 MIRBuilder
.buildOr(DstRegs
[i
], SrcsReg1
[i
], SrcsReg2
[i
]);
590 // Gather the destination registers into the final destination.
591 unsigned DstReg
= MI
.getOperand(0).getReg();
592 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
593 MI
.eraseFromParent();
599 void LegalizerHelper::widenScalarSrc(MachineInstr
&MI
, LLT WideTy
,
600 unsigned OpIdx
, unsigned ExtOpcode
) {
601 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
602 auto ExtB
= MIRBuilder
.buildInstr(ExtOpcode
, WideTy
, MO
.getReg());
603 MO
.setReg(ExtB
->getOperand(0).getReg());
606 void LegalizerHelper::widenScalarDst(MachineInstr
&MI
, LLT WideTy
,
607 unsigned OpIdx
, unsigned TruncOpcode
) {
608 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
609 unsigned DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
610 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
611 MIRBuilder
.buildInstr(TruncOpcode
, MO
.getReg(), DstExt
);
615 LegalizerHelper::LegalizeResult
616 LegalizerHelper::widenScalar(MachineInstr
&MI
, unsigned TypeIdx
, LLT WideTy
) {
617 MIRBuilder
.setInstr(MI
);
619 switch (MI
.getOpcode()) {
621 return UnableToLegalize
;
622 case TargetOpcode::G_UADDO
:
623 case TargetOpcode::G_USUBO
: {
625 return UnableToLegalize
; // TODO
626 auto LHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, WideTy
,
627 MI
.getOperand(2).getReg());
628 auto RHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, WideTy
,
629 MI
.getOperand(3).getReg());
630 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_UADDO
631 ? TargetOpcode::G_ADD
632 : TargetOpcode::G_SUB
;
633 // Do the arithmetic in the larger type.
634 auto NewOp
= MIRBuilder
.buildInstr(Opcode
, WideTy
, LHSZext
, RHSZext
);
635 LLT OrigTy
= MRI
.getType(MI
.getOperand(0).getReg());
636 APInt Mask
= APInt::getAllOnesValue(OrigTy
.getSizeInBits());
637 auto AndOp
= MIRBuilder
.buildInstr(
638 TargetOpcode::G_AND
, WideTy
, NewOp
,
639 MIRBuilder
.buildConstant(WideTy
, Mask
.getZExtValue()));
640 // There is no overflow if the AndOp is the same as NewOp.
641 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, MI
.getOperand(1).getReg(), NewOp
,
643 // Now trunc the NewOp to the original result.
644 MIRBuilder
.buildTrunc(MI
.getOperand(0).getReg(), NewOp
);
645 MI
.eraseFromParent();
648 case TargetOpcode::G_CTTZ
:
649 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
650 case TargetOpcode::G_CTLZ
:
651 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
652 case TargetOpcode::G_CTPOP
: {
653 // First ZEXT the input.
654 auto MIBSrc
= MIRBuilder
.buildZExt(WideTy
, MI
.getOperand(1).getReg());
655 LLT CurTy
= MRI
.getType(MI
.getOperand(0).getReg());
656 if (MI
.getOpcode() == TargetOpcode::G_CTTZ
) {
657 // The count is the same in the larger type except if the original
658 // value was zero. This can be handled by setting the bit just off
659 // the top of the original type.
661 APInt::getOneBitSet(WideTy
.getSizeInBits(), CurTy
.getSizeInBits());
662 MIBSrc
= MIRBuilder
.buildInstr(
663 TargetOpcode::G_OR
, WideTy
, MIBSrc
,
664 MIRBuilder
.buildConstant(WideTy
, TopBit
.getSExtValue()));
666 // Perform the operation at the larger size.
667 auto MIBNewOp
= MIRBuilder
.buildInstr(MI
.getOpcode(), WideTy
, MIBSrc
);
668 // This is already the correct result for CTPOP and CTTZs
669 if (MI
.getOpcode() == TargetOpcode::G_CTLZ
||
670 MI
.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF
) {
671 // The correct result is NewOp - (Difference in widety and current ty).
672 unsigned SizeDiff
= WideTy
.getSizeInBits() - CurTy
.getSizeInBits();
674 MIRBuilder
.buildInstr(TargetOpcode::G_SUB
, WideTy
, MIBNewOp
,
675 MIRBuilder
.buildConstant(WideTy
, SizeDiff
));
677 auto &TII
= *MI
.getMF()->getSubtarget().getInstrInfo();
678 // Make the original instruction a trunc now, and update it's source.
679 MI
.setDesc(TII
.get(TargetOpcode::G_TRUNC
));
680 MI
.getOperand(1).setReg(MIBNewOp
->getOperand(0).getReg());
681 MIRBuilder
.recordInsertion(&MI
);
685 case TargetOpcode::G_ADD
:
686 case TargetOpcode::G_AND
:
687 case TargetOpcode::G_MUL
:
688 case TargetOpcode::G_OR
:
689 case TargetOpcode::G_XOR
:
690 case TargetOpcode::G_SUB
:
691 // Perform operation at larger width (any extension is fine here, high bits
692 // don't affect the result) and then truncate the result back to the
694 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
695 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
696 widenScalarDst(MI
, WideTy
);
697 MIRBuilder
.recordInsertion(&MI
);
700 case TargetOpcode::G_SHL
:
701 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
702 // The "number of bits to shift" operand must preserve its value as an
704 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
705 widenScalarDst(MI
, WideTy
);
706 MIRBuilder
.recordInsertion(&MI
);
709 case TargetOpcode::G_SDIV
:
710 case TargetOpcode::G_SREM
:
711 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
712 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
713 widenScalarDst(MI
, WideTy
);
714 MIRBuilder
.recordInsertion(&MI
);
717 case TargetOpcode::G_ASHR
:
718 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
719 // The "number of bits to shift" operand must preserve its value as an
721 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
722 widenScalarDst(MI
, WideTy
);
723 MIRBuilder
.recordInsertion(&MI
);
726 case TargetOpcode::G_UDIV
:
727 case TargetOpcode::G_UREM
:
728 case TargetOpcode::G_LSHR
:
729 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
730 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
731 widenScalarDst(MI
, WideTy
);
732 MIRBuilder
.recordInsertion(&MI
);
735 case TargetOpcode::G_SELECT
:
737 return UnableToLegalize
;
738 // Perform operation at larger width (any extension is fine here, high bits
739 // don't affect the result) and then truncate the result back to the
741 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
742 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_ANYEXT
);
743 widenScalarDst(MI
, WideTy
);
744 MIRBuilder
.recordInsertion(&MI
);
747 case TargetOpcode::G_FPTOSI
:
748 case TargetOpcode::G_FPTOUI
:
750 return UnableToLegalize
;
751 widenScalarDst(MI
, WideTy
);
752 MIRBuilder
.recordInsertion(&MI
);
755 case TargetOpcode::G_SITOFP
:
757 return UnableToLegalize
;
758 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
759 MIRBuilder
.recordInsertion(&MI
);
762 case TargetOpcode::G_UITOFP
:
764 return UnableToLegalize
;
765 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
766 MIRBuilder
.recordInsertion(&MI
);
769 case TargetOpcode::G_INSERT
:
771 return UnableToLegalize
;
772 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
773 widenScalarDst(MI
, WideTy
);
774 MIRBuilder
.recordInsertion(&MI
);
777 case TargetOpcode::G_LOAD
:
778 // For some types like i24, we might try to widen to i32. To properly handle
779 // this we should be using a dedicated extending load, until then avoid
780 // trying to legalize.
781 if (alignTo(MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits(), 8) !=
782 WideTy
.getSizeInBits())
783 return UnableToLegalize
;
785 case TargetOpcode::G_SEXTLOAD
:
786 case TargetOpcode::G_ZEXTLOAD
:
787 widenScalarDst(MI
, WideTy
);
788 MIRBuilder
.recordInsertion(&MI
);
791 case TargetOpcode::G_STORE
: {
792 if (MRI
.getType(MI
.getOperand(0).getReg()) != LLT::scalar(1) ||
793 WideTy
!= LLT::scalar(8))
794 return UnableToLegalize
;
796 widenScalarSrc(MI
, WideTy
, 0, TargetOpcode::G_ZEXT
);
797 MIRBuilder
.recordInsertion(&MI
);
800 case TargetOpcode::G_CONSTANT
: {
801 MachineOperand
&SrcMO
= MI
.getOperand(1);
802 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
803 const APInt
&Val
= SrcMO
.getCImm()->getValue().sext(WideTy
.getSizeInBits());
804 SrcMO
.setCImm(ConstantInt::get(Ctx
, Val
));
806 widenScalarDst(MI
, WideTy
);
807 MIRBuilder
.recordInsertion(&MI
);
810 case TargetOpcode::G_FCONSTANT
: {
811 MachineOperand
&SrcMO
= MI
.getOperand(1);
812 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
813 APFloat Val
= SrcMO
.getFPImm()->getValueAPF();
815 switch (WideTy
.getSizeInBits()) {
817 Val
.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero
, &LosesInfo
);
820 Val
.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero
, &LosesInfo
);
823 llvm_unreachable("Unhandled fp widen type");
825 SrcMO
.setFPImm(ConstantFP::get(Ctx
, Val
));
827 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
828 MIRBuilder
.recordInsertion(&MI
);
831 case TargetOpcode::G_BRCOND
:
832 widenScalarSrc(MI
, WideTy
, 0, TargetOpcode::G_ANYEXT
);
833 MIRBuilder
.recordInsertion(&MI
);
836 case TargetOpcode::G_FCMP
:
838 widenScalarDst(MI
, WideTy
);
840 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_FPEXT
);
841 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_FPEXT
);
843 MIRBuilder
.recordInsertion(&MI
);
846 case TargetOpcode::G_ICMP
:
848 widenScalarDst(MI
, WideTy
);
850 unsigned ExtOpcode
= CmpInst::isSigned(static_cast<CmpInst::Predicate
>(
851 MI
.getOperand(1).getPredicate()))
852 ? TargetOpcode::G_SEXT
853 : TargetOpcode::G_ZEXT
;
854 widenScalarSrc(MI
, WideTy
, 2, ExtOpcode
);
855 widenScalarSrc(MI
, WideTy
, 3, ExtOpcode
);
857 MIRBuilder
.recordInsertion(&MI
);
860 case TargetOpcode::G_GEP
:
861 assert(TypeIdx
== 1 && "unable to legalize pointer of GEP");
862 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
863 MIRBuilder
.recordInsertion(&MI
);
866 case TargetOpcode::G_PHI
: {
867 assert(TypeIdx
== 0 && "Expecting only Idx 0");
869 for (unsigned I
= 1; I
< MI
.getNumOperands(); I
+= 2) {
870 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
871 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
872 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_ANYEXT
);
875 MachineBasicBlock
&MBB
= *MI
.getParent();
876 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
877 widenScalarDst(MI
, WideTy
);
878 MIRBuilder
.recordInsertion(&MI
);
884 LegalizerHelper::LegalizeResult
885 LegalizerHelper::lower(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
886 using namespace TargetOpcode
;
887 MIRBuilder
.setInstr(MI
);
889 switch(MI
.getOpcode()) {
891 return UnableToLegalize
;
892 case TargetOpcode::G_SREM
:
893 case TargetOpcode::G_UREM
: {
894 unsigned QuotReg
= MRI
.createGenericVirtualRegister(Ty
);
895 MIRBuilder
.buildInstr(MI
.getOpcode() == G_SREM
? G_SDIV
: G_UDIV
)
897 .addUse(MI
.getOperand(1).getReg())
898 .addUse(MI
.getOperand(2).getReg());
900 unsigned ProdReg
= MRI
.createGenericVirtualRegister(Ty
);
901 MIRBuilder
.buildMul(ProdReg
, QuotReg
, MI
.getOperand(2).getReg());
902 MIRBuilder
.buildSub(MI
.getOperand(0).getReg(), MI
.getOperand(1).getReg(),
904 MI
.eraseFromParent();
907 case TargetOpcode::G_SMULO
:
908 case TargetOpcode::G_UMULO
: {
909 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
911 unsigned Res
= MI
.getOperand(0).getReg();
912 unsigned Overflow
= MI
.getOperand(1).getReg();
913 unsigned LHS
= MI
.getOperand(2).getReg();
914 unsigned RHS
= MI
.getOperand(3).getReg();
916 MIRBuilder
.buildMul(Res
, LHS
, RHS
);
918 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_SMULO
919 ? TargetOpcode::G_SMULH
920 : TargetOpcode::G_UMULH
;
922 unsigned HiPart
= MRI
.createGenericVirtualRegister(Ty
);
923 MIRBuilder
.buildInstr(Opcode
)
928 unsigned Zero
= MRI
.createGenericVirtualRegister(Ty
);
929 MIRBuilder
.buildConstant(Zero
, 0);
931 // For *signed* multiply, overflow is detected by checking:
932 // (hi != (lo >> bitwidth-1))
933 if (Opcode
== TargetOpcode::G_SMULH
) {
934 unsigned Shifted
= MRI
.createGenericVirtualRegister(Ty
);
935 unsigned ShiftAmt
= MRI
.createGenericVirtualRegister(Ty
);
936 MIRBuilder
.buildConstant(ShiftAmt
, Ty
.getSizeInBits() - 1);
937 MIRBuilder
.buildInstr(TargetOpcode::G_ASHR
)
941 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Shifted
);
943 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Zero
);
945 MI
.eraseFromParent();
948 case TargetOpcode::G_FNEG
: {
949 // TODO: Handle vector types once we are able to
952 return UnableToLegalize
;
953 unsigned Res
= MI
.getOperand(0).getReg();
955 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
956 switch (Ty
.getSizeInBits()) {
958 ZeroTy
= Type::getHalfTy(Ctx
);
961 ZeroTy
= Type::getFloatTy(Ctx
);
964 ZeroTy
= Type::getDoubleTy(Ctx
);
967 ZeroTy
= Type::getFP128Ty(Ctx
);
970 llvm_unreachable("unexpected floating-point type");
972 ConstantFP
&ZeroForNegation
=
973 *cast
<ConstantFP
>(ConstantFP::getZeroValueForNegation(ZeroTy
));
974 auto Zero
= MIRBuilder
.buildFConstant(Ty
, ZeroForNegation
);
975 MIRBuilder
.buildInstr(TargetOpcode::G_FSUB
)
977 .addUse(Zero
->getOperand(0).getReg())
978 .addUse(MI
.getOperand(1).getReg());
979 MI
.eraseFromParent();
982 case TargetOpcode::G_FSUB
: {
983 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
984 // First, check if G_FNEG is marked as Lower. If so, we may
985 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
986 if (LI
.getAction({G_FNEG
, {Ty
}}).Action
== Lower
)
987 return UnableToLegalize
;
988 unsigned Res
= MI
.getOperand(0).getReg();
989 unsigned LHS
= MI
.getOperand(1).getReg();
990 unsigned RHS
= MI
.getOperand(2).getReg();
991 unsigned Neg
= MRI
.createGenericVirtualRegister(Ty
);
992 MIRBuilder
.buildInstr(TargetOpcode::G_FNEG
).addDef(Neg
).addUse(RHS
);
993 MIRBuilder
.buildInstr(TargetOpcode::G_FADD
)
997 MI
.eraseFromParent();
1000 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS
: {
1001 unsigned OldValRes
= MI
.getOperand(0).getReg();
1002 unsigned SuccessRes
= MI
.getOperand(1).getReg();
1003 unsigned Addr
= MI
.getOperand(2).getReg();
1004 unsigned CmpVal
= MI
.getOperand(3).getReg();
1005 unsigned NewVal
= MI
.getOperand(4).getReg();
1006 MIRBuilder
.buildAtomicCmpXchg(OldValRes
, Addr
, CmpVal
, NewVal
,
1007 **MI
.memoperands_begin());
1008 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, SuccessRes
, OldValRes
, CmpVal
);
1009 MI
.eraseFromParent();
1012 case TargetOpcode::G_LOAD
:
1013 case TargetOpcode::G_SEXTLOAD
:
1014 case TargetOpcode::G_ZEXTLOAD
: {
1015 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1016 unsigned DstReg
= MI
.getOperand(0).getReg();
1017 unsigned PtrReg
= MI
.getOperand(1).getReg();
1018 LLT DstTy
= MRI
.getType(DstReg
);
1019 auto &MMO
= **MI
.memoperands_begin();
1021 if (DstTy
.getSizeInBits() == MMO
.getSize() /* in bytes */ * 8) {
1022 // In the case of G_LOAD, this was a non-extending load already and we're
1023 // about to lower to the same instruction.
1024 if (MI
.getOpcode() == TargetOpcode::G_LOAD
)
1025 return UnableToLegalize
;
1026 MIRBuilder
.buildLoad(DstReg
, PtrReg
, MMO
);
1027 MI
.eraseFromParent();
1031 if (DstTy
.isScalar()) {
1032 unsigned TmpReg
= MRI
.createGenericVirtualRegister(
1033 LLT::scalar(MMO
.getSize() /* in bytes */ * 8));
1034 MIRBuilder
.buildLoad(TmpReg
, PtrReg
, MMO
);
1035 switch (MI
.getOpcode()) {
1037 llvm_unreachable("Unexpected opcode");
1038 case TargetOpcode::G_LOAD
:
1039 MIRBuilder
.buildAnyExt(DstReg
, TmpReg
);
1041 case TargetOpcode::G_SEXTLOAD
:
1042 MIRBuilder
.buildSExt(DstReg
, TmpReg
);
1044 case TargetOpcode::G_ZEXTLOAD
:
1045 MIRBuilder
.buildZExt(DstReg
, TmpReg
);
1048 MI
.eraseFromParent();
1052 return UnableToLegalize
;
1054 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
1055 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
1056 case TargetOpcode::G_CTLZ
:
1057 case TargetOpcode::G_CTTZ
:
1058 case TargetOpcode::G_CTPOP
:
1059 return lowerBitCount(MI
, TypeIdx
, Ty
);
1063 LegalizerHelper::LegalizeResult
1064 LegalizerHelper::fewerElementsVector(MachineInstr
&MI
, unsigned TypeIdx
,
1066 // FIXME: Don't know how to handle secondary types yet.
1068 return UnableToLegalize
;
1069 switch (MI
.getOpcode()) {
1071 return UnableToLegalize
;
1072 case TargetOpcode::G_ADD
: {
1073 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
1074 unsigned DstReg
= MI
.getOperand(0).getReg();
1075 unsigned Size
= MRI
.getType(DstReg
).getSizeInBits();
1076 int NumParts
= Size
/ NarrowSize
;
1077 // FIXME: Don't know how to handle the situation where the small vectors
1078 // aren't all the same size yet.
1079 if (Size
% NarrowSize
!= 0)
1080 return UnableToLegalize
;
1082 MIRBuilder
.setInstr(MI
);
1084 SmallVector
<unsigned, 2> Src1Regs
, Src2Regs
, DstRegs
;
1085 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
1086 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
1088 for (int i
= 0; i
< NumParts
; ++i
) {
1089 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
1090 MIRBuilder
.buildAdd(DstReg
, Src1Regs
[i
], Src2Regs
[i
]);
1091 DstRegs
.push_back(DstReg
);
1094 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
1095 MI
.eraseFromParent();
1101 LegalizerHelper::LegalizeResult
1102 LegalizerHelper::lowerBitCount(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
1103 unsigned Opc
= MI
.getOpcode();
1104 auto &TII
= *MI
.getMF()->getSubtarget().getInstrInfo();
1105 auto isLegalOrCustom
= [this](const LegalityQuery
&Q
) {
1106 auto QAction
= LI
.getAction(Q
).Action
;
1107 return QAction
== Legal
|| QAction
== Custom
;
1111 return UnableToLegalize
;
1112 case TargetOpcode::G_CTLZ_ZERO_UNDEF
: {
1113 // This trivially expands to CTLZ.
1114 MI
.setDesc(TII
.get(TargetOpcode::G_CTLZ
));
1115 MIRBuilder
.recordInsertion(&MI
);
1118 case TargetOpcode::G_CTLZ
: {
1119 unsigned SrcReg
= MI
.getOperand(1).getReg();
1120 unsigned Len
= Ty
.getSizeInBits();
1121 if (isLegalOrCustom({TargetOpcode::G_CTLZ_ZERO_UNDEF
, {Ty
}})) {
1122 // If CTLZ_ZERO_UNDEF is legal or custom, emit that and a select with
1125 MIRBuilder
.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF
, Ty
, SrcReg
);
1126 auto MIBZero
= MIRBuilder
.buildConstant(Ty
, 0);
1127 auto MIBLen
= MIRBuilder
.buildConstant(Ty
, Len
);
1128 auto MIBICmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LLT::scalar(1),
1130 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), MIBICmp
, MIBLen
,
1132 MI
.eraseFromParent();
1135 // for now, we do this:
1136 // NewLen = NextPowerOf2(Len);
1137 // x = x | (x >> 1);
1138 // x = x | (x >> 2);
1140 // x = x | (x >>16);
1141 // x = x | (x >>32); // for 64-bit input
1143 // return Len - popcount(x);
1145 // Ref: "Hacker's Delight" by Henry Warren
1146 unsigned Op
= SrcReg
;
1147 unsigned NewLen
= PowerOf2Ceil(Len
);
1148 for (unsigned i
= 0; (1U << i
) <= (NewLen
/ 2); ++i
) {
1149 auto MIBShiftAmt
= MIRBuilder
.buildConstant(Ty
, 1ULL << i
);
1150 auto MIBOp
= MIRBuilder
.buildInstr(
1151 TargetOpcode::G_OR
, Ty
, Op
,
1152 MIRBuilder
.buildInstr(TargetOpcode::G_LSHR
, Ty
, Op
, MIBShiftAmt
));
1153 Op
= MIBOp
->getOperand(0).getReg();
1155 auto MIBPop
= MIRBuilder
.buildInstr(TargetOpcode::G_CTPOP
, Ty
, Op
);
1156 MIRBuilder
.buildInstr(TargetOpcode::G_SUB
, MI
.getOperand(0).getReg(),
1157 MIRBuilder
.buildConstant(Ty
, Len
), MIBPop
);
1158 MI
.eraseFromParent();
1161 case TargetOpcode::G_CTTZ_ZERO_UNDEF
: {
1162 // This trivially expands to CTTZ.
1163 MI
.setDesc(TII
.get(TargetOpcode::G_CTTZ
));
1164 MIRBuilder
.recordInsertion(&MI
);
1167 case TargetOpcode::G_CTTZ
: {
1168 unsigned SrcReg
= MI
.getOperand(1).getReg();
1169 unsigned Len
= Ty
.getSizeInBits();
1170 if (isLegalOrCustom({TargetOpcode::G_CTTZ_ZERO_UNDEF
, {Ty
}})) {
1171 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
1174 MIRBuilder
.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF
, Ty
, SrcReg
);
1175 auto MIBZero
= MIRBuilder
.buildConstant(Ty
, 0);
1176 auto MIBLen
= MIRBuilder
.buildConstant(Ty
, Len
);
1177 auto MIBICmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LLT::scalar(1),
1179 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), MIBICmp
, MIBLen
,
1181 MI
.eraseFromParent();
1184 // for now, we use: { return popcount(~x & (x - 1)); }
1185 // unless the target has ctlz but not ctpop, in which case we use:
1186 // { return 32 - nlz(~x & (x-1)); }
1187 // Ref: "Hacker's Delight" by Henry Warren
1188 auto MIBCstNeg1
= MIRBuilder
.buildConstant(Ty
, -1);
1190 MIRBuilder
.buildInstr(TargetOpcode::G_XOR
, Ty
, SrcReg
, MIBCstNeg1
);
1191 auto MIBTmp
= MIRBuilder
.buildInstr(
1192 TargetOpcode::G_AND
, Ty
, MIBNot
,
1193 MIRBuilder
.buildInstr(TargetOpcode::G_ADD
, Ty
, SrcReg
, MIBCstNeg1
));
1194 if (!isLegalOrCustom({TargetOpcode::G_CTPOP
, {Ty
}}) &&
1195 isLegalOrCustom({TargetOpcode::G_CTLZ
, {Ty
}})) {
1196 auto MIBCstLen
= MIRBuilder
.buildConstant(Ty
, Len
);
1197 MIRBuilder
.buildInstr(
1198 TargetOpcode::G_SUB
, MI
.getOperand(0).getReg(),
1200 MIRBuilder
.buildInstr(TargetOpcode::G_CTLZ
, Ty
, MIBTmp
));
1201 MI
.eraseFromParent();
1204 MI
.setDesc(TII
.get(TargetOpcode::G_CTPOP
));
1205 MI
.getOperand(1).setReg(MIBTmp
->getOperand(0).getReg());