1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
13 //===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;
32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35 /// with any leftover piece as type \p LeftoverTy
37 /// Returns -1 if the breakdown is not satisfiable.
38 static int getNarrowTypeBreakDown(LLT OrigTy
, LLT NarrowTy
, LLT
&LeftoverTy
) {
39 assert(!LeftoverTy
.isValid() && "this is an out argument");
41 unsigned Size
= OrigTy
.getSizeInBits();
42 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
43 unsigned NumParts
= Size
/ NarrowSize
;
44 unsigned LeftoverSize
= Size
- NumParts
* NarrowSize
;
45 assert(Size
> NarrowSize
);
47 if (LeftoverSize
== 0)
50 if (NarrowTy
.isVector()) {
51 unsigned EltSize
= OrigTy
.getScalarSizeInBits();
52 if (LeftoverSize
% EltSize
!= 0)
54 LeftoverTy
= LLT::scalarOrVector(LeftoverSize
/ EltSize
, EltSize
);
56 LeftoverTy
= LLT::scalar(LeftoverSize
);
62 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
,
63 GISelChangeObserver
&Observer
,
64 MachineIRBuilder
&Builder
)
65 : MIRBuilder(Builder
), MRI(MF
.getRegInfo()),
66 LI(*MF
.getSubtarget().getLegalizerInfo()), Observer(Observer
) {
68 MIRBuilder
.setChangeObserver(Observer
);
71 LegalizerHelper::LegalizerHelper(MachineFunction
&MF
, const LegalizerInfo
&LI
,
72 GISelChangeObserver
&Observer
,
74 : MIRBuilder(B
), MRI(MF
.getRegInfo()), LI(LI
), Observer(Observer
) {
76 MIRBuilder
.setChangeObserver(Observer
);
78 LegalizerHelper::LegalizeResult
79 LegalizerHelper::legalizeInstrStep(MachineInstr
&MI
) {
80 LLVM_DEBUG(dbgs() << "Legalizing: "; MI
.print(dbgs()));
82 auto Step
= LI
.getAction(MI
, MRI
);
83 switch (Step
.Action
) {
85 LLVM_DEBUG(dbgs() << ".. Already legal\n");
88 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
91 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
92 return narrowScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
94 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
95 return widenScalar(MI
, Step
.TypeIdx
, Step
.NewType
);
97 LLVM_DEBUG(dbgs() << ".. Lower\n");
98 return lower(MI
, Step
.TypeIdx
, Step
.NewType
);
100 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
101 return fewerElementsVector(MI
, Step
.TypeIdx
, Step
.NewType
);
103 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
104 return moreElementsVector(MI
, Step
.TypeIdx
, Step
.NewType
);
106 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
107 return LI
.legalizeCustom(MI
, MRI
, MIRBuilder
, Observer
) ? Legalized
110 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
111 return UnableToLegalize
;
115 void LegalizerHelper::extractParts(unsigned Reg
, LLT Ty
, int NumParts
,
116 SmallVectorImpl
<unsigned> &VRegs
) {
117 for (int i
= 0; i
< NumParts
; ++i
)
118 VRegs
.push_back(MRI
.createGenericVirtualRegister(Ty
));
119 MIRBuilder
.buildUnmerge(VRegs
, Reg
);
122 bool LegalizerHelper::extractParts(unsigned Reg
, LLT RegTy
,
123 LLT MainTy
, LLT
&LeftoverTy
,
124 SmallVectorImpl
<unsigned> &VRegs
,
125 SmallVectorImpl
<unsigned> &LeftoverRegs
) {
126 assert(!LeftoverTy
.isValid() && "this is an out argument");
128 unsigned RegSize
= RegTy
.getSizeInBits();
129 unsigned MainSize
= MainTy
.getSizeInBits();
130 unsigned NumParts
= RegSize
/ MainSize
;
131 unsigned LeftoverSize
= RegSize
- NumParts
* MainSize
;
133 // Use an unmerge when possible.
134 if (LeftoverSize
== 0) {
135 for (unsigned I
= 0; I
< NumParts
; ++I
)
136 VRegs
.push_back(MRI
.createGenericVirtualRegister(MainTy
));
137 MIRBuilder
.buildUnmerge(VRegs
, Reg
);
141 if (MainTy
.isVector()) {
142 unsigned EltSize
= MainTy
.getScalarSizeInBits();
143 if (LeftoverSize
% EltSize
!= 0)
145 LeftoverTy
= LLT::scalarOrVector(LeftoverSize
/ EltSize
, EltSize
);
147 LeftoverTy
= LLT::scalar(LeftoverSize
);
150 // For irregular sizes, extract the individual parts.
151 for (unsigned I
= 0; I
!= NumParts
; ++I
) {
152 unsigned NewReg
= MRI
.createGenericVirtualRegister(MainTy
);
153 VRegs
.push_back(NewReg
);
154 MIRBuilder
.buildExtract(NewReg
, Reg
, MainSize
* I
);
157 for (unsigned Offset
= MainSize
* NumParts
; Offset
< RegSize
;
158 Offset
+= LeftoverSize
) {
159 unsigned NewReg
= MRI
.createGenericVirtualRegister(LeftoverTy
);
160 LeftoverRegs
.push_back(NewReg
);
161 MIRBuilder
.buildExtract(NewReg
, Reg
, Offset
);
167 void LegalizerHelper::insertParts(unsigned DstReg
,
168 LLT ResultTy
, LLT PartTy
,
169 ArrayRef
<unsigned> PartRegs
,
171 ArrayRef
<unsigned> LeftoverRegs
) {
172 if (!LeftoverTy
.isValid()) {
173 assert(LeftoverRegs
.empty());
175 if (!ResultTy
.isVector()) {
176 MIRBuilder
.buildMerge(DstReg
, PartRegs
);
180 if (PartTy
.isVector())
181 MIRBuilder
.buildConcatVectors(DstReg
, PartRegs
);
183 MIRBuilder
.buildBuildVector(DstReg
, PartRegs
);
187 unsigned PartSize
= PartTy
.getSizeInBits();
188 unsigned LeftoverPartSize
= LeftoverTy
.getSizeInBits();
190 unsigned CurResultReg
= MRI
.createGenericVirtualRegister(ResultTy
);
191 MIRBuilder
.buildUndef(CurResultReg
);
194 for (unsigned PartReg
: PartRegs
) {
195 unsigned NewResultReg
= MRI
.createGenericVirtualRegister(ResultTy
);
196 MIRBuilder
.buildInsert(NewResultReg
, CurResultReg
, PartReg
, Offset
);
197 CurResultReg
= NewResultReg
;
201 for (unsigned I
= 0, E
= LeftoverRegs
.size(); I
!= E
; ++I
) {
202 // Use the original output register for the final insert to avoid a copy.
203 unsigned NewResultReg
= (I
+ 1 == E
) ?
204 DstReg
: MRI
.createGenericVirtualRegister(ResultTy
);
206 MIRBuilder
.buildInsert(NewResultReg
, CurResultReg
, LeftoverRegs
[I
], Offset
);
207 CurResultReg
= NewResultReg
;
208 Offset
+= LeftoverPartSize
;
212 static RTLIB::Libcall
getRTLibDesc(unsigned Opcode
, unsigned Size
) {
214 case TargetOpcode::G_SDIV
:
215 assert((Size
== 32 || Size
== 64) && "Unsupported size");
216 return Size
== 64 ? RTLIB::SDIV_I64
: RTLIB::SDIV_I32
;
217 case TargetOpcode::G_UDIV
:
218 assert((Size
== 32 || Size
== 64) && "Unsupported size");
219 return Size
== 64 ? RTLIB::UDIV_I64
: RTLIB::UDIV_I32
;
220 case TargetOpcode::G_SREM
:
221 assert((Size
== 32 || Size
== 64) && "Unsupported size");
222 return Size
== 64 ? RTLIB::SREM_I64
: RTLIB::SREM_I32
;
223 case TargetOpcode::G_UREM
:
224 assert((Size
== 32 || Size
== 64) && "Unsupported size");
225 return Size
== 64 ? RTLIB::UREM_I64
: RTLIB::UREM_I32
;
226 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
227 assert(Size
== 32 && "Unsupported size");
228 return RTLIB::CTLZ_I32
;
229 case TargetOpcode::G_FADD
:
230 assert((Size
== 32 || Size
== 64) && "Unsupported size");
231 return Size
== 64 ? RTLIB::ADD_F64
: RTLIB::ADD_F32
;
232 case TargetOpcode::G_FSUB
:
233 assert((Size
== 32 || Size
== 64) && "Unsupported size");
234 return Size
== 64 ? RTLIB::SUB_F64
: RTLIB::SUB_F32
;
235 case TargetOpcode::G_FMUL
:
236 assert((Size
== 32 || Size
== 64) && "Unsupported size");
237 return Size
== 64 ? RTLIB::MUL_F64
: RTLIB::MUL_F32
;
238 case TargetOpcode::G_FDIV
:
239 assert((Size
== 32 || Size
== 64) && "Unsupported size");
240 return Size
== 64 ? RTLIB::DIV_F64
: RTLIB::DIV_F32
;
241 case TargetOpcode::G_FEXP
:
242 assert((Size
== 32 || Size
== 64) && "Unsupported size");
243 return Size
== 64 ? RTLIB::EXP_F64
: RTLIB::EXP_F32
;
244 case TargetOpcode::G_FREM
:
245 return Size
== 64 ? RTLIB::REM_F64
: RTLIB::REM_F32
;
246 case TargetOpcode::G_FPOW
:
247 return Size
== 64 ? RTLIB::POW_F64
: RTLIB::POW_F32
;
248 case TargetOpcode::G_FMA
:
249 assert((Size
== 32 || Size
== 64) && "Unsupported size");
250 return Size
== 64 ? RTLIB::FMA_F64
: RTLIB::FMA_F32
;
251 case TargetOpcode::G_FSIN
:
252 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
253 return Size
== 128 ? RTLIB::SIN_F128
254 : Size
== 64 ? RTLIB::SIN_F64
: RTLIB::SIN_F32
;
255 case TargetOpcode::G_FCOS
:
256 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
257 return Size
== 128 ? RTLIB::COS_F128
258 : Size
== 64 ? RTLIB::COS_F64
: RTLIB::COS_F32
;
259 case TargetOpcode::G_FLOG10
:
260 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
261 return Size
== 128 ? RTLIB::LOG10_F128
262 : Size
== 64 ? RTLIB::LOG10_F64
: RTLIB::LOG10_F32
;
263 case TargetOpcode::G_FLOG
:
264 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
265 return Size
== 128 ? RTLIB::LOG_F128
266 : Size
== 64 ? RTLIB::LOG_F64
: RTLIB::LOG_F32
;
267 case TargetOpcode::G_FLOG2
:
268 assert((Size
== 32 || Size
== 64 || Size
== 128) && "Unsupported size");
269 return Size
== 128 ? RTLIB::LOG2_F128
270 : Size
== 64 ? RTLIB::LOG2_F64
: RTLIB::LOG2_F32
;
272 llvm_unreachable("Unknown libcall function");
275 LegalizerHelper::LegalizeResult
276 llvm::createLibcall(MachineIRBuilder
&MIRBuilder
, RTLIB::Libcall Libcall
,
277 const CallLowering::ArgInfo
&Result
,
278 ArrayRef
<CallLowering::ArgInfo
> Args
) {
279 auto &CLI
= *MIRBuilder
.getMF().getSubtarget().getCallLowering();
280 auto &TLI
= *MIRBuilder
.getMF().getSubtarget().getTargetLowering();
281 const char *Name
= TLI
.getLibcallName(Libcall
);
283 MIRBuilder
.getMF().getFrameInfo().setHasCalls(true);
284 if (!CLI
.lowerCall(MIRBuilder
, TLI
.getLibcallCallingConv(Libcall
),
285 MachineOperand::CreateES(Name
), Result
, Args
))
286 return LegalizerHelper::UnableToLegalize
;
288 return LegalizerHelper::Legalized
;
291 // Useful for libcalls where all operands have the same type.
292 static LegalizerHelper::LegalizeResult
293 simpleLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, unsigned Size
,
295 auto Libcall
= getRTLibDesc(MI
.getOpcode(), Size
);
297 SmallVector
<CallLowering::ArgInfo
, 3> Args
;
298 for (unsigned i
= 1; i
< MI
.getNumOperands(); i
++)
299 Args
.push_back({MI
.getOperand(i
).getReg(), OpType
});
300 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), OpType
},
304 static RTLIB::Libcall
getConvRTLibDesc(unsigned Opcode
, Type
*ToType
,
306 auto ToMVT
= MVT::getVT(ToType
);
307 auto FromMVT
= MVT::getVT(FromType
);
310 case TargetOpcode::G_FPEXT
:
311 return RTLIB::getFPEXT(FromMVT
, ToMVT
);
312 case TargetOpcode::G_FPTRUNC
:
313 return RTLIB::getFPROUND(FromMVT
, ToMVT
);
314 case TargetOpcode::G_FPTOSI
:
315 return RTLIB::getFPTOSINT(FromMVT
, ToMVT
);
316 case TargetOpcode::G_FPTOUI
:
317 return RTLIB::getFPTOUINT(FromMVT
, ToMVT
);
318 case TargetOpcode::G_SITOFP
:
319 return RTLIB::getSINTTOFP(FromMVT
, ToMVT
);
320 case TargetOpcode::G_UITOFP
:
321 return RTLIB::getUINTTOFP(FromMVT
, ToMVT
);
323 llvm_unreachable("Unsupported libcall function");
326 static LegalizerHelper::LegalizeResult
327 conversionLibcall(MachineInstr
&MI
, MachineIRBuilder
&MIRBuilder
, Type
*ToType
,
329 RTLIB::Libcall Libcall
= getConvRTLibDesc(MI
.getOpcode(), ToType
, FromType
);
330 return createLibcall(MIRBuilder
, Libcall
, {MI
.getOperand(0).getReg(), ToType
},
331 {{MI
.getOperand(1).getReg(), FromType
}});
334 LegalizerHelper::LegalizeResult
335 LegalizerHelper::libcall(MachineInstr
&MI
) {
336 LLT LLTy
= MRI
.getType(MI
.getOperand(0).getReg());
337 unsigned Size
= LLTy
.getSizeInBits();
338 auto &Ctx
= MIRBuilder
.getMF().getFunction().getContext();
340 MIRBuilder
.setInstr(MI
);
342 switch (MI
.getOpcode()) {
344 return UnableToLegalize
;
345 case TargetOpcode::G_SDIV
:
346 case TargetOpcode::G_UDIV
:
347 case TargetOpcode::G_SREM
:
348 case TargetOpcode::G_UREM
:
349 case TargetOpcode::G_CTLZ_ZERO_UNDEF
: {
350 Type
*HLTy
= IntegerType::get(Ctx
, Size
);
351 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
352 if (Status
!= Legalized
)
356 case TargetOpcode::G_FADD
:
357 case TargetOpcode::G_FSUB
:
358 case TargetOpcode::G_FMUL
:
359 case TargetOpcode::G_FDIV
:
360 case TargetOpcode::G_FMA
:
361 case TargetOpcode::G_FPOW
:
362 case TargetOpcode::G_FREM
:
363 case TargetOpcode::G_FCOS
:
364 case TargetOpcode::G_FSIN
:
365 case TargetOpcode::G_FLOG10
:
366 case TargetOpcode::G_FLOG
:
367 case TargetOpcode::G_FLOG2
:
368 case TargetOpcode::G_FEXP
: {
370 LLVM_DEBUG(dbgs() << "Size " << Size
<< " too large to legalize.\n");
371 return UnableToLegalize
;
373 Type
*HLTy
= Size
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
);
374 auto Status
= simpleLibcall(MI
, MIRBuilder
, Size
, HLTy
);
375 if (Status
!= Legalized
)
379 case TargetOpcode::G_FPEXT
: {
380 // FIXME: Support other floating point types (half, fp128 etc)
381 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
382 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
383 if (ToSize
!= 64 || FromSize
!= 32)
384 return UnableToLegalize
;
385 LegalizeResult Status
= conversionLibcall(
386 MI
, MIRBuilder
, Type::getDoubleTy(Ctx
), Type::getFloatTy(Ctx
));
387 if (Status
!= Legalized
)
391 case TargetOpcode::G_FPTRUNC
: {
392 // FIXME: Support other floating point types (half, fp128 etc)
393 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
394 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
395 if (ToSize
!= 32 || FromSize
!= 64)
396 return UnableToLegalize
;
397 LegalizeResult Status
= conversionLibcall(
398 MI
, MIRBuilder
, Type::getFloatTy(Ctx
), Type::getDoubleTy(Ctx
));
399 if (Status
!= Legalized
)
403 case TargetOpcode::G_FPTOSI
:
404 case TargetOpcode::G_FPTOUI
: {
405 // FIXME: Support other types
406 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
407 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
408 if (ToSize
!= 32 || (FromSize
!= 32 && FromSize
!= 64))
409 return UnableToLegalize
;
410 LegalizeResult Status
= conversionLibcall(
411 MI
, MIRBuilder
, Type::getInt32Ty(Ctx
),
412 FromSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
));
413 if (Status
!= Legalized
)
417 case TargetOpcode::G_SITOFP
:
418 case TargetOpcode::G_UITOFP
: {
419 // FIXME: Support other types
420 unsigned FromSize
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
421 unsigned ToSize
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
422 if (FromSize
!= 32 || (ToSize
!= 32 && ToSize
!= 64))
423 return UnableToLegalize
;
424 LegalizeResult Status
= conversionLibcall(
426 ToSize
== 64 ? Type::getDoubleTy(Ctx
) : Type::getFloatTy(Ctx
),
427 Type::getInt32Ty(Ctx
));
428 if (Status
!= Legalized
)
434 MI
.eraseFromParent();
438 LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalar(MachineInstr
&MI
,
441 MIRBuilder
.setInstr(MI
);
443 uint64_t SizeOp0
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
444 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
446 switch (MI
.getOpcode()) {
448 return UnableToLegalize
;
449 case TargetOpcode::G_IMPLICIT_DEF
: {
450 // FIXME: add support for when SizeOp0 isn't an exact multiple of
452 if (SizeOp0
% NarrowSize
!= 0)
453 return UnableToLegalize
;
454 int NumParts
= SizeOp0
/ NarrowSize
;
456 SmallVector
<unsigned, 2> DstRegs
;
457 for (int i
= 0; i
< NumParts
; ++i
)
459 MIRBuilder
.buildUndef(NarrowTy
)->getOperand(0).getReg());
461 unsigned DstReg
= MI
.getOperand(0).getReg();
462 if(MRI
.getType(DstReg
).isVector())
463 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
465 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
466 MI
.eraseFromParent();
469 case TargetOpcode::G_ADD
: {
470 // FIXME: add support for when SizeOp0 isn't an exact multiple of
472 if (SizeOp0
% NarrowSize
!= 0)
473 return UnableToLegalize
;
474 // Expand in terms of carry-setting/consuming G_ADDE instructions.
475 int NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
477 SmallVector
<unsigned, 2> Src1Regs
, Src2Regs
, DstRegs
;
478 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
479 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
481 unsigned CarryIn
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
482 MIRBuilder
.buildConstant(CarryIn
, 0);
484 for (int i
= 0; i
< NumParts
; ++i
) {
485 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
486 unsigned CarryOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
488 MIRBuilder
.buildUAdde(DstReg
, CarryOut
, Src1Regs
[i
],
489 Src2Regs
[i
], CarryIn
);
491 DstRegs
.push_back(DstReg
);
494 unsigned DstReg
= MI
.getOperand(0).getReg();
495 if(MRI
.getType(DstReg
).isVector())
496 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
498 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
499 MI
.eraseFromParent();
502 case TargetOpcode::G_SUB
: {
503 // FIXME: add support for when SizeOp0 isn't an exact multiple of
505 if (SizeOp0
% NarrowSize
!= 0)
506 return UnableToLegalize
;
508 int NumParts
= SizeOp0
/ NarrowTy
.getSizeInBits();
510 SmallVector
<unsigned, 2> Src1Regs
, Src2Regs
, DstRegs
;
511 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src1Regs
);
512 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src2Regs
);
514 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
515 unsigned BorrowOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
516 MIRBuilder
.buildInstr(TargetOpcode::G_USUBO
, {DstReg
, BorrowOut
},
517 {Src1Regs
[0], Src2Regs
[0]});
518 DstRegs
.push_back(DstReg
);
519 unsigned BorrowIn
= BorrowOut
;
520 for (int i
= 1; i
< NumParts
; ++i
) {
521 DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
522 BorrowOut
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
524 MIRBuilder
.buildInstr(TargetOpcode::G_USUBE
, {DstReg
, BorrowOut
},
525 {Src1Regs
[i
], Src2Regs
[i
], BorrowIn
});
527 DstRegs
.push_back(DstReg
);
528 BorrowIn
= BorrowOut
;
530 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), DstRegs
);
531 MI
.eraseFromParent();
534 case TargetOpcode::G_MUL
:
535 return narrowScalarMul(MI
, TypeIdx
, NarrowTy
);
536 case TargetOpcode::G_EXTRACT
:
537 return narrowScalarExtract(MI
, TypeIdx
, NarrowTy
);
538 case TargetOpcode::G_INSERT
:
539 return narrowScalarInsert(MI
, TypeIdx
, NarrowTy
);
540 case TargetOpcode::G_LOAD
: {
541 const auto &MMO
= **MI
.memoperands_begin();
542 unsigned DstReg
= MI
.getOperand(0).getReg();
543 LLT DstTy
= MRI
.getType(DstReg
);
544 if (DstTy
.isVector())
545 return UnableToLegalize
;
547 if (8 * MMO
.getSize() != DstTy
.getSizeInBits()) {
548 unsigned TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
549 auto &MMO
= **MI
.memoperands_begin();
550 MIRBuilder
.buildLoad(TmpReg
, MI
.getOperand(1).getReg(), MMO
);
551 MIRBuilder
.buildAnyExt(DstReg
, TmpReg
);
552 MI
.eraseFromParent();
556 return reduceLoadStoreWidth(MI
, TypeIdx
, NarrowTy
);
558 case TargetOpcode::G_ZEXTLOAD
:
559 case TargetOpcode::G_SEXTLOAD
: {
560 bool ZExt
= MI
.getOpcode() == TargetOpcode::G_ZEXTLOAD
;
561 unsigned DstReg
= MI
.getOperand(0).getReg();
562 unsigned PtrReg
= MI
.getOperand(1).getReg();
564 unsigned TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
565 auto &MMO
= **MI
.memoperands_begin();
566 if (MMO
.getSize() * 8 == NarrowSize
) {
567 MIRBuilder
.buildLoad(TmpReg
, PtrReg
, MMO
);
569 unsigned ExtLoad
= ZExt
? TargetOpcode::G_ZEXTLOAD
570 : TargetOpcode::G_SEXTLOAD
;
571 MIRBuilder
.buildInstr(ExtLoad
)
574 .addMemOperand(&MMO
);
578 MIRBuilder
.buildZExt(DstReg
, TmpReg
);
580 MIRBuilder
.buildSExt(DstReg
, TmpReg
);
582 MI
.eraseFromParent();
585 case TargetOpcode::G_STORE
: {
586 const auto &MMO
= **MI
.memoperands_begin();
588 unsigned SrcReg
= MI
.getOperand(0).getReg();
589 LLT SrcTy
= MRI
.getType(SrcReg
);
590 if (SrcTy
.isVector())
591 return UnableToLegalize
;
593 int NumParts
= SizeOp0
/ NarrowSize
;
594 unsigned HandledSize
= NumParts
* NarrowTy
.getSizeInBits();
595 unsigned LeftoverBits
= SrcTy
.getSizeInBits() - HandledSize
;
596 if (SrcTy
.isVector() && LeftoverBits
!= 0)
597 return UnableToLegalize
;
599 if (8 * MMO
.getSize() != SrcTy
.getSizeInBits()) {
600 unsigned TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
601 auto &MMO
= **MI
.memoperands_begin();
602 MIRBuilder
.buildTrunc(TmpReg
, SrcReg
);
603 MIRBuilder
.buildStore(TmpReg
, MI
.getOperand(1).getReg(), MMO
);
604 MI
.eraseFromParent();
608 return reduceLoadStoreWidth(MI
, 0, NarrowTy
);
610 case TargetOpcode::G_CONSTANT
: {
611 // FIXME: add support for when SizeOp0 isn't an exact multiple of
613 if (SizeOp0
% NarrowSize
!= 0)
614 return UnableToLegalize
;
615 int NumParts
= SizeOp0
/ NarrowSize
;
616 const APInt
&Cst
= MI
.getOperand(1).getCImm()->getValue();
617 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
619 SmallVector
<unsigned, 2> DstRegs
;
620 for (int i
= 0; i
< NumParts
; ++i
) {
621 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
623 ConstantInt::get(Ctx
, Cst
.lshr(NarrowSize
* i
).trunc(NarrowSize
));
624 MIRBuilder
.buildConstant(DstReg
, *CI
);
625 DstRegs
.push_back(DstReg
);
627 unsigned DstReg
= MI
.getOperand(0).getReg();
628 if(MRI
.getType(DstReg
).isVector())
629 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
631 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
632 MI
.eraseFromParent();
635 case TargetOpcode::G_SELECT
:
636 return narrowScalarSelect(MI
, TypeIdx
, NarrowTy
);
637 case TargetOpcode::G_AND
:
638 case TargetOpcode::G_OR
:
639 case TargetOpcode::G_XOR
: {
640 // Legalize bitwise operation:
641 // A = BinOp<Ty> B, C
643 // B1, ..., BN = G_UNMERGE_VALUES B
644 // C1, ..., CN = G_UNMERGE_VALUES C
645 // A1 = BinOp<Ty/N> B1, C2
647 // AN = BinOp<Ty/N> BN, CN
648 // A = G_MERGE_VALUES A1, ..., AN
650 // FIXME: add support for when SizeOp0 isn't an exact multiple of
652 if (SizeOp0
% NarrowSize
!= 0)
653 return UnableToLegalize
;
654 int NumParts
= SizeOp0
/ NarrowSize
;
656 // List the registers where the destination will be scattered.
657 SmallVector
<unsigned, 2> DstRegs
;
658 // List the registers where the first argument will be split.
659 SmallVector
<unsigned, 2> SrcsReg1
;
660 // List the registers where the second argument will be split.
661 SmallVector
<unsigned, 2> SrcsReg2
;
662 // Create all the temporary registers.
663 for (int i
= 0; i
< NumParts
; ++i
) {
664 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
665 unsigned SrcReg1
= MRI
.createGenericVirtualRegister(NarrowTy
);
666 unsigned SrcReg2
= MRI
.createGenericVirtualRegister(NarrowTy
);
668 DstRegs
.push_back(DstReg
);
669 SrcsReg1
.push_back(SrcReg1
);
670 SrcsReg2
.push_back(SrcReg2
);
672 // Explode the big arguments into smaller chunks.
673 MIRBuilder
.buildUnmerge(SrcsReg1
, MI
.getOperand(1).getReg());
674 MIRBuilder
.buildUnmerge(SrcsReg2
, MI
.getOperand(2).getReg());
676 // Do the operation on each small part.
677 for (int i
= 0; i
< NumParts
; ++i
)
678 MIRBuilder
.buildInstr(MI
.getOpcode(), {DstRegs
[i
]},
679 {SrcsReg1
[i
], SrcsReg2
[i
]});
681 // Gather the destination registers into the final destination.
682 unsigned DstReg
= MI
.getOperand(0).getReg();
683 if(MRI
.getType(DstReg
).isVector())
684 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
686 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
687 MI
.eraseFromParent();
690 case TargetOpcode::G_SHL
:
691 case TargetOpcode::G_LSHR
:
692 case TargetOpcode::G_ASHR
:
693 return narrowScalarShift(MI
, TypeIdx
, NarrowTy
);
694 case TargetOpcode::G_CTLZ
:
695 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
696 case TargetOpcode::G_CTTZ
:
697 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
698 case TargetOpcode::G_CTPOP
:
700 return UnableToLegalize
; // TODO
702 Observer
.changingInstr(MI
);
703 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_ZEXT
);
704 Observer
.changedInstr(MI
);
706 case TargetOpcode::G_INTTOPTR
:
708 return UnableToLegalize
;
710 Observer
.changingInstr(MI
);
711 narrowScalarSrc(MI
, NarrowTy
, 1);
712 Observer
.changedInstr(MI
);
714 case TargetOpcode::G_PTRTOINT
:
716 return UnableToLegalize
;
718 Observer
.changingInstr(MI
);
719 narrowScalarDst(MI
, NarrowTy
, 0, TargetOpcode::G_ZEXT
);
720 Observer
.changedInstr(MI
);
725 void LegalizerHelper::widenScalarSrc(MachineInstr
&MI
, LLT WideTy
,
726 unsigned OpIdx
, unsigned ExtOpcode
) {
727 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
728 auto ExtB
= MIRBuilder
.buildInstr(ExtOpcode
, {WideTy
}, {MO
.getReg()});
729 MO
.setReg(ExtB
->getOperand(0).getReg());
732 void LegalizerHelper::narrowScalarSrc(MachineInstr
&MI
, LLT NarrowTy
,
734 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
735 auto ExtB
= MIRBuilder
.buildInstr(TargetOpcode::G_TRUNC
, {NarrowTy
},
737 MO
.setReg(ExtB
->getOperand(0).getReg());
740 void LegalizerHelper::widenScalarDst(MachineInstr
&MI
, LLT WideTy
,
741 unsigned OpIdx
, unsigned TruncOpcode
) {
742 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
743 unsigned DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
744 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
745 MIRBuilder
.buildInstr(TruncOpcode
, {MO
.getReg()}, {DstExt
});
749 void LegalizerHelper::narrowScalarDst(MachineInstr
&MI
, LLT NarrowTy
,
750 unsigned OpIdx
, unsigned ExtOpcode
) {
751 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
752 unsigned DstTrunc
= MRI
.createGenericVirtualRegister(NarrowTy
);
753 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
754 MIRBuilder
.buildInstr(ExtOpcode
, {MO
.getReg()}, {DstTrunc
});
758 void LegalizerHelper::moreElementsVectorDst(MachineInstr
&MI
, LLT WideTy
,
760 MachineOperand
&MO
= MI
.getOperand(OpIdx
);
761 unsigned DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
762 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
763 MIRBuilder
.buildExtract(MO
.getReg(), DstExt
, 0);
767 LegalizerHelper::LegalizeResult
768 LegalizerHelper::widenScalarMergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
771 return UnableToLegalize
;
773 unsigned DstReg
= MI
.getOperand(0).getReg();
774 LLT DstTy
= MRI
.getType(DstReg
);
775 if (!DstTy
.isScalar())
776 return UnableToLegalize
;
778 unsigned NumOps
= MI
.getNumOperands();
779 unsigned NumSrc
= MI
.getNumOperands() - 1;
780 unsigned PartSize
= DstTy
.getSizeInBits() / NumSrc
;
782 unsigned Src1
= MI
.getOperand(1).getReg();
783 unsigned ResultReg
= MIRBuilder
.buildZExt(DstTy
, Src1
)->getOperand(0).getReg();
785 for (unsigned I
= 2; I
!= NumOps
; ++I
) {
786 const unsigned Offset
= (I
- 1) * PartSize
;
788 unsigned SrcReg
= MI
.getOperand(I
).getReg();
789 assert(MRI
.getType(SrcReg
) == LLT::scalar(PartSize
));
791 auto ZextInput
= MIRBuilder
.buildZExt(DstTy
, SrcReg
);
793 unsigned NextResult
= I
+ 1 == NumOps
? DstReg
:
794 MRI
.createGenericVirtualRegister(DstTy
);
796 auto ShiftAmt
= MIRBuilder
.buildConstant(DstTy
, Offset
);
797 auto Shl
= MIRBuilder
.buildShl(DstTy
, ZextInput
, ShiftAmt
);
798 MIRBuilder
.buildOr(NextResult
, ResultReg
, Shl
);
799 ResultReg
= NextResult
;
802 MI
.eraseFromParent();
806 LegalizerHelper::LegalizeResult
807 LegalizerHelper::widenScalarUnmergeValues(MachineInstr
&MI
, unsigned TypeIdx
,
810 return UnableToLegalize
;
812 unsigned NumDst
= MI
.getNumOperands() - 1;
813 unsigned SrcReg
= MI
.getOperand(NumDst
).getReg();
814 LLT SrcTy
= MRI
.getType(SrcReg
);
815 if (!SrcTy
.isScalar())
816 return UnableToLegalize
;
818 unsigned Dst0Reg
= MI
.getOperand(0).getReg();
819 LLT DstTy
= MRI
.getType(Dst0Reg
);
820 if (!DstTy
.isScalar())
821 return UnableToLegalize
;
823 unsigned NewSrcSize
= NumDst
* WideTy
.getSizeInBits();
824 LLT NewSrcTy
= LLT::scalar(NewSrcSize
);
825 unsigned SizeDiff
= WideTy
.getSizeInBits() - DstTy
.getSizeInBits();
827 auto WideSrc
= MIRBuilder
.buildZExt(NewSrcTy
, SrcReg
);
829 for (unsigned I
= 1; I
!= NumDst
; ++I
) {
830 auto ShiftAmt
= MIRBuilder
.buildConstant(NewSrcTy
, SizeDiff
* I
);
831 auto Shl
= MIRBuilder
.buildShl(NewSrcTy
, WideSrc
, ShiftAmt
);
832 WideSrc
= MIRBuilder
.buildOr(NewSrcTy
, WideSrc
, Shl
);
835 Observer
.changingInstr(MI
);
837 MI
.getOperand(NumDst
).setReg(WideSrc
->getOperand(0).getReg());
838 for (unsigned I
= 0; I
!= NumDst
; ++I
)
839 widenScalarDst(MI
, WideTy
, I
);
841 Observer
.changedInstr(MI
);
846 LegalizerHelper::LegalizeResult
847 LegalizerHelper::widenScalarExtract(MachineInstr
&MI
, unsigned TypeIdx
,
850 return UnableToLegalize
;
852 unsigned SrcReg
= MI
.getOperand(1).getReg();
853 LLT SrcTy
= MRI
.getType(SrcReg
);
854 if (!SrcTy
.isVector())
855 return UnableToLegalize
;
857 unsigned DstReg
= MI
.getOperand(0).getReg();
858 LLT DstTy
= MRI
.getType(DstReg
);
859 if (DstTy
!= SrcTy
.getElementType())
860 return UnableToLegalize
;
862 unsigned Offset
= MI
.getOperand(2).getImm();
863 if (Offset
% SrcTy
.getScalarSizeInBits() != 0)
864 return UnableToLegalize
;
866 Observer
.changingInstr(MI
);
867 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
869 MI
.getOperand(2).setImm((WideTy
.getSizeInBits() / SrcTy
.getSizeInBits()) *
871 widenScalarDst(MI
, WideTy
.getScalarType(), 0);
872 Observer
.changedInstr(MI
);
876 LegalizerHelper::LegalizeResult
877 LegalizerHelper::widenScalarInsert(MachineInstr
&MI
, unsigned TypeIdx
,
880 return UnableToLegalize
;
881 Observer
.changingInstr(MI
);
882 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
883 widenScalarDst(MI
, WideTy
);
884 Observer
.changedInstr(MI
);
888 LegalizerHelper::LegalizeResult
889 LegalizerHelper::widenScalar(MachineInstr
&MI
, unsigned TypeIdx
, LLT WideTy
) {
890 MIRBuilder
.setInstr(MI
);
892 switch (MI
.getOpcode()) {
894 return UnableToLegalize
;
895 case TargetOpcode::G_EXTRACT
:
896 return widenScalarExtract(MI
, TypeIdx
, WideTy
);
897 case TargetOpcode::G_INSERT
:
898 return widenScalarInsert(MI
, TypeIdx
, WideTy
);
899 case TargetOpcode::G_MERGE_VALUES
:
900 return widenScalarMergeValues(MI
, TypeIdx
, WideTy
);
901 case TargetOpcode::G_UNMERGE_VALUES
:
902 return widenScalarUnmergeValues(MI
, TypeIdx
, WideTy
);
903 case TargetOpcode::G_UADDO
:
904 case TargetOpcode::G_USUBO
: {
906 return UnableToLegalize
; // TODO
907 auto LHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, {WideTy
},
908 {MI
.getOperand(2).getReg()});
909 auto RHSZext
= MIRBuilder
.buildInstr(TargetOpcode::G_ZEXT
, {WideTy
},
910 {MI
.getOperand(3).getReg()});
911 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_UADDO
912 ? TargetOpcode::G_ADD
913 : TargetOpcode::G_SUB
;
914 // Do the arithmetic in the larger type.
915 auto NewOp
= MIRBuilder
.buildInstr(Opcode
, {WideTy
}, {LHSZext
, RHSZext
});
916 LLT OrigTy
= MRI
.getType(MI
.getOperand(0).getReg());
917 APInt Mask
= APInt::getAllOnesValue(OrigTy
.getSizeInBits());
918 auto AndOp
= MIRBuilder
.buildInstr(
919 TargetOpcode::G_AND
, {WideTy
},
920 {NewOp
, MIRBuilder
.buildConstant(WideTy
, Mask
.getZExtValue())});
921 // There is no overflow if the AndOp is the same as NewOp.
922 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, MI
.getOperand(1).getReg(), NewOp
,
924 // Now trunc the NewOp to the original result.
925 MIRBuilder
.buildTrunc(MI
.getOperand(0).getReg(), NewOp
);
926 MI
.eraseFromParent();
929 case TargetOpcode::G_CTTZ
:
930 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
931 case TargetOpcode::G_CTLZ
:
932 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
933 case TargetOpcode::G_CTPOP
: {
935 Observer
.changingInstr(MI
);
936 widenScalarDst(MI
, WideTy
, 0);
937 Observer
.changedInstr(MI
);
941 unsigned SrcReg
= MI
.getOperand(1).getReg();
943 // First ZEXT the input.
944 auto MIBSrc
= MIRBuilder
.buildZExt(WideTy
, SrcReg
);
945 LLT CurTy
= MRI
.getType(SrcReg
);
946 if (MI
.getOpcode() == TargetOpcode::G_CTTZ
) {
947 // The count is the same in the larger type except if the original
948 // value was zero. This can be handled by setting the bit just off
949 // the top of the original type.
951 APInt::getOneBitSet(WideTy
.getSizeInBits(), CurTy
.getSizeInBits());
952 MIBSrc
= MIRBuilder
.buildOr(
953 WideTy
, MIBSrc
, MIRBuilder
.buildConstant(WideTy
, TopBit
));
956 // Perform the operation at the larger size.
957 auto MIBNewOp
= MIRBuilder
.buildInstr(MI
.getOpcode(), {WideTy
}, {MIBSrc
});
958 // This is already the correct result for CTPOP and CTTZs
959 if (MI
.getOpcode() == TargetOpcode::G_CTLZ
||
960 MI
.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF
) {
961 // The correct result is NewOp - (Difference in widety and current ty).
962 unsigned SizeDiff
= WideTy
.getSizeInBits() - CurTy
.getSizeInBits();
963 MIBNewOp
= MIRBuilder
.buildInstr(
964 TargetOpcode::G_SUB
, {WideTy
},
965 {MIBNewOp
, MIRBuilder
.buildConstant(WideTy
, SizeDiff
)});
968 MIRBuilder
.buildZExtOrTrunc(MI
.getOperand(0), MIBNewOp
);
969 MI
.eraseFromParent();
972 case TargetOpcode::G_BSWAP
: {
973 Observer
.changingInstr(MI
);
974 unsigned DstReg
= MI
.getOperand(0).getReg();
976 unsigned ShrReg
= MRI
.createGenericVirtualRegister(WideTy
);
977 unsigned DstExt
= MRI
.createGenericVirtualRegister(WideTy
);
978 unsigned ShiftAmtReg
= MRI
.createGenericVirtualRegister(WideTy
);
979 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
981 MI
.getOperand(0).setReg(DstExt
);
983 MIRBuilder
.setInsertPt(MIRBuilder
.getMBB(), ++MIRBuilder
.getInsertPt());
985 LLT Ty
= MRI
.getType(DstReg
);
986 unsigned DiffBits
= WideTy
.getScalarSizeInBits() - Ty
.getScalarSizeInBits();
987 MIRBuilder
.buildConstant(ShiftAmtReg
, DiffBits
);
988 MIRBuilder
.buildInstr(TargetOpcode::G_LSHR
)
991 .addUse(ShiftAmtReg
);
993 MIRBuilder
.buildTrunc(DstReg
, ShrReg
);
994 Observer
.changedInstr(MI
);
997 case TargetOpcode::G_ADD
:
998 case TargetOpcode::G_AND
:
999 case TargetOpcode::G_MUL
:
1000 case TargetOpcode::G_OR
:
1001 case TargetOpcode::G_XOR
:
1002 case TargetOpcode::G_SUB
:
1003 // Perform operation at larger width (any extension is fines here, high bits
1004 // don't affect the result) and then truncate the result back to the
1006 Observer
.changingInstr(MI
);
1007 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1008 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
1009 widenScalarDst(MI
, WideTy
);
1010 Observer
.changedInstr(MI
);
1013 case TargetOpcode::G_SHL
:
1014 Observer
.changingInstr(MI
);
1017 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ANYEXT
);
1018 widenScalarDst(MI
, WideTy
);
1020 assert(TypeIdx
== 1);
1021 // The "number of bits to shift" operand must preserve its value as an
1022 // unsigned integer:
1023 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1026 Observer
.changedInstr(MI
);
1029 case TargetOpcode::G_SDIV
:
1030 case TargetOpcode::G_SREM
:
1031 Observer
.changingInstr(MI
);
1032 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
1033 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1034 widenScalarDst(MI
, WideTy
);
1035 Observer
.changedInstr(MI
);
1038 case TargetOpcode::G_ASHR
:
1039 case TargetOpcode::G_LSHR
:
1040 Observer
.changingInstr(MI
);
1043 unsigned CvtOp
= MI
.getOpcode() == TargetOpcode::G_ASHR
?
1044 TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT
;
1046 widenScalarSrc(MI
, WideTy
, 1, CvtOp
);
1047 widenScalarDst(MI
, WideTy
);
1049 assert(TypeIdx
== 1);
1050 // The "number of bits to shift" operand must preserve its value as an
1051 // unsigned integer:
1052 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1055 Observer
.changedInstr(MI
);
1057 case TargetOpcode::G_UDIV
:
1058 case TargetOpcode::G_UREM
:
1059 Observer
.changingInstr(MI
);
1060 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1061 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ZEXT
);
1062 widenScalarDst(MI
, WideTy
);
1063 Observer
.changedInstr(MI
);
1066 case TargetOpcode::G_SELECT
:
1067 Observer
.changingInstr(MI
);
1069 // Perform operation at larger width (any extension is fine here, high
1070 // bits don't affect the result) and then truncate the result back to the
1072 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_ANYEXT
);
1073 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_ANYEXT
);
1074 widenScalarDst(MI
, WideTy
);
1076 bool IsVec
= MRI
.getType(MI
.getOperand(1).getReg()).isVector();
1077 // Explicit extension is required here since high bits affect the result.
1078 widenScalarSrc(MI
, WideTy
, 1, MIRBuilder
.getBoolExtOp(IsVec
, false));
1080 Observer
.changedInstr(MI
);
1083 case TargetOpcode::G_FPTOSI
:
1084 case TargetOpcode::G_FPTOUI
:
1086 return UnableToLegalize
;
1087 Observer
.changingInstr(MI
);
1088 widenScalarDst(MI
, WideTy
);
1089 Observer
.changedInstr(MI
);
1092 case TargetOpcode::G_SITOFP
:
1094 return UnableToLegalize
;
1095 Observer
.changingInstr(MI
);
1096 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_SEXT
);
1097 Observer
.changedInstr(MI
);
1100 case TargetOpcode::G_UITOFP
:
1102 return UnableToLegalize
;
1103 Observer
.changingInstr(MI
);
1104 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1105 Observer
.changedInstr(MI
);
1108 case TargetOpcode::G_LOAD
:
1109 case TargetOpcode::G_SEXTLOAD
:
1110 case TargetOpcode::G_ZEXTLOAD
:
1111 Observer
.changingInstr(MI
);
1112 widenScalarDst(MI
, WideTy
);
1113 Observer
.changedInstr(MI
);
1116 case TargetOpcode::G_STORE
: {
1118 return UnableToLegalize
;
1120 LLT Ty
= MRI
.getType(MI
.getOperand(0).getReg());
1121 if (!isPowerOf2_32(Ty
.getSizeInBits()))
1122 return UnableToLegalize
;
1124 Observer
.changingInstr(MI
);
1126 unsigned ExtType
= Ty
.getScalarSizeInBits() == 1 ?
1127 TargetOpcode::G_ZEXT
: TargetOpcode::G_ANYEXT
;
1128 widenScalarSrc(MI
, WideTy
, 0, ExtType
);
1130 Observer
.changedInstr(MI
);
1133 case TargetOpcode::G_CONSTANT
: {
1134 MachineOperand
&SrcMO
= MI
.getOperand(1);
1135 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1136 const APInt
&Val
= SrcMO
.getCImm()->getValue().sext(WideTy
.getSizeInBits());
1137 Observer
.changingInstr(MI
);
1138 SrcMO
.setCImm(ConstantInt::get(Ctx
, Val
));
1140 widenScalarDst(MI
, WideTy
);
1141 Observer
.changedInstr(MI
);
1144 case TargetOpcode::G_FCONSTANT
: {
1145 MachineOperand
&SrcMO
= MI
.getOperand(1);
1146 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1147 APFloat Val
= SrcMO
.getFPImm()->getValueAPF();
1149 switch (WideTy
.getSizeInBits()) {
1151 Val
.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven
,
1155 Val
.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven
,
1159 return UnableToLegalize
;
1162 assert(!LosesInfo
&& "extend should always be lossless");
1164 Observer
.changingInstr(MI
);
1165 SrcMO
.setFPImm(ConstantFP::get(Ctx
, Val
));
1167 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
1168 Observer
.changedInstr(MI
);
1171 case TargetOpcode::G_IMPLICIT_DEF
: {
1172 Observer
.changingInstr(MI
);
1173 widenScalarDst(MI
, WideTy
);
1174 Observer
.changedInstr(MI
);
1177 case TargetOpcode::G_BRCOND
:
1178 Observer
.changingInstr(MI
);
1179 widenScalarSrc(MI
, WideTy
, 0, MIRBuilder
.getBoolExtOp(false, false));
1180 Observer
.changedInstr(MI
);
1183 case TargetOpcode::G_FCMP
:
1184 Observer
.changingInstr(MI
);
1186 widenScalarDst(MI
, WideTy
);
1188 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_FPEXT
);
1189 widenScalarSrc(MI
, WideTy
, 3, TargetOpcode::G_FPEXT
);
1191 Observer
.changedInstr(MI
);
1194 case TargetOpcode::G_ICMP
:
1195 Observer
.changingInstr(MI
);
1197 widenScalarDst(MI
, WideTy
);
1199 unsigned ExtOpcode
= CmpInst::isSigned(static_cast<CmpInst::Predicate
>(
1200 MI
.getOperand(1).getPredicate()))
1201 ? TargetOpcode::G_SEXT
1202 : TargetOpcode::G_ZEXT
;
1203 widenScalarSrc(MI
, WideTy
, 2, ExtOpcode
);
1204 widenScalarSrc(MI
, WideTy
, 3, ExtOpcode
);
1206 Observer
.changedInstr(MI
);
1209 case TargetOpcode::G_GEP
:
1210 assert(TypeIdx
== 1 && "unable to legalize pointer of GEP");
1211 Observer
.changingInstr(MI
);
1212 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1213 Observer
.changedInstr(MI
);
1216 case TargetOpcode::G_PHI
: {
1217 assert(TypeIdx
== 0 && "Expecting only Idx 0");
1219 Observer
.changingInstr(MI
);
1220 for (unsigned I
= 1; I
< MI
.getNumOperands(); I
+= 2) {
1221 MachineBasicBlock
&OpMBB
= *MI
.getOperand(I
+ 1).getMBB();
1222 MIRBuilder
.setInsertPt(OpMBB
, OpMBB
.getFirstTerminator());
1223 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_ANYEXT
);
1226 MachineBasicBlock
&MBB
= *MI
.getParent();
1227 MIRBuilder
.setInsertPt(MBB
, --MBB
.getFirstNonPHI());
1228 widenScalarDst(MI
, WideTy
);
1229 Observer
.changedInstr(MI
);
1232 case TargetOpcode::G_EXTRACT_VECTOR_ELT
: {
1234 unsigned VecReg
= MI
.getOperand(1).getReg();
1235 LLT VecTy
= MRI
.getType(VecReg
);
1236 Observer
.changingInstr(MI
);
1238 widenScalarSrc(MI
, LLT::vector(VecTy
.getNumElements(),
1239 WideTy
.getSizeInBits()),
1240 1, TargetOpcode::G_SEXT
);
1242 widenScalarDst(MI
, WideTy
, 0);
1243 Observer
.changedInstr(MI
);
1248 return UnableToLegalize
;
1249 Observer
.changingInstr(MI
);
1250 widenScalarSrc(MI
, WideTy
, 2, TargetOpcode::G_SEXT
);
1251 Observer
.changedInstr(MI
);
1254 case TargetOpcode::G_FADD
:
1255 case TargetOpcode::G_FMUL
:
1256 case TargetOpcode::G_FSUB
:
1257 case TargetOpcode::G_FMA
:
1258 case TargetOpcode::G_FNEG
:
1259 case TargetOpcode::G_FABS
:
1260 case TargetOpcode::G_FCANONICALIZE
:
1261 case TargetOpcode::G_FDIV
:
1262 case TargetOpcode::G_FREM
:
1263 case TargetOpcode::G_FCEIL
:
1264 case TargetOpcode::G_FFLOOR
:
1265 case TargetOpcode::G_FCOS
:
1266 case TargetOpcode::G_FSIN
:
1267 case TargetOpcode::G_FLOG10
:
1268 case TargetOpcode::G_FLOG
:
1269 case TargetOpcode::G_FLOG2
:
1270 case TargetOpcode::G_FSQRT
:
1271 case TargetOpcode::G_FEXP
:
1272 assert(TypeIdx
== 0);
1273 Observer
.changingInstr(MI
);
1275 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
)
1276 widenScalarSrc(MI
, WideTy
, I
, TargetOpcode::G_FPEXT
);
1278 widenScalarDst(MI
, WideTy
, 0, TargetOpcode::G_FPTRUNC
);
1279 Observer
.changedInstr(MI
);
1281 case TargetOpcode::G_INTTOPTR
:
1283 return UnableToLegalize
;
1285 Observer
.changingInstr(MI
);
1286 widenScalarSrc(MI
, WideTy
, 1, TargetOpcode::G_ZEXT
);
1287 Observer
.changedInstr(MI
);
1289 case TargetOpcode::G_PTRTOINT
:
1291 return UnableToLegalize
;
1293 Observer
.changingInstr(MI
);
1294 widenScalarDst(MI
, WideTy
, 0);
1295 Observer
.changedInstr(MI
);
1300 LegalizerHelper::LegalizeResult
1301 LegalizerHelper::lower(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
1302 using namespace TargetOpcode
;
1303 MIRBuilder
.setInstr(MI
);
1305 switch(MI
.getOpcode()) {
1307 return UnableToLegalize
;
1308 case TargetOpcode::G_SREM
:
1309 case TargetOpcode::G_UREM
: {
1310 unsigned QuotReg
= MRI
.createGenericVirtualRegister(Ty
);
1311 MIRBuilder
.buildInstr(MI
.getOpcode() == G_SREM
? G_SDIV
: G_UDIV
)
1313 .addUse(MI
.getOperand(1).getReg())
1314 .addUse(MI
.getOperand(2).getReg());
1316 unsigned ProdReg
= MRI
.createGenericVirtualRegister(Ty
);
1317 MIRBuilder
.buildMul(ProdReg
, QuotReg
, MI
.getOperand(2).getReg());
1318 MIRBuilder
.buildSub(MI
.getOperand(0).getReg(), MI
.getOperand(1).getReg(),
1320 MI
.eraseFromParent();
1323 case TargetOpcode::G_SMULO
:
1324 case TargetOpcode::G_UMULO
: {
1325 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1327 unsigned Res
= MI
.getOperand(0).getReg();
1328 unsigned Overflow
= MI
.getOperand(1).getReg();
1329 unsigned LHS
= MI
.getOperand(2).getReg();
1330 unsigned RHS
= MI
.getOperand(3).getReg();
1332 MIRBuilder
.buildMul(Res
, LHS
, RHS
);
1334 unsigned Opcode
= MI
.getOpcode() == TargetOpcode::G_SMULO
1335 ? TargetOpcode::G_SMULH
1336 : TargetOpcode::G_UMULH
;
1338 unsigned HiPart
= MRI
.createGenericVirtualRegister(Ty
);
1339 MIRBuilder
.buildInstr(Opcode
)
1344 unsigned Zero
= MRI
.createGenericVirtualRegister(Ty
);
1345 MIRBuilder
.buildConstant(Zero
, 0);
1347 // For *signed* multiply, overflow is detected by checking:
1348 // (hi != (lo >> bitwidth-1))
1349 if (Opcode
== TargetOpcode::G_SMULH
) {
1350 unsigned Shifted
= MRI
.createGenericVirtualRegister(Ty
);
1351 unsigned ShiftAmt
= MRI
.createGenericVirtualRegister(Ty
);
1352 MIRBuilder
.buildConstant(ShiftAmt
, Ty
.getSizeInBits() - 1);
1353 MIRBuilder
.buildInstr(TargetOpcode::G_ASHR
)
1357 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Shifted
);
1359 MIRBuilder
.buildICmp(CmpInst::ICMP_NE
, Overflow
, HiPart
, Zero
);
1361 MI
.eraseFromParent();
1364 case TargetOpcode::G_FNEG
: {
1365 // TODO: Handle vector types once we are able to
1368 return UnableToLegalize
;
1369 unsigned Res
= MI
.getOperand(0).getReg();
1371 LLVMContext
&Ctx
= MIRBuilder
.getMF().getFunction().getContext();
1372 switch (Ty
.getSizeInBits()) {
1374 ZeroTy
= Type::getHalfTy(Ctx
);
1377 ZeroTy
= Type::getFloatTy(Ctx
);
1380 ZeroTy
= Type::getDoubleTy(Ctx
);
1383 ZeroTy
= Type::getFP128Ty(Ctx
);
1386 llvm_unreachable("unexpected floating-point type");
1388 ConstantFP
&ZeroForNegation
=
1389 *cast
<ConstantFP
>(ConstantFP::getZeroValueForNegation(ZeroTy
));
1390 auto Zero
= MIRBuilder
.buildFConstant(Ty
, ZeroForNegation
);
1391 MIRBuilder
.buildInstr(TargetOpcode::G_FSUB
)
1393 .addUse(Zero
->getOperand(0).getReg())
1394 .addUse(MI
.getOperand(1).getReg());
1395 MI
.eraseFromParent();
1398 case TargetOpcode::G_FSUB
: {
1399 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1400 // First, check if G_FNEG is marked as Lower. If so, we may
1401 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1402 if (LI
.getAction({G_FNEG
, {Ty
}}).Action
== Lower
)
1403 return UnableToLegalize
;
1404 unsigned Res
= MI
.getOperand(0).getReg();
1405 unsigned LHS
= MI
.getOperand(1).getReg();
1406 unsigned RHS
= MI
.getOperand(2).getReg();
1407 unsigned Neg
= MRI
.createGenericVirtualRegister(Ty
);
1408 MIRBuilder
.buildInstr(TargetOpcode::G_FNEG
).addDef(Neg
).addUse(RHS
);
1409 MIRBuilder
.buildInstr(TargetOpcode::G_FADD
)
1413 MI
.eraseFromParent();
1416 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS
: {
1417 unsigned OldValRes
= MI
.getOperand(0).getReg();
1418 unsigned SuccessRes
= MI
.getOperand(1).getReg();
1419 unsigned Addr
= MI
.getOperand(2).getReg();
1420 unsigned CmpVal
= MI
.getOperand(3).getReg();
1421 unsigned NewVal
= MI
.getOperand(4).getReg();
1422 MIRBuilder
.buildAtomicCmpXchg(OldValRes
, Addr
, CmpVal
, NewVal
,
1423 **MI
.memoperands_begin());
1424 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, SuccessRes
, OldValRes
, CmpVal
);
1425 MI
.eraseFromParent();
1428 case TargetOpcode::G_LOAD
:
1429 case TargetOpcode::G_SEXTLOAD
:
1430 case TargetOpcode::G_ZEXTLOAD
: {
1431 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1432 unsigned DstReg
= MI
.getOperand(0).getReg();
1433 unsigned PtrReg
= MI
.getOperand(1).getReg();
1434 LLT DstTy
= MRI
.getType(DstReg
);
1435 auto &MMO
= **MI
.memoperands_begin();
1437 if (DstTy
.getSizeInBits() == MMO
.getSize() /* in bytes */ * 8) {
1438 // In the case of G_LOAD, this was a non-extending load already and we're
1439 // about to lower to the same instruction.
1440 if (MI
.getOpcode() == TargetOpcode::G_LOAD
)
1441 return UnableToLegalize
;
1442 MIRBuilder
.buildLoad(DstReg
, PtrReg
, MMO
);
1443 MI
.eraseFromParent();
1447 if (DstTy
.isScalar()) {
1448 unsigned TmpReg
= MRI
.createGenericVirtualRegister(
1449 LLT::scalar(MMO
.getSize() /* in bytes */ * 8));
1450 MIRBuilder
.buildLoad(TmpReg
, PtrReg
, MMO
);
1451 switch (MI
.getOpcode()) {
1453 llvm_unreachable("Unexpected opcode");
1454 case TargetOpcode::G_LOAD
:
1455 MIRBuilder
.buildAnyExt(DstReg
, TmpReg
);
1457 case TargetOpcode::G_SEXTLOAD
:
1458 MIRBuilder
.buildSExt(DstReg
, TmpReg
);
1460 case TargetOpcode::G_ZEXTLOAD
:
1461 MIRBuilder
.buildZExt(DstReg
, TmpReg
);
1464 MI
.eraseFromParent();
1468 return UnableToLegalize
;
1470 case TargetOpcode::G_CTLZ_ZERO_UNDEF
:
1471 case TargetOpcode::G_CTTZ_ZERO_UNDEF
:
1472 case TargetOpcode::G_CTLZ
:
1473 case TargetOpcode::G_CTTZ
:
1474 case TargetOpcode::G_CTPOP
:
1475 return lowerBitCount(MI
, TypeIdx
, Ty
);
1477 unsigned Res
= MI
.getOperand(0).getReg();
1478 unsigned CarryOut
= MI
.getOperand(1).getReg();
1479 unsigned LHS
= MI
.getOperand(2).getReg();
1480 unsigned RHS
= MI
.getOperand(3).getReg();
1481 unsigned CarryIn
= MI
.getOperand(4).getReg();
1483 unsigned TmpRes
= MRI
.createGenericVirtualRegister(Ty
);
1484 unsigned ZExtCarryIn
= MRI
.createGenericVirtualRegister(Ty
);
1486 MIRBuilder
.buildAdd(TmpRes
, LHS
, RHS
);
1487 MIRBuilder
.buildZExt(ZExtCarryIn
, CarryIn
);
1488 MIRBuilder
.buildAdd(Res
, TmpRes
, ZExtCarryIn
);
1489 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, CarryOut
, Res
, LHS
);
1491 MI
.eraseFromParent();
1495 unsigned Res
= MI
.getOperand(0).getReg();
1496 unsigned BorrowOut
= MI
.getOperand(1).getReg();
1497 unsigned LHS
= MI
.getOperand(2).getReg();
1498 unsigned RHS
= MI
.getOperand(3).getReg();
1500 MIRBuilder
.buildSub(Res
, LHS
, RHS
);
1501 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, BorrowOut
, LHS
, RHS
);
1503 MI
.eraseFromParent();
1507 unsigned Res
= MI
.getOperand(0).getReg();
1508 unsigned BorrowOut
= MI
.getOperand(1).getReg();
1509 unsigned LHS
= MI
.getOperand(2).getReg();
1510 unsigned RHS
= MI
.getOperand(3).getReg();
1511 unsigned BorrowIn
= MI
.getOperand(4).getReg();
1513 unsigned TmpRes
= MRI
.createGenericVirtualRegister(Ty
);
1514 unsigned ZExtBorrowIn
= MRI
.createGenericVirtualRegister(Ty
);
1515 unsigned LHS_EQ_RHS
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
1516 unsigned LHS_ULT_RHS
= MRI
.createGenericVirtualRegister(LLT::scalar(1));
1518 MIRBuilder
.buildSub(TmpRes
, LHS
, RHS
);
1519 MIRBuilder
.buildZExt(ZExtBorrowIn
, BorrowIn
);
1520 MIRBuilder
.buildSub(Res
, TmpRes
, ZExtBorrowIn
);
1521 MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LHS_EQ_RHS
, LHS
, RHS
);
1522 MIRBuilder
.buildICmp(CmpInst::ICMP_ULT
, LHS_ULT_RHS
, LHS
, RHS
);
1523 MIRBuilder
.buildSelect(BorrowOut
, LHS_EQ_RHS
, BorrowIn
, LHS_ULT_RHS
);
1525 MI
.eraseFromParent();
1531 LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorImplicitDef(
1532 MachineInstr
&MI
, unsigned TypeIdx
, LLT NarrowTy
) {
1533 SmallVector
<unsigned, 2> DstRegs
;
1535 unsigned NarrowSize
= NarrowTy
.getSizeInBits();
1536 unsigned DstReg
= MI
.getOperand(0).getReg();
1537 unsigned Size
= MRI
.getType(DstReg
).getSizeInBits();
1538 int NumParts
= Size
/ NarrowSize
;
1539 // FIXME: Don't know how to handle the situation where the small vectors
1540 // aren't all the same size yet.
1541 if (Size
% NarrowSize
!= 0)
1542 return UnableToLegalize
;
1544 for (int i
= 0; i
< NumParts
; ++i
) {
1545 unsigned TmpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
1546 MIRBuilder
.buildUndef(TmpReg
);
1547 DstRegs
.push_back(TmpReg
);
1550 if (NarrowTy
.isVector())
1551 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
1553 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
1555 MI
.eraseFromParent();
1559 LegalizerHelper::LegalizeResult
1560 LegalizerHelper::fewerElementsVectorBasic(MachineInstr
&MI
, unsigned TypeIdx
,
1562 const unsigned Opc
= MI
.getOpcode();
1563 const unsigned NumOps
= MI
.getNumOperands() - 1;
1564 const unsigned NarrowSize
= NarrowTy
.getSizeInBits();
1565 const unsigned DstReg
= MI
.getOperand(0).getReg();
1566 const unsigned Flags
= MI
.getFlags();
1567 const LLT DstTy
= MRI
.getType(DstReg
);
1568 const unsigned Size
= DstTy
.getSizeInBits();
1569 const int NumParts
= Size
/ NarrowSize
;
1570 const LLT EltTy
= DstTy
.getElementType();
1571 const unsigned EltSize
= EltTy
.getSizeInBits();
1572 const unsigned BitsForNumParts
= NarrowSize
* NumParts
;
1574 // Check if we have any leftovers. If we do, then only handle the case where
1575 // the leftover is one element.
1576 if (BitsForNumParts
!= Size
&& BitsForNumParts
+ EltSize
!= Size
)
1577 return UnableToLegalize
;
1579 if (BitsForNumParts
!= Size
) {
1580 unsigned AccumDstReg
= MRI
.createGenericVirtualRegister(DstTy
);
1581 MIRBuilder
.buildUndef(AccumDstReg
);
1583 // Handle the pieces which evenly divide into the requested type with
1584 // extract/op/insert sequence.
1585 for (unsigned Offset
= 0; Offset
< BitsForNumParts
; Offset
+= NarrowSize
) {
1586 SmallVector
<SrcOp
, 4> SrcOps
;
1587 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
1588 unsigned PartOpReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
1589 MIRBuilder
.buildExtract(PartOpReg
, MI
.getOperand(I
).getReg(), Offset
);
1590 SrcOps
.push_back(PartOpReg
);
1593 unsigned PartDstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
1594 MIRBuilder
.buildInstr(Opc
, {PartDstReg
}, SrcOps
, Flags
);
1596 unsigned PartInsertReg
= MRI
.createGenericVirtualRegister(DstTy
);
1597 MIRBuilder
.buildInsert(PartInsertReg
, AccumDstReg
, PartDstReg
, Offset
);
1598 AccumDstReg
= PartInsertReg
;
1599 Offset
+= NarrowSize
;
1602 // Handle the remaining element sized leftover piece.
1603 SmallVector
<SrcOp
, 4> SrcOps
;
1604 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
1605 unsigned PartOpReg
= MRI
.createGenericVirtualRegister(EltTy
);
1606 MIRBuilder
.buildExtract(PartOpReg
, MI
.getOperand(I
).getReg(),
1608 SrcOps
.push_back(PartOpReg
);
1611 unsigned PartDstReg
= MRI
.createGenericVirtualRegister(EltTy
);
1612 MIRBuilder
.buildInstr(Opc
, {PartDstReg
}, SrcOps
, Flags
);
1613 MIRBuilder
.buildInsert(DstReg
, AccumDstReg
, PartDstReg
, BitsForNumParts
);
1614 MI
.eraseFromParent();
1619 SmallVector
<unsigned, 2> DstRegs
, Src0Regs
, Src1Regs
, Src2Regs
;
1621 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, Src0Regs
);
1624 extractParts(MI
.getOperand(2).getReg(), NarrowTy
, NumParts
, Src1Regs
);
1627 extractParts(MI
.getOperand(3).getReg(), NarrowTy
, NumParts
, Src2Regs
);
1629 for (int i
= 0; i
< NumParts
; ++i
) {
1630 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
1633 MIRBuilder
.buildInstr(Opc
, {DstReg
}, {Src0Regs
[i
]}, Flags
);
1634 else if (NumOps
== 2) {
1635 MIRBuilder
.buildInstr(Opc
, {DstReg
}, {Src0Regs
[i
], Src1Regs
[i
]}, Flags
);
1636 } else if (NumOps
== 3) {
1637 MIRBuilder
.buildInstr(Opc
, {DstReg
},
1638 {Src0Regs
[i
], Src1Regs
[i
], Src2Regs
[i
]}, Flags
);
1641 DstRegs
.push_back(DstReg
);
1644 if (NarrowTy
.isVector())
1645 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
1647 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
1649 MI
.eraseFromParent();
1653 // Handle splitting vector operations which need to have the same number of
1654 // elements in each type index, but each type index may have a different element
1657 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
1658 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1659 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1661 // Also handles some irregular breakdown cases, e.g.
1662 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
1663 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1664 // s64 = G_SHL s64, s32
1665 LegalizerHelper::LegalizeResult
1666 LegalizerHelper::fewerElementsVectorMultiEltType(
1667 MachineInstr
&MI
, unsigned TypeIdx
, LLT NarrowTyArg
) {
1669 return UnableToLegalize
;
1671 const LLT NarrowTy0
= NarrowTyArg
;
1672 const unsigned NewNumElts
=
1673 NarrowTy0
.isVector() ? NarrowTy0
.getNumElements() : 1;
1675 const unsigned DstReg
= MI
.getOperand(0).getReg();
1676 LLT DstTy
= MRI
.getType(DstReg
);
1679 // All of the operands need to have the same number of elements, so if we can
1680 // determine a type breakdown for the result type, we can for all of the
1682 int NumParts
= getNarrowTypeBreakDown(DstTy
, NarrowTy0
, LeftoverTy0
);
1684 return UnableToLegalize
;
1686 SmallVector
<MachineInstrBuilder
, 4> NewInsts
;
1688 SmallVector
<unsigned, 4> DstRegs
, LeftoverDstRegs
;
1689 SmallVector
<unsigned, 4> PartRegs
, LeftoverRegs
;
1691 for (unsigned I
= 1, E
= MI
.getNumOperands(); I
!= E
; ++I
) {
1693 unsigned SrcReg
= MI
.getOperand(I
).getReg();
1694 LLT SrcTyI
= MRI
.getType(SrcReg
);
1695 LLT NarrowTyI
= LLT::scalarOrVector(NewNumElts
, SrcTyI
.getScalarType());
1698 // Split this operand into the requested typed registers, and any leftover
1699 // required to reproduce the original type.
1700 if (!extractParts(SrcReg
, SrcTyI
, NarrowTyI
, LeftoverTyI
, PartRegs
,
1702 return UnableToLegalize
;
1705 // For the first operand, create an instruction for each part and setup
1707 for (unsigned PartReg
: PartRegs
) {
1708 unsigned PartDstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
1709 NewInsts
.push_back(MIRBuilder
.buildInstrNoInsert(MI
.getOpcode())
1712 DstRegs
.push_back(PartDstReg
);
1715 for (unsigned LeftoverReg
: LeftoverRegs
) {
1716 unsigned PartDstReg
= MRI
.createGenericVirtualRegister(LeftoverTy0
);
1717 NewInsts
.push_back(MIRBuilder
.buildInstrNoInsert(MI
.getOpcode())
1719 .addUse(LeftoverReg
));
1720 LeftoverDstRegs
.push_back(PartDstReg
);
1723 assert(NewInsts
.size() == PartRegs
.size() + LeftoverRegs
.size());
1725 // Add the newly created operand splits to the existing instructions. The
1726 // odd-sized pieces are ordered after the requested NarrowTyArg sized
1728 unsigned InstCount
= 0;
1729 for (unsigned J
= 0, JE
= PartRegs
.size(); J
!= JE
; ++J
)
1730 NewInsts
[InstCount
++].addUse(PartRegs
[J
]);
1731 for (unsigned J
= 0, JE
= LeftoverRegs
.size(); J
!= JE
; ++J
)
1732 NewInsts
[InstCount
++].addUse(LeftoverRegs
[J
]);
1736 LeftoverRegs
.clear();
1739 // Insert the newly built operations and rebuild the result register.
1740 for (auto &MIB
: NewInsts
)
1741 MIRBuilder
.insertInstr(MIB
);
1743 insertParts(DstReg
, DstTy
, NarrowTy0
, DstRegs
, LeftoverTy0
, LeftoverDstRegs
);
1745 MI
.eraseFromParent();
1749 LegalizerHelper::LegalizeResult
1750 LegalizerHelper::fewerElementsVectorCasts(MachineInstr
&MI
, unsigned TypeIdx
,
1753 return UnableToLegalize
;
1755 unsigned DstReg
= MI
.getOperand(0).getReg();
1756 unsigned SrcReg
= MI
.getOperand(1).getReg();
1757 LLT DstTy
= MRI
.getType(DstReg
);
1758 LLT SrcTy
= MRI
.getType(SrcReg
);
1760 LLT NarrowTy0
= NarrowTy
;
1764 if (NarrowTy
.isVector()) {
1765 // Uneven breakdown not handled.
1766 NumParts
= DstTy
.getNumElements() / NarrowTy
.getNumElements();
1767 if (NumParts
* NarrowTy
.getNumElements() != DstTy
.getNumElements())
1768 return UnableToLegalize
;
1770 NarrowTy1
= LLT::vector(NumParts
, SrcTy
.getElementType().getSizeInBits());
1772 NumParts
= DstTy
.getNumElements();
1773 NarrowTy1
= SrcTy
.getElementType();
1776 SmallVector
<unsigned, 4> SrcRegs
, DstRegs
;
1777 extractParts(SrcReg
, NarrowTy1
, NumParts
, SrcRegs
);
1779 for (unsigned I
= 0; I
< NumParts
; ++I
) {
1780 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
1781 MachineInstr
*NewInst
= MIRBuilder
.buildInstr(MI
.getOpcode())
1783 .addUse(SrcRegs
[I
]);
1785 NewInst
->setFlags(MI
.getFlags());
1786 DstRegs
.push_back(DstReg
);
1789 if (NarrowTy
.isVector())
1790 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
1792 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
1794 MI
.eraseFromParent();
1798 LegalizerHelper::LegalizeResult
1799 LegalizerHelper::fewerElementsVectorCmp(MachineInstr
&MI
, unsigned TypeIdx
,
1801 unsigned DstReg
= MI
.getOperand(0).getReg();
1802 unsigned Src0Reg
= MI
.getOperand(2).getReg();
1803 LLT DstTy
= MRI
.getType(DstReg
);
1804 LLT SrcTy
= MRI
.getType(Src0Reg
);
1807 LLT NarrowTy0
, NarrowTy1
;
1810 unsigned NewElts
= NarrowTy
.isVector() ? NarrowTy
.getNumElements() : 1;
1811 unsigned OldElts
= DstTy
.getNumElements();
1813 NarrowTy0
= NarrowTy
;
1814 NumParts
= NarrowTy
.isVector() ? (OldElts
/ NewElts
) : DstTy
.getNumElements();
1815 NarrowTy1
= NarrowTy
.isVector() ?
1816 LLT::vector(NarrowTy
.getNumElements(), SrcTy
.getScalarSizeInBits()) :
1817 SrcTy
.getElementType();
1820 unsigned NewElts
= NarrowTy
.isVector() ? NarrowTy
.getNumElements() : 1;
1821 unsigned OldElts
= SrcTy
.getNumElements();
1823 NumParts
= NarrowTy
.isVector() ? (OldElts
/ NewElts
) :
1824 NarrowTy
.getNumElements();
1825 NarrowTy0
= LLT::vector(NarrowTy
.getNumElements(),
1826 DstTy
.getScalarSizeInBits());
1827 NarrowTy1
= NarrowTy
;
1830 // FIXME: Don't know how to handle the situation where the small vectors
1831 // aren't all the same size yet.
1832 if (NarrowTy1
.isVector() &&
1833 NarrowTy1
.getNumElements() * NumParts
!= DstTy
.getNumElements())
1834 return UnableToLegalize
;
1836 CmpInst::Predicate Pred
1837 = static_cast<CmpInst::Predicate
>(MI
.getOperand(1).getPredicate());
1839 SmallVector
<unsigned, 2> Src1Regs
, Src2Regs
, DstRegs
;
1840 extractParts(MI
.getOperand(2).getReg(), NarrowTy1
, NumParts
, Src1Regs
);
1841 extractParts(MI
.getOperand(3).getReg(), NarrowTy1
, NumParts
, Src2Regs
);
1843 for (unsigned I
= 0; I
< NumParts
; ++I
) {
1844 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
1845 DstRegs
.push_back(DstReg
);
1847 if (MI
.getOpcode() == TargetOpcode::G_ICMP
)
1848 MIRBuilder
.buildICmp(Pred
, DstReg
, Src1Regs
[I
], Src2Regs
[I
]);
1850 MachineInstr
*NewCmp
1851 = MIRBuilder
.buildFCmp(Pred
, DstReg
, Src1Regs
[I
], Src2Regs
[I
]);
1852 NewCmp
->setFlags(MI
.getFlags());
1856 if (NarrowTy1
.isVector())
1857 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
1859 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
1861 MI
.eraseFromParent();
1865 LegalizerHelper::LegalizeResult
1866 LegalizerHelper::fewerElementsVectorSelect(MachineInstr
&MI
, unsigned TypeIdx
,
1868 unsigned DstReg
= MI
.getOperand(0).getReg();
1869 unsigned CondReg
= MI
.getOperand(1).getReg();
1871 unsigned NumParts
= 0;
1872 LLT NarrowTy0
, NarrowTy1
;
1874 LLT DstTy
= MRI
.getType(DstReg
);
1875 LLT CondTy
= MRI
.getType(CondReg
);
1876 unsigned Size
= DstTy
.getSizeInBits();
1878 assert(TypeIdx
== 0 || CondTy
.isVector());
1881 NarrowTy0
= NarrowTy
;
1884 unsigned NarrowSize
= NarrowTy0
.getSizeInBits();
1885 // FIXME: Don't know how to handle the situation where the small vectors
1886 // aren't all the same size yet.
1887 if (Size
% NarrowSize
!= 0)
1888 return UnableToLegalize
;
1890 NumParts
= Size
/ NarrowSize
;
1892 // Need to break down the condition type
1893 if (CondTy
.isVector()) {
1894 if (CondTy
.getNumElements() == NumParts
)
1895 NarrowTy1
= CondTy
.getElementType();
1897 NarrowTy1
= LLT::vector(CondTy
.getNumElements() / NumParts
,
1898 CondTy
.getScalarSizeInBits());
1901 NumParts
= CondTy
.getNumElements();
1902 if (NarrowTy
.isVector()) {
1903 // TODO: Handle uneven breakdown.
1904 if (NumParts
* NarrowTy
.getNumElements() != CondTy
.getNumElements())
1905 return UnableToLegalize
;
1907 return UnableToLegalize
;
1909 NarrowTy0
= DstTy
.getElementType();
1910 NarrowTy1
= NarrowTy
;
1914 SmallVector
<unsigned, 2> DstRegs
, Src0Regs
, Src1Regs
, Src2Regs
;
1915 if (CondTy
.isVector())
1916 extractParts(MI
.getOperand(1).getReg(), NarrowTy1
, NumParts
, Src0Regs
);
1918 extractParts(MI
.getOperand(2).getReg(), NarrowTy0
, NumParts
, Src1Regs
);
1919 extractParts(MI
.getOperand(3).getReg(), NarrowTy0
, NumParts
, Src2Regs
);
1921 for (unsigned i
= 0; i
< NumParts
; ++i
) {
1922 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy0
);
1923 MIRBuilder
.buildSelect(DstReg
, CondTy
.isVector() ? Src0Regs
[i
] : CondReg
,
1924 Src1Regs
[i
], Src2Regs
[i
]);
1925 DstRegs
.push_back(DstReg
);
1928 if (NarrowTy0
.isVector())
1929 MIRBuilder
.buildConcatVectors(DstReg
, DstRegs
);
1931 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
1933 MI
.eraseFromParent();
1937 LegalizerHelper::LegalizeResult
1938 LegalizerHelper::reduceLoadStoreWidth(MachineInstr
&MI
, unsigned TypeIdx
,
1940 // FIXME: Don't know how to handle secondary types yet.
1942 return UnableToLegalize
;
1944 MachineMemOperand
*MMO
= *MI
.memoperands_begin();
1946 // This implementation doesn't work for atomics. Give up instead of doing
1947 // something invalid.
1948 if (MMO
->getOrdering() != AtomicOrdering::NotAtomic
||
1949 MMO
->getFailureOrdering() != AtomicOrdering::NotAtomic
)
1950 return UnableToLegalize
;
1952 bool IsLoad
= MI
.getOpcode() == TargetOpcode::G_LOAD
;
1953 unsigned ValReg
= MI
.getOperand(0).getReg();
1954 unsigned AddrReg
= MI
.getOperand(1).getReg();
1955 LLT ValTy
= MRI
.getType(ValReg
);
1959 SmallVector
<unsigned, 8> NarrowRegs
, NarrowLeftoverRegs
;
1961 NumParts
= getNarrowTypeBreakDown(ValTy
, NarrowTy
, LeftoverTy
);
1963 if (extractParts(ValReg
, ValTy
, NarrowTy
, LeftoverTy
, NarrowRegs
,
1964 NarrowLeftoverRegs
))
1965 NumParts
= NarrowRegs
.size();
1969 return UnableToLegalize
;
1971 const LLT OffsetTy
= LLT::scalar(MRI
.getType(AddrReg
).getScalarSizeInBits());
1973 unsigned TotalSize
= ValTy
.getSizeInBits();
1975 // Split the load/store into PartTy sized pieces starting at Offset. If this
1976 // is a load, return the new registers in ValRegs. For a store, each elements
1977 // of ValRegs should be PartTy. Returns the next offset that needs to be
1979 auto splitTypePieces
= [=](LLT PartTy
, SmallVectorImpl
<unsigned> &ValRegs
,
1980 unsigned Offset
) -> unsigned {
1981 MachineFunction
&MF
= MIRBuilder
.getMF();
1982 unsigned PartSize
= PartTy
.getSizeInBits();
1983 for (unsigned Idx
= 0, E
= NumParts
; Idx
!= E
&& Offset
< TotalSize
;
1984 Offset
+= PartSize
, ++Idx
) {
1985 unsigned ByteSize
= PartSize
/ 8;
1986 unsigned ByteOffset
= Offset
/ 8;
1987 unsigned NewAddrReg
= 0;
1989 MIRBuilder
.materializeGEP(NewAddrReg
, AddrReg
, OffsetTy
, ByteOffset
);
1991 MachineMemOperand
*NewMMO
=
1992 MF
.getMachineMemOperand(MMO
, ByteOffset
, ByteSize
);
1995 unsigned Dst
= MRI
.createGenericVirtualRegister(PartTy
);
1996 ValRegs
.push_back(Dst
);
1997 MIRBuilder
.buildLoad(Dst
, NewAddrReg
, *NewMMO
);
1999 MIRBuilder
.buildStore(ValRegs
[Idx
], NewAddrReg
, *NewMMO
);
2006 unsigned HandledOffset
= splitTypePieces(NarrowTy
, NarrowRegs
, 0);
2008 // Handle the rest of the register if this isn't an even type breakdown.
2009 if (LeftoverTy
.isValid())
2010 splitTypePieces(LeftoverTy
, NarrowLeftoverRegs
, HandledOffset
);
2013 insertParts(ValReg
, ValTy
, NarrowTy
, NarrowRegs
,
2014 LeftoverTy
, NarrowLeftoverRegs
);
2017 MI
.eraseFromParent();
2021 LegalizerHelper::LegalizeResult
2022 LegalizerHelper::fewerElementsVector(MachineInstr
&MI
, unsigned TypeIdx
,
2024 using namespace TargetOpcode
;
2026 MIRBuilder
.setInstr(MI
);
2027 switch (MI
.getOpcode()) {
2028 case G_IMPLICIT_DEF
:
2029 return fewerElementsVectorImplicitDef(MI
, TypeIdx
, NarrowTy
);
2043 case G_FCANONICALIZE
:
2055 case G_INTRINSIC_ROUND
:
2056 case G_INTRINSIC_TRUNC
:
2061 return fewerElementsVectorBasic(MI
, TypeIdx
, NarrowTy
);
2065 return fewerElementsVectorMultiEltType(MI
, TypeIdx
, NarrowTy
);
2077 case G_ADDRSPACE_CAST
:
2078 return fewerElementsVectorCasts(MI
, TypeIdx
, NarrowTy
);
2081 return fewerElementsVectorCmp(MI
, TypeIdx
, NarrowTy
);
2083 return fewerElementsVectorSelect(MI
, TypeIdx
, NarrowTy
);
2086 return reduceLoadStoreWidth(MI
, TypeIdx
, NarrowTy
);
2088 return UnableToLegalize
;
2092 LegalizerHelper::LegalizeResult
2093 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr
&MI
, const APInt
&Amt
,
2094 const LLT HalfTy
, const LLT AmtTy
) {
2096 unsigned InL
= MRI
.createGenericVirtualRegister(HalfTy
);
2097 unsigned InH
= MRI
.createGenericVirtualRegister(HalfTy
);
2098 MIRBuilder
.buildUnmerge({InL
, InH
}, MI
.getOperand(1).getReg());
2100 if (Amt
.isNullValue()) {
2101 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {InL
, InH
});
2102 MI
.eraseFromParent();
2107 unsigned NVTBits
= HalfTy
.getSizeInBits();
2108 unsigned VTBits
= 2 * NVTBits
;
2111 if (MI
.getOpcode() == TargetOpcode::G_SHL
) {
2112 if (Amt
.ugt(VTBits
)) {
2113 Lo
= Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2114 } else if (Amt
.ugt(NVTBits
)) {
2115 Lo
= MIRBuilder
.buildConstant(NVT
, 0);
2116 Hi
= MIRBuilder
.buildShl(NVT
, InL
,
2117 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
2118 } else if (Amt
== NVTBits
) {
2119 Lo
= MIRBuilder
.buildConstant(NVT
, 0);
2122 Lo
= MIRBuilder
.buildShl(NVT
, InL
, MIRBuilder
.buildConstant(AmtTy
, Amt
));
2124 MIRBuilder
.buildShl(NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, Amt
));
2125 auto OrRHS
= MIRBuilder
.buildLShr(
2126 NVT
, InL
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
2127 Hi
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
2129 } else if (MI
.getOpcode() == TargetOpcode::G_LSHR
) {
2130 if (Amt
.ugt(VTBits
)) {
2131 Lo
= Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2132 } else if (Amt
.ugt(NVTBits
)) {
2133 Lo
= MIRBuilder
.buildLShr(NVT
, InH
,
2134 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
2135 Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2136 } else if (Amt
== NVTBits
) {
2138 Hi
= MIRBuilder
.buildConstant(NVT
, 0);
2140 auto ShiftAmtConst
= MIRBuilder
.buildConstant(AmtTy
, Amt
);
2142 auto OrLHS
= MIRBuilder
.buildLShr(NVT
, InL
, ShiftAmtConst
);
2143 auto OrRHS
= MIRBuilder
.buildShl(
2144 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
2146 Lo
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
2147 Hi
= MIRBuilder
.buildLShr(NVT
, InH
, ShiftAmtConst
);
2150 if (Amt
.ugt(VTBits
)) {
2151 Hi
= Lo
= MIRBuilder
.buildAShr(
2152 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
2153 } else if (Amt
.ugt(NVTBits
)) {
2154 Lo
= MIRBuilder
.buildAShr(NVT
, InH
,
2155 MIRBuilder
.buildConstant(AmtTy
, Amt
- NVTBits
));
2156 Hi
= MIRBuilder
.buildAShr(NVT
, InH
,
2157 MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
2158 } else if (Amt
== NVTBits
) {
2160 Hi
= MIRBuilder
.buildAShr(NVT
, InH
,
2161 MIRBuilder
.buildConstant(AmtTy
, NVTBits
- 1));
2163 auto ShiftAmtConst
= MIRBuilder
.buildConstant(AmtTy
, Amt
);
2165 auto OrLHS
= MIRBuilder
.buildLShr(NVT
, InL
, ShiftAmtConst
);
2166 auto OrRHS
= MIRBuilder
.buildShl(
2167 NVT
, InH
, MIRBuilder
.buildConstant(AmtTy
, -Amt
+ NVTBits
));
2169 Lo
= MIRBuilder
.buildOr(NVT
, OrLHS
, OrRHS
);
2170 Hi
= MIRBuilder
.buildAShr(NVT
, InH
, ShiftAmtConst
);
2174 MIRBuilder
.buildMerge(MI
.getOperand(0).getReg(), {Lo
.getReg(), Hi
.getReg()});
2175 MI
.eraseFromParent();
2180 // TODO: Optimize if constant shift amount.
2181 LegalizerHelper::LegalizeResult
2182 LegalizerHelper::narrowScalarShift(MachineInstr
&MI
, unsigned TypeIdx
,
2185 Observer
.changingInstr(MI
);
2186 narrowScalarSrc(MI
, RequestedTy
, 2);
2187 Observer
.changedInstr(MI
);
2191 unsigned DstReg
= MI
.getOperand(0).getReg();
2192 LLT DstTy
= MRI
.getType(DstReg
);
2193 if (DstTy
.isVector())
2194 return UnableToLegalize
;
2196 unsigned Amt
= MI
.getOperand(2).getReg();
2197 LLT ShiftAmtTy
= MRI
.getType(Amt
);
2198 const unsigned DstEltSize
= DstTy
.getScalarSizeInBits();
2199 if (DstEltSize
% 2 != 0)
2200 return UnableToLegalize
;
2202 // Ignore the input type. We can only go to exactly half the size of the
2203 // input. If that isn't small enough, the resulting pieces will be further
2205 const unsigned NewBitSize
= DstEltSize
/ 2;
2206 const LLT HalfTy
= LLT::scalar(NewBitSize
);
2207 const LLT CondTy
= LLT::scalar(1);
2209 if (const MachineInstr
*KShiftAmt
=
2210 getOpcodeDef(TargetOpcode::G_CONSTANT
, Amt
, MRI
)) {
2211 return narrowScalarShiftByConstant(
2212 MI
, KShiftAmt
->getOperand(1).getCImm()->getValue(), HalfTy
, ShiftAmtTy
);
2215 // TODO: Expand with known bits.
2217 // Handle the fully general expansion by an unknown amount.
2218 auto NewBits
= MIRBuilder
.buildConstant(ShiftAmtTy
, NewBitSize
);
2220 unsigned InL
= MRI
.createGenericVirtualRegister(HalfTy
);
2221 unsigned InH
= MRI
.createGenericVirtualRegister(HalfTy
);
2222 MIRBuilder
.buildUnmerge({InL
, InH
}, MI
.getOperand(1).getReg());
2224 auto AmtExcess
= MIRBuilder
.buildSub(ShiftAmtTy
, Amt
, NewBits
);
2225 auto AmtLack
= MIRBuilder
.buildSub(ShiftAmtTy
, NewBits
, Amt
);
2227 auto Zero
= MIRBuilder
.buildConstant(ShiftAmtTy
, 0);
2228 auto IsShort
= MIRBuilder
.buildICmp(ICmpInst::ICMP_ULT
, CondTy
, Amt
, NewBits
);
2229 auto IsZero
= MIRBuilder
.buildICmp(ICmpInst::ICMP_EQ
, CondTy
, Amt
, Zero
);
2231 unsigned ResultRegs
[2];
2232 switch (MI
.getOpcode()) {
2233 case TargetOpcode::G_SHL
: {
2234 // Short: ShAmt < NewBitSize
2235 auto LoS
= MIRBuilder
.buildShl(HalfTy
, InH
, Amt
);
2237 auto OrLHS
= MIRBuilder
.buildShl(HalfTy
, InH
, Amt
);
2238 auto OrRHS
= MIRBuilder
.buildLShr(HalfTy
, InL
, AmtLack
);
2239 auto HiS
= MIRBuilder
.buildOr(HalfTy
, OrLHS
, OrRHS
);
2241 // Long: ShAmt >= NewBitSize
2242 auto LoL
= MIRBuilder
.buildConstant(HalfTy
, 0); // Lo part is zero.
2243 auto HiL
= MIRBuilder
.buildShl(HalfTy
, InL
, AmtExcess
); // Hi from Lo part.
2245 auto Lo
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
);
2246 auto Hi
= MIRBuilder
.buildSelect(
2247 HalfTy
, IsZero
, InH
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
));
2249 ResultRegs
[0] = Lo
.getReg(0);
2250 ResultRegs
[1] = Hi
.getReg(0);
2253 case TargetOpcode::G_LSHR
: {
2254 // Short: ShAmt < NewBitSize
2255 auto HiS
= MIRBuilder
.buildLShr(HalfTy
, InH
, Amt
);
2257 auto OrLHS
= MIRBuilder
.buildLShr(HalfTy
, InL
, Amt
);
2258 auto OrRHS
= MIRBuilder
.buildShl(HalfTy
, InH
, AmtLack
);
2259 auto LoS
= MIRBuilder
.buildOr(HalfTy
, OrLHS
, OrRHS
);
2261 // Long: ShAmt >= NewBitSize
2262 auto HiL
= MIRBuilder
.buildConstant(HalfTy
, 0); // Hi part is zero.
2263 auto LoL
= MIRBuilder
.buildLShr(HalfTy
, InH
, AmtExcess
); // Lo from Hi part.
2265 auto Lo
= MIRBuilder
.buildSelect(
2266 HalfTy
, IsZero
, InL
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
));
2267 auto Hi
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
);
2269 ResultRegs
[0] = Lo
.getReg(0);
2270 ResultRegs
[1] = Hi
.getReg(0);
2273 case TargetOpcode::G_ASHR
: {
2274 // Short: ShAmt < NewBitSize
2275 auto HiS
= MIRBuilder
.buildAShr(HalfTy
, InH
, Amt
);
2277 auto OrLHS
= MIRBuilder
.buildLShr(HalfTy
, InL
, Amt
);
2278 auto OrRHS
= MIRBuilder
.buildLShr(HalfTy
, InH
, AmtLack
);
2279 auto LoS
= MIRBuilder
.buildOr(HalfTy
, OrLHS
, OrRHS
);
2281 // Long: ShAmt >= NewBitSize
2284 auto HiL
= MIRBuilder
.buildAShr(
2285 HalfTy
, InH
, MIRBuilder
.buildConstant(ShiftAmtTy
, NewBitSize
- 1));
2287 auto LoL
= MIRBuilder
.buildAShr(HalfTy
, InH
, AmtExcess
); // Lo from Hi part.
2289 auto Lo
= MIRBuilder
.buildSelect(
2290 HalfTy
, IsZero
, InL
, MIRBuilder
.buildSelect(HalfTy
, IsShort
, LoS
, LoL
));
2292 auto Hi
= MIRBuilder
.buildSelect(HalfTy
, IsShort
, HiS
, HiL
);
2294 ResultRegs
[0] = Lo
.getReg(0);
2295 ResultRegs
[1] = Hi
.getReg(0);
2299 llvm_unreachable("not a shift");
2302 MIRBuilder
.buildMerge(DstReg
, ResultRegs
);
2303 MI
.eraseFromParent();
2307 LegalizerHelper::LegalizeResult
2308 LegalizerHelper::moreElementsVector(MachineInstr
&MI
, unsigned TypeIdx
,
2310 MIRBuilder
.setInstr(MI
);
2311 unsigned Opc
= MI
.getOpcode();
2313 case TargetOpcode::G_IMPLICIT_DEF
: {
2314 Observer
.changingInstr(MI
);
2315 moreElementsVectorDst(MI
, MoreTy
, 0);
2316 Observer
.changedInstr(MI
);
2320 return UnableToLegalize
;
2324 LegalizerHelper::LegalizeResult
2325 LegalizerHelper::narrowScalarMul(MachineInstr
&MI
, unsigned TypeIdx
, LLT NewTy
) {
2326 unsigned DstReg
= MI
.getOperand(0).getReg();
2327 unsigned Src0
= MI
.getOperand(1).getReg();
2328 unsigned Src1
= MI
.getOperand(2).getReg();
2329 LLT Ty
= MRI
.getType(DstReg
);
2331 return UnableToLegalize
;
2333 unsigned Size
= Ty
.getSizeInBits();
2334 unsigned NewSize
= Size
/ 2;
2335 if (Size
!= 2 * NewSize
)
2336 return UnableToLegalize
;
2338 LLT HalfTy
= LLT::scalar(NewSize
);
2339 // TODO: if HalfTy != NewTy, handle the breakdown all at once?
2341 unsigned ShiftAmt
= MRI
.createGenericVirtualRegister(Ty
);
2342 unsigned Lo
= MRI
.createGenericVirtualRegister(HalfTy
);
2343 unsigned Hi
= MRI
.createGenericVirtualRegister(HalfTy
);
2344 unsigned ExtLo
= MRI
.createGenericVirtualRegister(Ty
);
2345 unsigned ExtHi
= MRI
.createGenericVirtualRegister(Ty
);
2346 unsigned ShiftedHi
= MRI
.createGenericVirtualRegister(Ty
);
2348 SmallVector
<unsigned, 2> Src0Parts
;
2349 SmallVector
<unsigned, 2> Src1Parts
;
2351 extractParts(Src0
, HalfTy
, 2, Src0Parts
);
2352 extractParts(Src1
, HalfTy
, 2, Src1Parts
);
2354 MIRBuilder
.buildMul(Lo
, Src0Parts
[0], Src1Parts
[0]);
2356 // TODO: Use smulh or umulh depending on what the target has.
2357 MIRBuilder
.buildUMulH(Hi
, Src0Parts
[1], Src1Parts
[1]);
2359 MIRBuilder
.buildConstant(ShiftAmt
, NewSize
);
2360 MIRBuilder
.buildAnyExt(ExtHi
, Hi
);
2361 MIRBuilder
.buildShl(ShiftedHi
, ExtHi
, ShiftAmt
);
2363 MIRBuilder
.buildZExt(ExtLo
, Lo
);
2364 MIRBuilder
.buildOr(DstReg
, ExtLo
, ShiftedHi
);
2365 MI
.eraseFromParent();
2370 LegalizerHelper::LegalizeResult
2371 LegalizerHelper::narrowScalarExtract(MachineInstr
&MI
, unsigned TypeIdx
,
2374 return UnableToLegalize
;
2376 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
2378 int64_t SizeOp1
= MRI
.getType(MI
.getOperand(1).getReg()).getSizeInBits();
2379 // FIXME: add support for when SizeOp1 isn't an exact multiple of
2381 if (SizeOp1
% NarrowSize
!= 0)
2382 return UnableToLegalize
;
2383 int NumParts
= SizeOp1
/ NarrowSize
;
2385 SmallVector
<unsigned, 2> SrcRegs
, DstRegs
;
2386 SmallVector
<uint64_t, 2> Indexes
;
2387 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
);
2389 unsigned OpReg
= MI
.getOperand(0).getReg();
2390 uint64_t OpStart
= MI
.getOperand(2).getImm();
2391 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
2392 for (int i
= 0; i
< NumParts
; ++i
) {
2393 unsigned SrcStart
= i
* NarrowSize
;
2395 if (SrcStart
+ NarrowSize
<= OpStart
|| SrcStart
>= OpStart
+ OpSize
) {
2396 // No part of the extract uses this subregister, ignore it.
2398 } else if (SrcStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
2399 // The entire subregister is extracted, forward the value.
2400 DstRegs
.push_back(SrcRegs
[i
]);
2404 // OpSegStart is where this destination segment would start in OpReg if it
2405 // extended infinitely in both directions.
2406 int64_t ExtractOffset
;
2408 if (OpStart
< SrcStart
) {
2410 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- SrcStart
);
2412 ExtractOffset
= OpStart
- SrcStart
;
2413 SegSize
= std::min(SrcStart
+ NarrowSize
- OpStart
, OpSize
);
2416 unsigned SegReg
= SrcRegs
[i
];
2417 if (ExtractOffset
!= 0 || SegSize
!= NarrowSize
) {
2418 // A genuine extract is needed.
2419 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
2420 MIRBuilder
.buildExtract(SegReg
, SrcRegs
[i
], ExtractOffset
);
2423 DstRegs
.push_back(SegReg
);
2426 unsigned DstReg
= MI
.getOperand(0).getReg();
2427 if(MRI
.getType(DstReg
).isVector())
2428 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2430 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
2431 MI
.eraseFromParent();
2435 LegalizerHelper::LegalizeResult
2436 LegalizerHelper::narrowScalarInsert(MachineInstr
&MI
, unsigned TypeIdx
,
2438 // FIXME: Don't know how to handle secondary types yet.
2440 return UnableToLegalize
;
2442 uint64_t SizeOp0
= MRI
.getType(MI
.getOperand(0).getReg()).getSizeInBits();
2443 uint64_t NarrowSize
= NarrowTy
.getSizeInBits();
2445 // FIXME: add support for when SizeOp0 isn't an exact multiple of
2447 if (SizeOp0
% NarrowSize
!= 0)
2448 return UnableToLegalize
;
2450 int NumParts
= SizeOp0
/ NarrowSize
;
2452 SmallVector
<unsigned, 2> SrcRegs
, DstRegs
;
2453 SmallVector
<uint64_t, 2> Indexes
;
2454 extractParts(MI
.getOperand(1).getReg(), NarrowTy
, NumParts
, SrcRegs
);
2456 unsigned OpReg
= MI
.getOperand(2).getReg();
2457 uint64_t OpStart
= MI
.getOperand(3).getImm();
2458 uint64_t OpSize
= MRI
.getType(OpReg
).getSizeInBits();
2459 for (int i
= 0; i
< NumParts
; ++i
) {
2460 unsigned DstStart
= i
* NarrowSize
;
2462 if (DstStart
+ NarrowSize
<= OpStart
|| DstStart
>= OpStart
+ OpSize
) {
2463 // No part of the insert affects this subregister, forward the original.
2464 DstRegs
.push_back(SrcRegs
[i
]);
2466 } else if (DstStart
== OpStart
&& NarrowTy
== MRI
.getType(OpReg
)) {
2467 // The entire subregister is defined by this insert, forward the new
2469 DstRegs
.push_back(OpReg
);
2473 // OpSegStart is where this destination segment would start in OpReg if it
2474 // extended infinitely in both directions.
2475 int64_t ExtractOffset
, InsertOffset
;
2477 if (OpStart
< DstStart
) {
2479 ExtractOffset
= DstStart
- OpStart
;
2480 SegSize
= std::min(NarrowSize
, OpStart
+ OpSize
- DstStart
);
2482 InsertOffset
= OpStart
- DstStart
;
2485 std::min(NarrowSize
- InsertOffset
, OpStart
+ OpSize
- DstStart
);
2488 unsigned SegReg
= OpReg
;
2489 if (ExtractOffset
!= 0 || SegSize
!= OpSize
) {
2490 // A genuine extract is needed.
2491 SegReg
= MRI
.createGenericVirtualRegister(LLT::scalar(SegSize
));
2492 MIRBuilder
.buildExtract(SegReg
, OpReg
, ExtractOffset
);
2495 unsigned DstReg
= MRI
.createGenericVirtualRegister(NarrowTy
);
2496 MIRBuilder
.buildInsert(DstReg
, SrcRegs
[i
], SegReg
, InsertOffset
);
2497 DstRegs
.push_back(DstReg
);
2500 assert(DstRegs
.size() == (unsigned)NumParts
&& "not all parts covered");
2501 unsigned DstReg
= MI
.getOperand(0).getReg();
2502 if(MRI
.getType(DstReg
).isVector())
2503 MIRBuilder
.buildBuildVector(DstReg
, DstRegs
);
2505 MIRBuilder
.buildMerge(DstReg
, DstRegs
);
2506 MI
.eraseFromParent();
2510 LegalizerHelper::LegalizeResult
2511 LegalizerHelper::narrowScalarSelect(MachineInstr
&MI
, unsigned TypeIdx
,
2514 return UnableToLegalize
;
2516 unsigned CondReg
= MI
.getOperand(1).getReg();
2517 LLT CondTy
= MRI
.getType(CondReg
);
2518 if (CondTy
.isVector()) // TODO: Handle vselect
2519 return UnableToLegalize
;
2521 unsigned DstReg
= MI
.getOperand(0).getReg();
2522 LLT DstTy
= MRI
.getType(DstReg
);
2524 SmallVector
<unsigned, 4> DstRegs
, DstLeftoverRegs
;
2525 SmallVector
<unsigned, 4> Src1Regs
, Src1LeftoverRegs
;
2526 SmallVector
<unsigned, 4> Src2Regs
, Src2LeftoverRegs
;
2528 if (!extractParts(MI
.getOperand(2).getReg(), DstTy
, NarrowTy
, LeftoverTy
,
2529 Src1Regs
, Src1LeftoverRegs
))
2530 return UnableToLegalize
;
2533 if (!extractParts(MI
.getOperand(3).getReg(), DstTy
, NarrowTy
, Unused
,
2534 Src2Regs
, Src2LeftoverRegs
))
2535 llvm_unreachable("inconsistent extractParts result");
2537 for (unsigned I
= 0, E
= Src1Regs
.size(); I
!= E
; ++I
) {
2538 auto Select
= MIRBuilder
.buildSelect(NarrowTy
,
2539 CondReg
, Src1Regs
[I
], Src2Regs
[I
]);
2540 DstRegs
.push_back(Select
->getOperand(0).getReg());
2543 for (unsigned I
= 0, E
= Src1LeftoverRegs
.size(); I
!= E
; ++I
) {
2544 auto Select
= MIRBuilder
.buildSelect(
2545 LeftoverTy
, CondReg
, Src1LeftoverRegs
[I
], Src2LeftoverRegs
[I
]);
2546 DstLeftoverRegs
.push_back(Select
->getOperand(0).getReg());
2549 insertParts(DstReg
, DstTy
, NarrowTy
, DstRegs
,
2550 LeftoverTy
, DstLeftoverRegs
);
2552 MI
.eraseFromParent();
2556 LegalizerHelper::LegalizeResult
2557 LegalizerHelper::lowerBitCount(MachineInstr
&MI
, unsigned TypeIdx
, LLT Ty
) {
2558 unsigned Opc
= MI
.getOpcode();
2559 auto &TII
= *MI
.getMF()->getSubtarget().getInstrInfo();
2560 auto isSupported
= [this](const LegalityQuery
&Q
) {
2561 auto QAction
= LI
.getAction(Q
).Action
;
2562 return QAction
== Legal
|| QAction
== Libcall
|| QAction
== Custom
;
2566 return UnableToLegalize
;
2567 case TargetOpcode::G_CTLZ_ZERO_UNDEF
: {
2568 // This trivially expands to CTLZ.
2569 Observer
.changingInstr(MI
);
2570 MI
.setDesc(TII
.get(TargetOpcode::G_CTLZ
));
2571 Observer
.changedInstr(MI
);
2574 case TargetOpcode::G_CTLZ
: {
2575 unsigned SrcReg
= MI
.getOperand(1).getReg();
2576 unsigned Len
= Ty
.getSizeInBits();
2577 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF
, {Ty
, Ty
}})) {
2578 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
2579 auto MIBCtlzZU
= MIRBuilder
.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF
,
2581 auto MIBZero
= MIRBuilder
.buildConstant(Ty
, 0);
2582 auto MIBLen
= MIRBuilder
.buildConstant(Ty
, Len
);
2583 auto MIBICmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LLT::scalar(1),
2585 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), MIBICmp
, MIBLen
,
2587 MI
.eraseFromParent();
2590 // for now, we do this:
2591 // NewLen = NextPowerOf2(Len);
2592 // x = x | (x >> 1);
2593 // x = x | (x >> 2);
2595 // x = x | (x >>16);
2596 // x = x | (x >>32); // for 64-bit input
2598 // return Len - popcount(x);
2600 // Ref: "Hacker's Delight" by Henry Warren
2601 unsigned Op
= SrcReg
;
2602 unsigned NewLen
= PowerOf2Ceil(Len
);
2603 for (unsigned i
= 0; (1U << i
) <= (NewLen
/ 2); ++i
) {
2604 auto MIBShiftAmt
= MIRBuilder
.buildConstant(Ty
, 1ULL << i
);
2605 auto MIBOp
= MIRBuilder
.buildInstr(
2606 TargetOpcode::G_OR
, {Ty
},
2607 {Op
, MIRBuilder
.buildInstr(TargetOpcode::G_LSHR
, {Ty
},
2608 {Op
, MIBShiftAmt
})});
2609 Op
= MIBOp
->getOperand(0).getReg();
2611 auto MIBPop
= MIRBuilder
.buildInstr(TargetOpcode::G_CTPOP
, {Ty
}, {Op
});
2612 MIRBuilder
.buildInstr(TargetOpcode::G_SUB
, {MI
.getOperand(0).getReg()},
2613 {MIRBuilder
.buildConstant(Ty
, Len
), MIBPop
});
2614 MI
.eraseFromParent();
2617 case TargetOpcode::G_CTTZ_ZERO_UNDEF
: {
2618 // This trivially expands to CTTZ.
2619 Observer
.changingInstr(MI
);
2620 MI
.setDesc(TII
.get(TargetOpcode::G_CTTZ
));
2621 Observer
.changedInstr(MI
);
2624 case TargetOpcode::G_CTTZ
: {
2625 unsigned SrcReg
= MI
.getOperand(1).getReg();
2626 unsigned Len
= Ty
.getSizeInBits();
2627 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF
, {Ty
, Ty
}})) {
2628 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
2630 auto MIBCttzZU
= MIRBuilder
.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF
,
2632 auto MIBZero
= MIRBuilder
.buildConstant(Ty
, 0);
2633 auto MIBLen
= MIRBuilder
.buildConstant(Ty
, Len
);
2634 auto MIBICmp
= MIRBuilder
.buildICmp(CmpInst::ICMP_EQ
, LLT::scalar(1),
2636 MIRBuilder
.buildSelect(MI
.getOperand(0).getReg(), MIBICmp
, MIBLen
,
2638 MI
.eraseFromParent();
2641 // for now, we use: { return popcount(~x & (x - 1)); }
2642 // unless the target has ctlz but not ctpop, in which case we use:
2643 // { return 32 - nlz(~x & (x-1)); }
2644 // Ref: "Hacker's Delight" by Henry Warren
2645 auto MIBCstNeg1
= MIRBuilder
.buildConstant(Ty
, -1);
2647 MIRBuilder
.buildInstr(TargetOpcode::G_XOR
, {Ty
}, {SrcReg
, MIBCstNeg1
});
2648 auto MIBTmp
= MIRBuilder
.buildInstr(
2649 TargetOpcode::G_AND
, {Ty
},
2650 {MIBNot
, MIRBuilder
.buildInstr(TargetOpcode::G_ADD
, {Ty
},
2651 {SrcReg
, MIBCstNeg1
})});
2652 if (!isSupported({TargetOpcode::G_CTPOP
, {Ty
, Ty
}}) &&
2653 isSupported({TargetOpcode::G_CTLZ
, {Ty
, Ty
}})) {
2654 auto MIBCstLen
= MIRBuilder
.buildConstant(Ty
, Len
);
2655 MIRBuilder
.buildInstr(
2656 TargetOpcode::G_SUB
, {MI
.getOperand(0).getReg()},
2658 MIRBuilder
.buildInstr(TargetOpcode::G_CTLZ
, {Ty
}, {MIBTmp
})});
2659 MI
.eraseFromParent();
2662 MI
.setDesc(TII
.get(TargetOpcode::G_CTPOP
));
2663 MI
.getOperand(1).setReg(MIBTmp
->getOperand(0).getReg());