[llvm-exegesis] Fix missing std::move.
[llvm-complete.git] / lib / CodeGen / GlobalISel / LegalizerHelper.cpp
blobff2e61c03b47e03c50e237347737d30be153dee7
1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file This file implements the LegalizerHelper class to legalize
11 /// individual instructions and the LegalizeMachineIR wrapper pass for the
12 /// primary legalization.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
17 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
27 #define DEBUG_TYPE "legalizer"
29 using namespace llvm;
30 using namespace LegalizeActions;
32 LegalizerHelper::LegalizerHelper(MachineFunction &MF)
33 : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) {
34 MIRBuilder.setMF(MF);
37 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI)
38 : MRI(MF.getRegInfo()), LI(LI) {
39 MIRBuilder.setMF(MF);
41 LegalizerHelper::LegalizeResult
42 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
43 LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
45 auto Step = LI.getAction(MI, MRI);
46 switch (Step.Action) {
47 case Legal:
48 LLVM_DEBUG(dbgs() << ".. Already legal\n");
49 return AlreadyLegal;
50 case Libcall:
51 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
52 return libcall(MI);
53 case NarrowScalar:
54 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
55 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
56 case WidenScalar:
57 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
58 return widenScalar(MI, Step.TypeIdx, Step.NewType);
59 case Lower:
60 LLVM_DEBUG(dbgs() << ".. Lower\n");
61 return lower(MI, Step.TypeIdx, Step.NewType);
62 case FewerElements:
63 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
64 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
65 case Custom:
66 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
67 return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
68 : UnableToLegalize;
69 default:
70 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
71 return UnableToLegalize;
75 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
76 SmallVectorImpl<unsigned> &VRegs) {
77 for (int i = 0; i < NumParts; ++i)
78 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
79 MIRBuilder.buildUnmerge(VRegs, Reg);
82 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
83 switch (Opcode) {
84 case TargetOpcode::G_SDIV:
85 assert(Size == 32 && "Unsupported size");
86 return RTLIB::SDIV_I32;
87 case TargetOpcode::G_UDIV:
88 assert(Size == 32 && "Unsupported size");
89 return RTLIB::UDIV_I32;
90 case TargetOpcode::G_SREM:
91 assert(Size == 32 && "Unsupported size");
92 return RTLIB::SREM_I32;
93 case TargetOpcode::G_UREM:
94 assert(Size == 32 && "Unsupported size");
95 return RTLIB::UREM_I32;
96 case TargetOpcode::G_FADD:
97 assert((Size == 32 || Size == 64) && "Unsupported size");
98 return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
99 case TargetOpcode::G_FSUB:
100 assert((Size == 32 || Size == 64) && "Unsupported size");
101 return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
102 case TargetOpcode::G_FMUL:
103 assert((Size == 32 || Size == 64) && "Unsupported size");
104 return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
105 case TargetOpcode::G_FDIV:
106 assert((Size == 32 || Size == 64) && "Unsupported size");
107 return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
108 case TargetOpcode::G_FREM:
109 return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
110 case TargetOpcode::G_FPOW:
111 return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
112 case TargetOpcode::G_FMA:
113 assert((Size == 32 || Size == 64) && "Unsupported size");
114 return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
116 llvm_unreachable("Unknown libcall function");
119 LegalizerHelper::LegalizeResult
120 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
121 const CallLowering::ArgInfo &Result,
122 ArrayRef<CallLowering::ArgInfo> Args) {
123 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
124 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
125 const char *Name = TLI.getLibcallName(Libcall);
127 MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
128 if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
129 MachineOperand::CreateES(Name), Result, Args))
130 return LegalizerHelper::UnableToLegalize;
132 return LegalizerHelper::Legalized;
135 // Useful for libcalls where all operands have the same type.
136 static LegalizerHelper::LegalizeResult
137 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
138 Type *OpType) {
139 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
141 SmallVector<CallLowering::ArgInfo, 3> Args;
142 for (unsigned i = 1; i < MI.getNumOperands(); i++)
143 Args.push_back({MI.getOperand(i).getReg(), OpType});
144 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
145 Args);
148 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
149 Type *FromType) {
150 auto ToMVT = MVT::getVT(ToType);
151 auto FromMVT = MVT::getVT(FromType);
153 switch (Opcode) {
154 case TargetOpcode::G_FPEXT:
155 return RTLIB::getFPEXT(FromMVT, ToMVT);
156 case TargetOpcode::G_FPTRUNC:
157 return RTLIB::getFPROUND(FromMVT, ToMVT);
158 case TargetOpcode::G_FPTOSI:
159 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
160 case TargetOpcode::G_FPTOUI:
161 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
162 case TargetOpcode::G_SITOFP:
163 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
164 case TargetOpcode::G_UITOFP:
165 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
167 llvm_unreachable("Unsupported libcall function");
170 static LegalizerHelper::LegalizeResult
171 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
172 Type *FromType) {
173 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
174 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
175 {{MI.getOperand(1).getReg(), FromType}});
178 LegalizerHelper::LegalizeResult
179 LegalizerHelper::libcall(MachineInstr &MI) {
180 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
181 unsigned Size = LLTy.getSizeInBits();
182 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
184 MIRBuilder.setInstr(MI);
186 switch (MI.getOpcode()) {
187 default:
188 return UnableToLegalize;
189 case TargetOpcode::G_SDIV:
190 case TargetOpcode::G_UDIV:
191 case TargetOpcode::G_SREM:
192 case TargetOpcode::G_UREM: {
193 Type *HLTy = Type::getInt32Ty(Ctx);
194 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
195 if (Status != Legalized)
196 return Status;
197 break;
199 case TargetOpcode::G_FADD:
200 case TargetOpcode::G_FSUB:
201 case TargetOpcode::G_FMUL:
202 case TargetOpcode::G_FDIV:
203 case TargetOpcode::G_FMA:
204 case TargetOpcode::G_FPOW:
205 case TargetOpcode::G_FREM: {
206 Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
207 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
208 if (Status != Legalized)
209 return Status;
210 break;
212 case TargetOpcode::G_FPEXT: {
213 // FIXME: Support other floating point types (half, fp128 etc)
214 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
215 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
216 if (ToSize != 64 || FromSize != 32)
217 return UnableToLegalize;
218 LegalizeResult Status = conversionLibcall(
219 MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
220 if (Status != Legalized)
221 return Status;
222 break;
224 case TargetOpcode::G_FPTRUNC: {
225 // FIXME: Support other floating point types (half, fp128 etc)
226 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
227 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
228 if (ToSize != 32 || FromSize != 64)
229 return UnableToLegalize;
230 LegalizeResult Status = conversionLibcall(
231 MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
232 if (Status != Legalized)
233 return Status;
234 break;
236 case TargetOpcode::G_FPTOSI:
237 case TargetOpcode::G_FPTOUI: {
238 // FIXME: Support other types
239 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
240 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
241 if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
242 return UnableToLegalize;
243 LegalizeResult Status = conversionLibcall(
244 MI, MIRBuilder, Type::getInt32Ty(Ctx),
245 FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
246 if (Status != Legalized)
247 return Status;
248 break;
250 case TargetOpcode::G_SITOFP:
251 case TargetOpcode::G_UITOFP: {
252 // FIXME: Support other types
253 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
254 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
255 if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
256 return UnableToLegalize;
257 LegalizeResult Status = conversionLibcall(
258 MI, MIRBuilder,
259 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
260 Type::getInt32Ty(Ctx));
261 if (Status != Legalized)
262 return Status;
263 break;
267 MI.eraseFromParent();
268 return Legalized;
271 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
272 unsigned TypeIdx,
273 LLT NarrowTy) {
274 // FIXME: Don't know how to handle secondary types yet.
275 if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
276 return UnableToLegalize;
278 MIRBuilder.setInstr(MI);
280 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
281 uint64_t NarrowSize = NarrowTy.getSizeInBits();
283 switch (MI.getOpcode()) {
284 default:
285 return UnableToLegalize;
286 case TargetOpcode::G_IMPLICIT_DEF: {
287 // FIXME: add support for when SizeOp0 isn't an exact multiple of
288 // NarrowSize.
289 if (SizeOp0 % NarrowSize != 0)
290 return UnableToLegalize;
291 int NumParts = SizeOp0 / NarrowSize;
293 SmallVector<unsigned, 2> DstRegs;
294 for (int i = 0; i < NumParts; ++i)
295 DstRegs.push_back(
296 MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
297 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
298 MI.eraseFromParent();
299 return Legalized;
301 case TargetOpcode::G_ADD: {
302 // FIXME: add support for when SizeOp0 isn't an exact multiple of
303 // NarrowSize.
304 if (SizeOp0 % NarrowSize != 0)
305 return UnableToLegalize;
306 // Expand in terms of carry-setting/consuming G_ADDE instructions.
307 int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
309 SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
310 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
311 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
313 unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
314 MIRBuilder.buildConstant(CarryIn, 0);
316 for (int i = 0; i < NumParts; ++i) {
317 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
318 unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
320 MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
321 Src2Regs[i], CarryIn);
323 DstRegs.push_back(DstReg);
324 CarryIn = CarryOut;
326 unsigned DstReg = MI.getOperand(0).getReg();
327 MIRBuilder.buildMerge(DstReg, DstRegs);
328 MI.eraseFromParent();
329 return Legalized;
331 case TargetOpcode::G_EXTRACT: {
332 if (TypeIdx != 1)
333 return UnableToLegalize;
335 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
336 // FIXME: add support for when SizeOp1 isn't an exact multiple of
337 // NarrowSize.
338 if (SizeOp1 % NarrowSize != 0)
339 return UnableToLegalize;
340 int NumParts = SizeOp1 / NarrowSize;
342 SmallVector<unsigned, 2> SrcRegs, DstRegs;
343 SmallVector<uint64_t, 2> Indexes;
344 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
346 unsigned OpReg = MI.getOperand(0).getReg();
347 uint64_t OpStart = MI.getOperand(2).getImm();
348 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
349 for (int i = 0; i < NumParts; ++i) {
350 unsigned SrcStart = i * NarrowSize;
352 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
353 // No part of the extract uses this subregister, ignore it.
354 continue;
355 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
356 // The entire subregister is extracted, forward the value.
357 DstRegs.push_back(SrcRegs[i]);
358 continue;
361 // OpSegStart is where this destination segment would start in OpReg if it
362 // extended infinitely in both directions.
363 int64_t ExtractOffset;
364 uint64_t SegSize;
365 if (OpStart < SrcStart) {
366 ExtractOffset = 0;
367 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
368 } else {
369 ExtractOffset = OpStart - SrcStart;
370 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
373 unsigned SegReg = SrcRegs[i];
374 if (ExtractOffset != 0 || SegSize != NarrowSize) {
375 // A genuine extract is needed.
376 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
377 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
380 DstRegs.push_back(SegReg);
383 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
384 MI.eraseFromParent();
385 return Legalized;
387 case TargetOpcode::G_INSERT: {
388 // FIXME: add support for when SizeOp0 isn't an exact multiple of
389 // NarrowSize.
390 if (SizeOp0 % NarrowSize != 0)
391 return UnableToLegalize;
393 int NumParts = SizeOp0 / NarrowSize;
395 SmallVector<unsigned, 2> SrcRegs, DstRegs;
396 SmallVector<uint64_t, 2> Indexes;
397 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
399 unsigned OpReg = MI.getOperand(2).getReg();
400 uint64_t OpStart = MI.getOperand(3).getImm();
401 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
402 for (int i = 0; i < NumParts; ++i) {
403 unsigned DstStart = i * NarrowSize;
405 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
406 // No part of the insert affects this subregister, forward the original.
407 DstRegs.push_back(SrcRegs[i]);
408 continue;
409 } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
410 // The entire subregister is defined by this insert, forward the new
411 // value.
412 DstRegs.push_back(OpReg);
413 continue;
416 // OpSegStart is where this destination segment would start in OpReg if it
417 // extended infinitely in both directions.
418 int64_t ExtractOffset, InsertOffset;
419 uint64_t SegSize;
420 if (OpStart < DstStart) {
421 InsertOffset = 0;
422 ExtractOffset = DstStart - OpStart;
423 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
424 } else {
425 InsertOffset = OpStart - DstStart;
426 ExtractOffset = 0;
427 SegSize =
428 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
431 unsigned SegReg = OpReg;
432 if (ExtractOffset != 0 || SegSize != OpSize) {
433 // A genuine extract is needed.
434 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
435 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
438 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
439 MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
440 DstRegs.push_back(DstReg);
443 assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
444 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
445 MI.eraseFromParent();
446 return Legalized;
448 case TargetOpcode::G_LOAD: {
449 // FIXME: add support for when SizeOp0 isn't an exact multiple of
450 // NarrowSize.
451 if (SizeOp0 % NarrowSize != 0)
452 return UnableToLegalize;
454 const auto &MMO = **MI.memoperands_begin();
455 // This implementation doesn't work for atomics. Give up instead of doing
456 // something invalid.
457 if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
458 MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
459 return UnableToLegalize;
461 int NumParts = SizeOp0 / NarrowSize;
462 LLT OffsetTy = LLT::scalar(
463 MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
465 SmallVector<unsigned, 2> DstRegs;
466 for (int i = 0; i < NumParts; ++i) {
467 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
468 unsigned SrcReg = 0;
469 unsigned Adjustment = i * NarrowSize / 8;
471 MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
472 MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
473 NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
474 MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
475 MMO.getOrdering(), MMO.getFailureOrdering());
477 MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
478 Adjustment);
480 MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);
482 DstRegs.push_back(DstReg);
484 unsigned DstReg = MI.getOperand(0).getReg();
485 MIRBuilder.buildMerge(DstReg, DstRegs);
486 MI.eraseFromParent();
487 return Legalized;
489 case TargetOpcode::G_STORE: {
490 // FIXME: add support for when SizeOp0 isn't an exact multiple of
491 // NarrowSize.
492 if (SizeOp0 % NarrowSize != 0)
493 return UnableToLegalize;
495 const auto &MMO = **MI.memoperands_begin();
496 // This implementation doesn't work for atomics. Give up instead of doing
497 // something invalid.
498 if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
499 MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
500 return UnableToLegalize;
502 int NumParts = SizeOp0 / NarrowSize;
503 LLT OffsetTy = LLT::scalar(
504 MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
506 SmallVector<unsigned, 2> SrcRegs;
507 extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
509 for (int i = 0; i < NumParts; ++i) {
510 unsigned DstReg = 0;
511 unsigned Adjustment = i * NarrowSize / 8;
513 MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
514 MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
515 NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
516 MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
517 MMO.getOrdering(), MMO.getFailureOrdering());
519 MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
520 Adjustment);
522 MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
524 MI.eraseFromParent();
525 return Legalized;
527 case TargetOpcode::G_CONSTANT: {
528 // FIXME: add support for when SizeOp0 isn't an exact multiple of
529 // NarrowSize.
530 if (SizeOp0 % NarrowSize != 0)
531 return UnableToLegalize;
532 int NumParts = SizeOp0 / NarrowSize;
533 const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
534 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
536 SmallVector<unsigned, 2> DstRegs;
537 for (int i = 0; i < NumParts; ++i) {
538 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
539 ConstantInt *CI =
540 ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
541 MIRBuilder.buildConstant(DstReg, *CI);
542 DstRegs.push_back(DstReg);
544 unsigned DstReg = MI.getOperand(0).getReg();
545 MIRBuilder.buildMerge(DstReg, DstRegs);
546 MI.eraseFromParent();
547 return Legalized;
549 case TargetOpcode::G_OR: {
550 // Legalize bitwise operation:
551 // A = BinOp<Ty> B, C
552 // into:
553 // B1, ..., BN = G_UNMERGE_VALUES B
554 // C1, ..., CN = G_UNMERGE_VALUES C
555 // A1 = BinOp<Ty/N> B1, C2
556 // ...
557 // AN = BinOp<Ty/N> BN, CN
558 // A = G_MERGE_VALUES A1, ..., AN
560 // FIXME: add support for when SizeOp0 isn't an exact multiple of
561 // NarrowSize.
562 if (SizeOp0 % NarrowSize != 0)
563 return UnableToLegalize;
564 int NumParts = SizeOp0 / NarrowSize;
566 // List the registers where the destination will be scattered.
567 SmallVector<unsigned, 2> DstRegs;
568 // List the registers where the first argument will be split.
569 SmallVector<unsigned, 2> SrcsReg1;
570 // List the registers where the second argument will be split.
571 SmallVector<unsigned, 2> SrcsReg2;
572 // Create all the temporary registers.
573 for (int i = 0; i < NumParts; ++i) {
574 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
575 unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
576 unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
578 DstRegs.push_back(DstReg);
579 SrcsReg1.push_back(SrcReg1);
580 SrcsReg2.push_back(SrcReg2);
582 // Explode the big arguments into smaller chunks.
583 MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
584 MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
586 // Do the operation on each small part.
587 for (int i = 0; i < NumParts; ++i)
588 MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]);
590 // Gather the destination registers into the final destination.
591 unsigned DstReg = MI.getOperand(0).getReg();
592 MIRBuilder.buildMerge(DstReg, DstRegs);
593 MI.eraseFromParent();
594 return Legalized;
599 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
600 unsigned OpIdx, unsigned ExtOpcode) {
601 MachineOperand &MO = MI.getOperand(OpIdx);
602 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg());
603 MO.setReg(ExtB->getOperand(0).getReg());
606 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
607 unsigned OpIdx, unsigned TruncOpcode) {
608 MachineOperand &MO = MI.getOperand(OpIdx);
609 unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
610 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
611 MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt);
612 MO.setReg(DstExt);
615 LegalizerHelper::LegalizeResult
616 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
617 MIRBuilder.setInstr(MI);
619 switch (MI.getOpcode()) {
620 default:
621 return UnableToLegalize;
622 case TargetOpcode::G_UADDO:
623 case TargetOpcode::G_USUBO: {
624 if (TypeIdx == 1)
625 return UnableToLegalize; // TODO
626 auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, WideTy,
627 MI.getOperand(2).getReg());
628 auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, WideTy,
629 MI.getOperand(3).getReg());
630 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
631 ? TargetOpcode::G_ADD
632 : TargetOpcode::G_SUB;
633 // Do the arithmetic in the larger type.
634 auto NewOp = MIRBuilder.buildInstr(Opcode, WideTy, LHSZext, RHSZext);
635 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
636 APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
637 auto AndOp = MIRBuilder.buildInstr(
638 TargetOpcode::G_AND, WideTy, NewOp,
639 MIRBuilder.buildConstant(WideTy, Mask.getZExtValue()));
640 // There is no overflow if the AndOp is the same as NewOp.
641 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
642 AndOp);
643 // Now trunc the NewOp to the original result.
644 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
645 MI.eraseFromParent();
646 return Legalized;
648 case TargetOpcode::G_CTTZ:
649 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
650 case TargetOpcode::G_CTLZ:
651 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
652 case TargetOpcode::G_CTPOP: {
653 // First ZEXT the input.
654 auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
655 LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
656 if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
657 // The count is the same in the larger type except if the original
658 // value was zero. This can be handled by setting the bit just off
659 // the top of the original type.
660 auto TopBit =
661 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
662 MIBSrc = MIRBuilder.buildInstr(
663 TargetOpcode::G_OR, WideTy, MIBSrc,
664 MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue()));
666 // Perform the operation at the larger size.
667 auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), WideTy, MIBSrc);
668 // This is already the correct result for CTPOP and CTTZs
669 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
670 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
671 // The correct result is NewOp - (Difference in widety and current ty).
672 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
673 MIBNewOp =
674 MIRBuilder.buildInstr(TargetOpcode::G_SUB, WideTy, MIBNewOp,
675 MIRBuilder.buildConstant(WideTy, SizeDiff));
677 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
678 // Make the original instruction a trunc now, and update it's source.
679 MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
680 MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
681 MIRBuilder.recordInsertion(&MI);
682 return Legalized;
685 case TargetOpcode::G_ADD:
686 case TargetOpcode::G_AND:
687 case TargetOpcode::G_MUL:
688 case TargetOpcode::G_OR:
689 case TargetOpcode::G_XOR:
690 case TargetOpcode::G_SUB:
691 // Perform operation at larger width (any extension is fine here, high bits
692 // don't affect the result) and then truncate the result back to the
693 // original type.
694 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
695 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
696 widenScalarDst(MI, WideTy);
697 MIRBuilder.recordInsertion(&MI);
698 return Legalized;
700 case TargetOpcode::G_SHL:
701 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
702 // The "number of bits to shift" operand must preserve its value as an
703 // unsigned integer:
704 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
705 widenScalarDst(MI, WideTy);
706 MIRBuilder.recordInsertion(&MI);
707 return Legalized;
709 case TargetOpcode::G_SDIV:
710 case TargetOpcode::G_SREM:
711 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
712 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
713 widenScalarDst(MI, WideTy);
714 MIRBuilder.recordInsertion(&MI);
715 return Legalized;
717 case TargetOpcode::G_ASHR:
718 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
719 // The "number of bits to shift" operand must preserve its value as an
720 // unsigned integer:
721 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
722 widenScalarDst(MI, WideTy);
723 MIRBuilder.recordInsertion(&MI);
724 return Legalized;
726 case TargetOpcode::G_UDIV:
727 case TargetOpcode::G_UREM:
728 case TargetOpcode::G_LSHR:
729 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
730 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
731 widenScalarDst(MI, WideTy);
732 MIRBuilder.recordInsertion(&MI);
733 return Legalized;
735 case TargetOpcode::G_SELECT:
736 if (TypeIdx != 0)
737 return UnableToLegalize;
738 // Perform operation at larger width (any extension is fine here, high bits
739 // don't affect the result) and then truncate the result back to the
740 // original type.
741 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
742 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
743 widenScalarDst(MI, WideTy);
744 MIRBuilder.recordInsertion(&MI);
745 return Legalized;
747 case TargetOpcode::G_FPTOSI:
748 case TargetOpcode::G_FPTOUI:
749 if (TypeIdx != 0)
750 return UnableToLegalize;
751 widenScalarDst(MI, WideTy);
752 MIRBuilder.recordInsertion(&MI);
753 return Legalized;
755 case TargetOpcode::G_SITOFP:
756 if (TypeIdx != 1)
757 return UnableToLegalize;
758 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
759 MIRBuilder.recordInsertion(&MI);
760 return Legalized;
762 case TargetOpcode::G_UITOFP:
763 if (TypeIdx != 1)
764 return UnableToLegalize;
765 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
766 MIRBuilder.recordInsertion(&MI);
767 return Legalized;
769 case TargetOpcode::G_INSERT:
770 if (TypeIdx != 0)
771 return UnableToLegalize;
772 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
773 widenScalarDst(MI, WideTy);
774 MIRBuilder.recordInsertion(&MI);
775 return Legalized;
777 case TargetOpcode::G_LOAD:
778 // For some types like i24, we might try to widen to i32. To properly handle
779 // this we should be using a dedicated extending load, until then avoid
780 // trying to legalize.
781 if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
782 WideTy.getSizeInBits())
783 return UnableToLegalize;
784 LLVM_FALLTHROUGH;
785 case TargetOpcode::G_SEXTLOAD:
786 case TargetOpcode::G_ZEXTLOAD:
787 widenScalarDst(MI, WideTy);
788 MIRBuilder.recordInsertion(&MI);
789 return Legalized;
791 case TargetOpcode::G_STORE: {
792 if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
793 WideTy != LLT::scalar(8))
794 return UnableToLegalize;
796 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
797 MIRBuilder.recordInsertion(&MI);
798 return Legalized;
800 case TargetOpcode::G_CONSTANT: {
801 MachineOperand &SrcMO = MI.getOperand(1);
802 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
803 const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
804 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
806 widenScalarDst(MI, WideTy);
807 MIRBuilder.recordInsertion(&MI);
808 return Legalized;
810 case TargetOpcode::G_FCONSTANT: {
811 MachineOperand &SrcMO = MI.getOperand(1);
812 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
813 APFloat Val = SrcMO.getFPImm()->getValueAPF();
814 bool LosesInfo;
815 switch (WideTy.getSizeInBits()) {
816 case 32:
817 Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
818 break;
819 case 64:
820 Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
821 break;
822 default:
823 llvm_unreachable("Unhandled fp widen type");
825 SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
827 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
828 MIRBuilder.recordInsertion(&MI);
829 return Legalized;
831 case TargetOpcode::G_BRCOND:
832 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
833 MIRBuilder.recordInsertion(&MI);
834 return Legalized;
836 case TargetOpcode::G_FCMP:
837 if (TypeIdx == 0)
838 widenScalarDst(MI, WideTy);
839 else {
840 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
841 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
843 MIRBuilder.recordInsertion(&MI);
844 return Legalized;
846 case TargetOpcode::G_ICMP:
847 if (TypeIdx == 0)
848 widenScalarDst(MI, WideTy);
849 else {
850 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
851 MI.getOperand(1).getPredicate()))
852 ? TargetOpcode::G_SEXT
853 : TargetOpcode::G_ZEXT;
854 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
855 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
857 MIRBuilder.recordInsertion(&MI);
858 return Legalized;
860 case TargetOpcode::G_GEP:
861 assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
862 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
863 MIRBuilder.recordInsertion(&MI);
864 return Legalized;
866 case TargetOpcode::G_PHI: {
867 assert(TypeIdx == 0 && "Expecting only Idx 0");
869 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
870 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
871 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
872 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
875 MachineBasicBlock &MBB = *MI.getParent();
876 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
877 widenScalarDst(MI, WideTy);
878 MIRBuilder.recordInsertion(&MI);
879 return Legalized;
884 LegalizerHelper::LegalizeResult
885 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
886 using namespace TargetOpcode;
887 MIRBuilder.setInstr(MI);
889 switch(MI.getOpcode()) {
890 default:
891 return UnableToLegalize;
892 case TargetOpcode::G_SREM:
893 case TargetOpcode::G_UREM: {
894 unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
895 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
896 .addDef(QuotReg)
897 .addUse(MI.getOperand(1).getReg())
898 .addUse(MI.getOperand(2).getReg());
900 unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
901 MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
902 MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
903 ProdReg);
904 MI.eraseFromParent();
905 return Legalized;
907 case TargetOpcode::G_SMULO:
908 case TargetOpcode::G_UMULO: {
909 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
910 // result.
911 unsigned Res = MI.getOperand(0).getReg();
912 unsigned Overflow = MI.getOperand(1).getReg();
913 unsigned LHS = MI.getOperand(2).getReg();
914 unsigned RHS = MI.getOperand(3).getReg();
916 MIRBuilder.buildMul(Res, LHS, RHS);
918 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
919 ? TargetOpcode::G_SMULH
920 : TargetOpcode::G_UMULH;
922 unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
923 MIRBuilder.buildInstr(Opcode)
924 .addDef(HiPart)
925 .addUse(LHS)
926 .addUse(RHS);
928 unsigned Zero = MRI.createGenericVirtualRegister(Ty);
929 MIRBuilder.buildConstant(Zero, 0);
931 // For *signed* multiply, overflow is detected by checking:
932 // (hi != (lo >> bitwidth-1))
933 if (Opcode == TargetOpcode::G_SMULH) {
934 unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
935 unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
936 MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
937 MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
938 .addDef(Shifted)
939 .addUse(Res)
940 .addUse(ShiftAmt);
941 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
942 } else {
943 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
945 MI.eraseFromParent();
946 return Legalized;
948 case TargetOpcode::G_FNEG: {
949 // TODO: Handle vector types once we are able to
950 // represent them.
951 if (Ty.isVector())
952 return UnableToLegalize;
953 unsigned Res = MI.getOperand(0).getReg();
954 Type *ZeroTy;
955 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
956 switch (Ty.getSizeInBits()) {
957 case 16:
958 ZeroTy = Type::getHalfTy(Ctx);
959 break;
960 case 32:
961 ZeroTy = Type::getFloatTy(Ctx);
962 break;
963 case 64:
964 ZeroTy = Type::getDoubleTy(Ctx);
965 break;
966 case 128:
967 ZeroTy = Type::getFP128Ty(Ctx);
968 break;
969 default:
970 llvm_unreachable("unexpected floating-point type");
972 ConstantFP &ZeroForNegation =
973 *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
974 auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
975 MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
976 .addDef(Res)
977 .addUse(Zero->getOperand(0).getReg())
978 .addUse(MI.getOperand(1).getReg());
979 MI.eraseFromParent();
980 return Legalized;
982 case TargetOpcode::G_FSUB: {
983 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
984 // First, check if G_FNEG is marked as Lower. If so, we may
985 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
986 if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
987 return UnableToLegalize;
988 unsigned Res = MI.getOperand(0).getReg();
989 unsigned LHS = MI.getOperand(1).getReg();
990 unsigned RHS = MI.getOperand(2).getReg();
991 unsigned Neg = MRI.createGenericVirtualRegister(Ty);
992 MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
993 MIRBuilder.buildInstr(TargetOpcode::G_FADD)
994 .addDef(Res)
995 .addUse(LHS)
996 .addUse(Neg);
997 MI.eraseFromParent();
998 return Legalized;
1000 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1001 unsigned OldValRes = MI.getOperand(0).getReg();
1002 unsigned SuccessRes = MI.getOperand(1).getReg();
1003 unsigned Addr = MI.getOperand(2).getReg();
1004 unsigned CmpVal = MI.getOperand(3).getReg();
1005 unsigned NewVal = MI.getOperand(4).getReg();
1006 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1007 **MI.memoperands_begin());
1008 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1009 MI.eraseFromParent();
1010 return Legalized;
1012 case TargetOpcode::G_LOAD:
1013 case TargetOpcode::G_SEXTLOAD:
1014 case TargetOpcode::G_ZEXTLOAD: {
1015 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1016 unsigned DstReg = MI.getOperand(0).getReg();
1017 unsigned PtrReg = MI.getOperand(1).getReg();
1018 LLT DstTy = MRI.getType(DstReg);
1019 auto &MMO = **MI.memoperands_begin();
1021 if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
1022 // In the case of G_LOAD, this was a non-extending load already and we're
1023 // about to lower to the same instruction.
1024 if (MI.getOpcode() == TargetOpcode::G_LOAD)
1025 return UnableToLegalize;
1026 MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1027 MI.eraseFromParent();
1028 return Legalized;
1031 if (DstTy.isScalar()) {
1032 unsigned TmpReg = MRI.createGenericVirtualRegister(
1033 LLT::scalar(MMO.getSize() /* in bytes */ * 8));
1034 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1035 switch (MI.getOpcode()) {
1036 default:
1037 llvm_unreachable("Unexpected opcode");
1038 case TargetOpcode::G_LOAD:
1039 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1040 break;
1041 case TargetOpcode::G_SEXTLOAD:
1042 MIRBuilder.buildSExt(DstReg, TmpReg);
1043 break;
1044 case TargetOpcode::G_ZEXTLOAD:
1045 MIRBuilder.buildZExt(DstReg, TmpReg);
1046 break;
1048 MI.eraseFromParent();
1049 return Legalized;
1052 return UnableToLegalize;
1054 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1055 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1056 case TargetOpcode::G_CTLZ:
1057 case TargetOpcode::G_CTTZ:
1058 case TargetOpcode::G_CTPOP:
1059 return lowerBitCount(MI, TypeIdx, Ty);
1063 LegalizerHelper::LegalizeResult
1064 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
1065 LLT NarrowTy) {
1066 // FIXME: Don't know how to handle secondary types yet.
1067 if (TypeIdx != 0)
1068 return UnableToLegalize;
1069 switch (MI.getOpcode()) {
1070 default:
1071 return UnableToLegalize;
1072 case TargetOpcode::G_ADD: {
1073 unsigned NarrowSize = NarrowTy.getSizeInBits();
1074 unsigned DstReg = MI.getOperand(0).getReg();
1075 unsigned Size = MRI.getType(DstReg).getSizeInBits();
1076 int NumParts = Size / NarrowSize;
1077 // FIXME: Don't know how to handle the situation where the small vectors
1078 // aren't all the same size yet.
1079 if (Size % NarrowSize != 0)
1080 return UnableToLegalize;
1082 MIRBuilder.setInstr(MI);
1084 SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
1085 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
1086 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
1088 for (int i = 0; i < NumParts; ++i) {
1089 unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1090 MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
1091 DstRegs.push_back(DstReg);
1094 MIRBuilder.buildMerge(DstReg, DstRegs);
1095 MI.eraseFromParent();
1096 return Legalized;
1101 LegalizerHelper::LegalizeResult
1102 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1103 unsigned Opc = MI.getOpcode();
1104 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1105 auto isLegalOrCustom = [this](const LegalityQuery &Q) {
1106 auto QAction = LI.getAction(Q).Action;
1107 return QAction == Legal || QAction == Custom;
1109 switch (Opc) {
1110 default:
1111 return UnableToLegalize;
1112 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1113 // This trivially expands to CTLZ.
1114 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
1115 MIRBuilder.recordInsertion(&MI);
1116 return Legalized;
1118 case TargetOpcode::G_CTLZ: {
1119 unsigned SrcReg = MI.getOperand(1).getReg();
1120 unsigned Len = Ty.getSizeInBits();
1121 if (isLegalOrCustom({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
1122 // If CTLZ_ZERO_UNDEF is legal or custom, emit that and a select with
1123 // zero.
1124 auto MIBCtlzZU =
1125 MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, Ty, SrcReg);
1126 auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1127 auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1128 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1129 SrcReg, MIBZero);
1130 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1131 MIBCtlzZU);
1132 MI.eraseFromParent();
1133 return Legalized;
1135 // for now, we do this:
1136 // NewLen = NextPowerOf2(Len);
1137 // x = x | (x >> 1);
1138 // x = x | (x >> 2);
1139 // ...
1140 // x = x | (x >>16);
1141 // x = x | (x >>32); // for 64-bit input
1142 // Upto NewLen/2
1143 // return Len - popcount(x);
1145 // Ref: "Hacker's Delight" by Henry Warren
1146 unsigned Op = SrcReg;
1147 unsigned NewLen = PowerOf2Ceil(Len);
1148 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
1149 auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
1150 auto MIBOp = MIRBuilder.buildInstr(
1151 TargetOpcode::G_OR, Ty, Op,
1152 MIRBuilder.buildInstr(TargetOpcode::G_LSHR, Ty, Op, MIBShiftAmt));
1153 Op = MIBOp->getOperand(0).getReg();
1155 auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, Ty, Op);
1156 MIRBuilder.buildInstr(TargetOpcode::G_SUB, MI.getOperand(0).getReg(),
1157 MIRBuilder.buildConstant(Ty, Len), MIBPop);
1158 MI.eraseFromParent();
1159 return Legalized;
1161 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
1162 // This trivially expands to CTTZ.
1163 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
1164 MIRBuilder.recordInsertion(&MI);
1165 return Legalized;
1167 case TargetOpcode::G_CTTZ: {
1168 unsigned SrcReg = MI.getOperand(1).getReg();
1169 unsigned Len = Ty.getSizeInBits();
1170 if (isLegalOrCustom({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
1171 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
1172 // zero.
1173 auto MIBCttzZU =
1174 MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, Ty, SrcReg);
1175 auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
1176 auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
1177 auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
1178 SrcReg, MIBZero);
1179 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
1180 MIBCttzZU);
1181 MI.eraseFromParent();
1182 return Legalized;
1184 // for now, we use: { return popcount(~x & (x - 1)); }
1185 // unless the target has ctlz but not ctpop, in which case we use:
1186 // { return 32 - nlz(~x & (x-1)); }
1187 // Ref: "Hacker's Delight" by Henry Warren
1188 auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
1189 auto MIBNot =
1190 MIRBuilder.buildInstr(TargetOpcode::G_XOR, Ty, SrcReg, MIBCstNeg1);
1191 auto MIBTmp = MIRBuilder.buildInstr(
1192 TargetOpcode::G_AND, Ty, MIBNot,
1193 MIRBuilder.buildInstr(TargetOpcode::G_ADD, Ty, SrcReg, MIBCstNeg1));
1194 if (!isLegalOrCustom({TargetOpcode::G_CTPOP, {Ty}}) &&
1195 isLegalOrCustom({TargetOpcode::G_CTLZ, {Ty}})) {
1196 auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
1197 MIRBuilder.buildInstr(
1198 TargetOpcode::G_SUB, MI.getOperand(0).getReg(),
1199 MIBCstLen,
1200 MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, Ty, MIBTmp));
1201 MI.eraseFromParent();
1202 return Legalized;
1204 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
1205 MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
1206 return Legalized;