[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / CodeGen / GlobalISel / LegalizerHelper.cpp
blobf2b37eada2e0e7d858bbe9e44dd8a76e1674bdeb
1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetFrameLowering.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
28 #define DEBUG_TYPE "legalizer"
30 using namespace llvm;
31 using namespace LegalizeActions;
33 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34 ///
35 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
36 /// with any leftover piece as type \p LeftoverTy
37 ///
38 /// Returns -1 in the first element of the pair if the breakdown is not
39 /// satisfiable.
40 static std::pair<int, int>
41 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
42 assert(!LeftoverTy.isValid() && "this is an out argument");
44 unsigned Size = OrigTy.getSizeInBits();
45 unsigned NarrowSize = NarrowTy.getSizeInBits();
46 unsigned NumParts = Size / NarrowSize;
47 unsigned LeftoverSize = Size - NumParts * NarrowSize;
48 assert(Size > NarrowSize);
50 if (LeftoverSize == 0)
51 return {NumParts, 0};
53 if (NarrowTy.isVector()) {
54 unsigned EltSize = OrigTy.getScalarSizeInBits();
55 if (LeftoverSize % EltSize != 0)
56 return {-1, -1};
57 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
58 } else {
59 LeftoverTy = LLT::scalar(LeftoverSize);
62 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
63 return std::make_pair(NumParts, NumLeftover);
66 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
67 GISelChangeObserver &Observer,
68 MachineIRBuilder &Builder)
69 : MIRBuilder(Builder), MRI(MF.getRegInfo()),
70 LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
71 MIRBuilder.setMF(MF);
72 MIRBuilder.setChangeObserver(Observer);
75 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
76 GISelChangeObserver &Observer,
77 MachineIRBuilder &B)
78 : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
79 MIRBuilder.setMF(MF);
80 MIRBuilder.setChangeObserver(Observer);
82 LegalizerHelper::LegalizeResult
83 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
84 LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
86 if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
87 MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
88 return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
89 : UnableToLegalize;
90 auto Step = LI.getAction(MI, MRI);
91 switch (Step.Action) {
92 case Legal:
93 LLVM_DEBUG(dbgs() << ".. Already legal\n");
94 return AlreadyLegal;
95 case Libcall:
96 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
97 return libcall(MI);
98 case NarrowScalar:
99 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
100 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
101 case WidenScalar:
102 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
103 return widenScalar(MI, Step.TypeIdx, Step.NewType);
104 case Lower:
105 LLVM_DEBUG(dbgs() << ".. Lower\n");
106 return lower(MI, Step.TypeIdx, Step.NewType);
107 case FewerElements:
108 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
109 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
110 case MoreElements:
111 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
112 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
113 case Custom:
114 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
115 return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
116 : UnableToLegalize;
117 default:
118 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
119 return UnableToLegalize;
123 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
124 SmallVectorImpl<Register> &VRegs) {
125 for (int i = 0; i < NumParts; ++i)
126 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
127 MIRBuilder.buildUnmerge(VRegs, Reg);
130 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
131 LLT MainTy, LLT &LeftoverTy,
132 SmallVectorImpl<Register> &VRegs,
133 SmallVectorImpl<Register> &LeftoverRegs) {
134 assert(!LeftoverTy.isValid() && "this is an out argument");
136 unsigned RegSize = RegTy.getSizeInBits();
137 unsigned MainSize = MainTy.getSizeInBits();
138 unsigned NumParts = RegSize / MainSize;
139 unsigned LeftoverSize = RegSize - NumParts * MainSize;
141 // Use an unmerge when possible.
142 if (LeftoverSize == 0) {
143 for (unsigned I = 0; I < NumParts; ++I)
144 VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
145 MIRBuilder.buildUnmerge(VRegs, Reg);
146 return true;
149 if (MainTy.isVector()) {
150 unsigned EltSize = MainTy.getScalarSizeInBits();
151 if (LeftoverSize % EltSize != 0)
152 return false;
153 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
154 } else {
155 LeftoverTy = LLT::scalar(LeftoverSize);
158 // For irregular sizes, extract the individual parts.
159 for (unsigned I = 0; I != NumParts; ++I) {
160 Register NewReg = MRI.createGenericVirtualRegister(MainTy);
161 VRegs.push_back(NewReg);
162 MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
165 for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
166 Offset += LeftoverSize) {
167 Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
168 LeftoverRegs.push_back(NewReg);
169 MIRBuilder.buildExtract(NewReg, Reg, Offset);
172 return true;
175 static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
176 if (OrigTy.isVector() && TargetTy.isVector()) {
177 assert(OrigTy.getElementType() == TargetTy.getElementType());
178 int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
179 TargetTy.getNumElements());
180 return LLT::scalarOrVector(GCD, OrigTy.getElementType());
183 if (OrigTy.isVector() && !TargetTy.isVector()) {
184 assert(OrigTy.getElementType() == TargetTy);
185 return TargetTy;
188 assert(!OrigTy.isVector() && !TargetTy.isVector());
190 int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
191 TargetTy.getSizeInBits());
192 return LLT::scalar(GCD);
195 void LegalizerHelper::insertParts(Register DstReg,
196 LLT ResultTy, LLT PartTy,
197 ArrayRef<Register> PartRegs,
198 LLT LeftoverTy,
199 ArrayRef<Register> LeftoverRegs) {
200 if (!LeftoverTy.isValid()) {
201 assert(LeftoverRegs.empty());
203 if (!ResultTy.isVector()) {
204 MIRBuilder.buildMerge(DstReg, PartRegs);
205 return;
208 if (PartTy.isVector())
209 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
210 else
211 MIRBuilder.buildBuildVector(DstReg, PartRegs);
212 return;
215 unsigned PartSize = PartTy.getSizeInBits();
216 unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
218 Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
219 MIRBuilder.buildUndef(CurResultReg);
221 unsigned Offset = 0;
222 for (Register PartReg : PartRegs) {
223 Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
224 MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
225 CurResultReg = NewResultReg;
226 Offset += PartSize;
229 for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
230 // Use the original output register for the final insert to avoid a copy.
231 Register NewResultReg = (I + 1 == E) ?
232 DstReg : MRI.createGenericVirtualRegister(ResultTy);
234 MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
235 CurResultReg = NewResultReg;
236 Offset += LeftoverPartSize;
240 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
241 switch (Opcode) {
242 case TargetOpcode::G_SDIV:
243 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
244 switch (Size) {
245 case 32:
246 return RTLIB::SDIV_I32;
247 case 64:
248 return RTLIB::SDIV_I64;
249 case 128:
250 return RTLIB::SDIV_I128;
251 default:
252 llvm_unreachable("unexpected size");
254 case TargetOpcode::G_UDIV:
255 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
256 switch (Size) {
257 case 32:
258 return RTLIB::UDIV_I32;
259 case 64:
260 return RTLIB::UDIV_I64;
261 case 128:
262 return RTLIB::UDIV_I128;
263 default:
264 llvm_unreachable("unexpected size");
266 case TargetOpcode::G_SREM:
267 assert((Size == 32 || Size == 64) && "Unsupported size");
268 return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
269 case TargetOpcode::G_UREM:
270 assert((Size == 32 || Size == 64) && "Unsupported size");
271 return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
272 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
273 assert(Size == 32 && "Unsupported size");
274 return RTLIB::CTLZ_I32;
275 case TargetOpcode::G_FADD:
276 assert((Size == 32 || Size == 64) && "Unsupported size");
277 return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
278 case TargetOpcode::G_FSUB:
279 assert((Size == 32 || Size == 64) && "Unsupported size");
280 return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
281 case TargetOpcode::G_FMUL:
282 assert((Size == 32 || Size == 64) && "Unsupported size");
283 return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
284 case TargetOpcode::G_FDIV:
285 assert((Size == 32 || Size == 64) && "Unsupported size");
286 return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
287 case TargetOpcode::G_FEXP:
288 assert((Size == 32 || Size == 64) && "Unsupported size");
289 return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
290 case TargetOpcode::G_FEXP2:
291 assert((Size == 32 || Size == 64) && "Unsupported size");
292 return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
293 case TargetOpcode::G_FREM:
294 return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
295 case TargetOpcode::G_FPOW:
296 return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
297 case TargetOpcode::G_FMA:
298 assert((Size == 32 || Size == 64) && "Unsupported size");
299 return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
300 case TargetOpcode::G_FSIN:
301 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
302 return Size == 128 ? RTLIB::SIN_F128
303 : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
304 case TargetOpcode::G_FCOS:
305 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
306 return Size == 128 ? RTLIB::COS_F128
307 : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
308 case TargetOpcode::G_FLOG10:
309 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
310 return Size == 128 ? RTLIB::LOG10_F128
311 : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
312 case TargetOpcode::G_FLOG:
313 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
314 return Size == 128 ? RTLIB::LOG_F128
315 : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
316 case TargetOpcode::G_FLOG2:
317 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
318 return Size == 128 ? RTLIB::LOG2_F128
319 : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
320 case TargetOpcode::G_FCEIL:
321 assert((Size == 32 || Size == 64) && "Unsupported size");
322 return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
323 case TargetOpcode::G_FFLOOR:
324 assert((Size == 32 || Size == 64) && "Unsupported size");
325 return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
327 llvm_unreachable("Unknown libcall function");
330 /// True if an instruction is in tail position in its caller. Intended for
331 /// legalizing libcalls as tail calls when possible.
332 static bool isLibCallInTailPosition(MachineInstr &MI) {
333 const Function &F = MI.getParent()->getParent()->getFunction();
335 // Conservatively require the attributes of the call to match those of
336 // the return. Ignore NoAlias and NonNull because they don't affect the
337 // call sequence.
338 AttributeList CallerAttrs = F.getAttributes();
339 if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
340 .removeAttribute(Attribute::NoAlias)
341 .removeAttribute(Attribute::NonNull)
342 .hasAttributes())
343 return false;
345 // It's not safe to eliminate the sign / zero extension of the return value.
346 if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
347 CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
348 return false;
350 // Only tail call if the following instruction is a standard return.
351 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
352 MachineInstr *Next = MI.getNextNode();
353 if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
354 return false;
356 return true;
359 LegalizerHelper::LegalizeResult
360 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
361 const CallLowering::ArgInfo &Result,
362 ArrayRef<CallLowering::ArgInfo> Args) {
363 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
364 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
365 const char *Name = TLI.getLibcallName(Libcall);
367 CallLowering::CallLoweringInfo Info;
368 Info.CallConv = TLI.getLibcallCallingConv(Libcall);
369 Info.Callee = MachineOperand::CreateES(Name);
370 Info.OrigRet = Result;
371 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
372 if (!CLI.lowerCall(MIRBuilder, Info))
373 return LegalizerHelper::UnableToLegalize;
375 return LegalizerHelper::Legalized;
378 // Useful for libcalls where all operands have the same type.
379 static LegalizerHelper::LegalizeResult
380 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
381 Type *OpType) {
382 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
384 SmallVector<CallLowering::ArgInfo, 3> Args;
385 for (unsigned i = 1; i < MI.getNumOperands(); i++)
386 Args.push_back({MI.getOperand(i).getReg(), OpType});
387 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
388 Args);
391 LegalizerHelper::LegalizeResult
392 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
393 MachineInstr &MI) {
394 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
395 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
397 SmallVector<CallLowering::ArgInfo, 3> Args;
398 for (unsigned i = 1; i < MI.getNumOperands(); i++) {
399 Register Reg = MI.getOperand(i).getReg();
401 // Need derive an IR type for call lowering.
402 LLT OpLLT = MRI.getType(Reg);
403 Type *OpTy = nullptr;
404 if (OpLLT.isPointer())
405 OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
406 else
407 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
408 Args.push_back({Reg, OpTy});
411 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
412 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
413 Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
414 RTLIB::Libcall RTLibcall;
415 switch (ID) {
416 case Intrinsic::memcpy:
417 RTLibcall = RTLIB::MEMCPY;
418 break;
419 case Intrinsic::memset:
420 RTLibcall = RTLIB::MEMSET;
421 break;
422 case Intrinsic::memmove:
423 RTLibcall = RTLIB::MEMMOVE;
424 break;
425 default:
426 return LegalizerHelper::UnableToLegalize;
428 const char *Name = TLI.getLibcallName(RTLibcall);
430 MIRBuilder.setInstr(MI);
432 CallLowering::CallLoweringInfo Info;
433 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
434 Info.Callee = MachineOperand::CreateES(Name);
435 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
436 Info.IsTailCall = isLibCallInTailPosition(MI);
438 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
439 if (!CLI.lowerCall(MIRBuilder, Info))
440 return LegalizerHelper::UnableToLegalize;
442 if (Info.LoweredTailCall) {
443 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
444 // We must have a return following the call to get past
445 // isLibCallInTailPosition.
446 assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
447 "Expected instr following MI to be a return?");
449 // We lowered a tail call, so the call is now the return from the block.
450 // Delete the old return.
451 MI.getNextNode()->eraseFromParent();
454 return LegalizerHelper::Legalized;
457 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
458 Type *FromType) {
459 auto ToMVT = MVT::getVT(ToType);
460 auto FromMVT = MVT::getVT(FromType);
462 switch (Opcode) {
463 case TargetOpcode::G_FPEXT:
464 return RTLIB::getFPEXT(FromMVT, ToMVT);
465 case TargetOpcode::G_FPTRUNC:
466 return RTLIB::getFPROUND(FromMVT, ToMVT);
467 case TargetOpcode::G_FPTOSI:
468 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
469 case TargetOpcode::G_FPTOUI:
470 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
471 case TargetOpcode::G_SITOFP:
472 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
473 case TargetOpcode::G_UITOFP:
474 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
476 llvm_unreachable("Unsupported libcall function");
479 static LegalizerHelper::LegalizeResult
480 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
481 Type *FromType) {
482 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
483 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
484 {{MI.getOperand(1).getReg(), FromType}});
487 LegalizerHelper::LegalizeResult
488 LegalizerHelper::libcall(MachineInstr &MI) {
489 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
490 unsigned Size = LLTy.getSizeInBits();
491 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
493 MIRBuilder.setInstr(MI);
495 switch (MI.getOpcode()) {
496 default:
497 return UnableToLegalize;
498 case TargetOpcode::G_SDIV:
499 case TargetOpcode::G_UDIV:
500 case TargetOpcode::G_SREM:
501 case TargetOpcode::G_UREM:
502 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
503 Type *HLTy = IntegerType::get(Ctx, Size);
504 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
505 if (Status != Legalized)
506 return Status;
507 break;
509 case TargetOpcode::G_FADD:
510 case TargetOpcode::G_FSUB:
511 case TargetOpcode::G_FMUL:
512 case TargetOpcode::G_FDIV:
513 case TargetOpcode::G_FMA:
514 case TargetOpcode::G_FPOW:
515 case TargetOpcode::G_FREM:
516 case TargetOpcode::G_FCOS:
517 case TargetOpcode::G_FSIN:
518 case TargetOpcode::G_FLOG10:
519 case TargetOpcode::G_FLOG:
520 case TargetOpcode::G_FLOG2:
521 case TargetOpcode::G_FEXP:
522 case TargetOpcode::G_FEXP2:
523 case TargetOpcode::G_FCEIL:
524 case TargetOpcode::G_FFLOOR: {
525 if (Size > 64) {
526 LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
527 return UnableToLegalize;
529 Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
530 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
531 if (Status != Legalized)
532 return Status;
533 break;
535 case TargetOpcode::G_FPEXT: {
536 // FIXME: Support other floating point types (half, fp128 etc)
537 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
538 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
539 if (ToSize != 64 || FromSize != 32)
540 return UnableToLegalize;
541 LegalizeResult Status = conversionLibcall(
542 MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
543 if (Status != Legalized)
544 return Status;
545 break;
547 case TargetOpcode::G_FPTRUNC: {
548 // FIXME: Support other floating point types (half, fp128 etc)
549 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
550 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
551 if (ToSize != 32 || FromSize != 64)
552 return UnableToLegalize;
553 LegalizeResult Status = conversionLibcall(
554 MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
555 if (Status != Legalized)
556 return Status;
557 break;
559 case TargetOpcode::G_FPTOSI:
560 case TargetOpcode::G_FPTOUI: {
561 // FIXME: Support other types
562 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
563 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
564 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
565 return UnableToLegalize;
566 LegalizeResult Status = conversionLibcall(
567 MI, MIRBuilder,
568 ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
569 FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
570 if (Status != Legalized)
571 return Status;
572 break;
574 case TargetOpcode::G_SITOFP:
575 case TargetOpcode::G_UITOFP: {
576 // FIXME: Support other types
577 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
578 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
579 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
580 return UnableToLegalize;
581 LegalizeResult Status = conversionLibcall(
582 MI, MIRBuilder,
583 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
584 FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
585 if (Status != Legalized)
586 return Status;
587 break;
591 MI.eraseFromParent();
592 return Legalized;
595 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
596 unsigned TypeIdx,
597 LLT NarrowTy) {
598 MIRBuilder.setInstr(MI);
600 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
601 uint64_t NarrowSize = NarrowTy.getSizeInBits();
603 switch (MI.getOpcode()) {
604 default:
605 return UnableToLegalize;
606 case TargetOpcode::G_IMPLICIT_DEF: {
607 // FIXME: add support for when SizeOp0 isn't an exact multiple of
608 // NarrowSize.
609 if (SizeOp0 % NarrowSize != 0)
610 return UnableToLegalize;
611 int NumParts = SizeOp0 / NarrowSize;
613 SmallVector<Register, 2> DstRegs;
614 for (int i = 0; i < NumParts; ++i)
615 DstRegs.push_back(
616 MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
618 Register DstReg = MI.getOperand(0).getReg();
619 if(MRI.getType(DstReg).isVector())
620 MIRBuilder.buildBuildVector(DstReg, DstRegs);
621 else
622 MIRBuilder.buildMerge(DstReg, DstRegs);
623 MI.eraseFromParent();
624 return Legalized;
626 case TargetOpcode::G_CONSTANT: {
627 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
628 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
629 unsigned TotalSize = Ty.getSizeInBits();
630 unsigned NarrowSize = NarrowTy.getSizeInBits();
631 int NumParts = TotalSize / NarrowSize;
633 SmallVector<Register, 4> PartRegs;
634 for (int I = 0; I != NumParts; ++I) {
635 unsigned Offset = I * NarrowSize;
636 auto K = MIRBuilder.buildConstant(NarrowTy,
637 Val.lshr(Offset).trunc(NarrowSize));
638 PartRegs.push_back(K.getReg(0));
641 LLT LeftoverTy;
642 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
643 SmallVector<Register, 1> LeftoverRegs;
644 if (LeftoverBits != 0) {
645 LeftoverTy = LLT::scalar(LeftoverBits);
646 auto K = MIRBuilder.buildConstant(
647 LeftoverTy,
648 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
649 LeftoverRegs.push_back(K.getReg(0));
652 insertParts(MI.getOperand(0).getReg(),
653 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
655 MI.eraseFromParent();
656 return Legalized;
658 case TargetOpcode::G_SEXT: {
659 if (TypeIdx != 0)
660 return UnableToLegalize;
662 Register SrcReg = MI.getOperand(1).getReg();
663 LLT SrcTy = MRI.getType(SrcReg);
665 // FIXME: support the general case where the requested NarrowTy may not be
666 // the same as the source type. E.g. s128 = sext(s32)
667 if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
668 SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
669 LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
670 return UnableToLegalize;
673 // Shift the sign bit of the low register through the high register.
674 auto ShiftAmt =
675 MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
676 auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
677 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
678 MI.eraseFromParent();
679 return Legalized;
681 case TargetOpcode::G_ZEXT: {
682 if (TypeIdx != 0)
683 return UnableToLegalize;
685 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
686 uint64_t SizeOp1 = SrcTy.getSizeInBits();
687 if (SizeOp0 % SizeOp1 != 0)
688 return UnableToLegalize;
690 // Generate a merge where the bottom bits are taken from the source, and
691 // zero everything else.
692 Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
693 unsigned NumParts = SizeOp0 / SizeOp1;
694 SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()};
695 for (unsigned Part = 1; Part < NumParts; ++Part)
696 Srcs.push_back(ZeroReg);
697 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);
698 MI.eraseFromParent();
699 return Legalized;
701 case TargetOpcode::G_TRUNC: {
702 if (TypeIdx != 1)
703 return UnableToLegalize;
705 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
706 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
707 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
708 return UnableToLegalize;
711 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
712 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0));
713 MI.eraseFromParent();
714 return Legalized;
717 case TargetOpcode::G_ADD: {
718 // FIXME: add support for when SizeOp0 isn't an exact multiple of
719 // NarrowSize.
720 if (SizeOp0 % NarrowSize != 0)
721 return UnableToLegalize;
722 // Expand in terms of carry-setting/consuming G_ADDE instructions.
723 int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
725 SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
726 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
727 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
729 Register CarryIn;
730 for (int i = 0; i < NumParts; ++i) {
731 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
732 Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
734 if (i == 0)
735 MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
736 else {
737 MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
738 Src2Regs[i], CarryIn);
741 DstRegs.push_back(DstReg);
742 CarryIn = CarryOut;
744 Register DstReg = MI.getOperand(0).getReg();
745 if(MRI.getType(DstReg).isVector())
746 MIRBuilder.buildBuildVector(DstReg, DstRegs);
747 else
748 MIRBuilder.buildMerge(DstReg, DstRegs);
749 MI.eraseFromParent();
750 return Legalized;
752 case TargetOpcode::G_SUB: {
753 // FIXME: add support for when SizeOp0 isn't an exact multiple of
754 // NarrowSize.
755 if (SizeOp0 % NarrowSize != 0)
756 return UnableToLegalize;
758 int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
760 SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
761 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
762 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
764 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
765 Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
766 MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
767 {Src1Regs[0], Src2Regs[0]});
768 DstRegs.push_back(DstReg);
769 Register BorrowIn = BorrowOut;
770 for (int i = 1; i < NumParts; ++i) {
771 DstReg = MRI.createGenericVirtualRegister(NarrowTy);
772 BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
774 MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
775 {Src1Regs[i], Src2Regs[i], BorrowIn});
777 DstRegs.push_back(DstReg);
778 BorrowIn = BorrowOut;
780 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
781 MI.eraseFromParent();
782 return Legalized;
784 case TargetOpcode::G_MUL:
785 case TargetOpcode::G_UMULH:
786 return narrowScalarMul(MI, NarrowTy);
787 case TargetOpcode::G_EXTRACT:
788 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
789 case TargetOpcode::G_INSERT:
790 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
791 case TargetOpcode::G_LOAD: {
792 const auto &MMO = **MI.memoperands_begin();
793 Register DstReg = MI.getOperand(0).getReg();
794 LLT DstTy = MRI.getType(DstReg);
795 if (DstTy.isVector())
796 return UnableToLegalize;
798 if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
799 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
800 auto &MMO = **MI.memoperands_begin();
801 MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
802 MIRBuilder.buildAnyExt(DstReg, TmpReg);
803 MI.eraseFromParent();
804 return Legalized;
807 return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
809 case TargetOpcode::G_ZEXTLOAD:
810 case TargetOpcode::G_SEXTLOAD: {
811 bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
812 Register DstReg = MI.getOperand(0).getReg();
813 Register PtrReg = MI.getOperand(1).getReg();
815 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
816 auto &MMO = **MI.memoperands_begin();
817 if (MMO.getSizeInBits() == NarrowSize) {
818 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
819 } else {
820 unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
821 : TargetOpcode::G_SEXTLOAD;
822 MIRBuilder.buildInstr(ExtLoad)
823 .addDef(TmpReg)
824 .addUse(PtrReg)
825 .addMemOperand(&MMO);
828 if (ZExt)
829 MIRBuilder.buildZExt(DstReg, TmpReg);
830 else
831 MIRBuilder.buildSExt(DstReg, TmpReg);
833 MI.eraseFromParent();
834 return Legalized;
836 case TargetOpcode::G_STORE: {
837 const auto &MMO = **MI.memoperands_begin();
839 Register SrcReg = MI.getOperand(0).getReg();
840 LLT SrcTy = MRI.getType(SrcReg);
841 if (SrcTy.isVector())
842 return UnableToLegalize;
844 int NumParts = SizeOp0 / NarrowSize;
845 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
846 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
847 if (SrcTy.isVector() && LeftoverBits != 0)
848 return UnableToLegalize;
850 if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
851 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
852 auto &MMO = **MI.memoperands_begin();
853 MIRBuilder.buildTrunc(TmpReg, SrcReg);
854 MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
855 MI.eraseFromParent();
856 return Legalized;
859 return reduceLoadStoreWidth(MI, 0, NarrowTy);
861 case TargetOpcode::G_SELECT:
862 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
863 case TargetOpcode::G_AND:
864 case TargetOpcode::G_OR:
865 case TargetOpcode::G_XOR: {
866 // Legalize bitwise operation:
867 // A = BinOp<Ty> B, C
868 // into:
869 // B1, ..., BN = G_UNMERGE_VALUES B
870 // C1, ..., CN = G_UNMERGE_VALUES C
871 // A1 = BinOp<Ty/N> B1, C2
872 // ...
873 // AN = BinOp<Ty/N> BN, CN
874 // A = G_MERGE_VALUES A1, ..., AN
875 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
877 case TargetOpcode::G_SHL:
878 case TargetOpcode::G_LSHR:
879 case TargetOpcode::G_ASHR:
880 return narrowScalarShift(MI, TypeIdx, NarrowTy);
881 case TargetOpcode::G_CTLZ:
882 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
883 case TargetOpcode::G_CTTZ:
884 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
885 case TargetOpcode::G_CTPOP:
886 if (TypeIdx != 0)
887 return UnableToLegalize; // TODO
889 Observer.changingInstr(MI);
890 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
891 Observer.changedInstr(MI);
892 return Legalized;
893 case TargetOpcode::G_INTTOPTR:
894 if (TypeIdx != 1)
895 return UnableToLegalize;
897 Observer.changingInstr(MI);
898 narrowScalarSrc(MI, NarrowTy, 1);
899 Observer.changedInstr(MI);
900 return Legalized;
901 case TargetOpcode::G_PTRTOINT:
902 if (TypeIdx != 0)
903 return UnableToLegalize;
905 Observer.changingInstr(MI);
906 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
907 Observer.changedInstr(MI);
908 return Legalized;
909 case TargetOpcode::G_PHI: {
910 unsigned NumParts = SizeOp0 / NarrowSize;
911 SmallVector<Register, 2> DstRegs;
912 SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
913 DstRegs.resize(NumParts);
914 SrcRegs.resize(MI.getNumOperands() / 2);
915 Observer.changingInstr(MI);
916 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
917 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
918 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
919 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
920 SrcRegs[i / 2]);
922 MachineBasicBlock &MBB = *MI.getParent();
923 MIRBuilder.setInsertPt(MBB, MI);
924 for (unsigned i = 0; i < NumParts; ++i) {
925 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
926 MachineInstrBuilder MIB =
927 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
928 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
929 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
931 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
932 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
933 Observer.changedInstr(MI);
934 MI.eraseFromParent();
935 return Legalized;
937 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
938 case TargetOpcode::G_INSERT_VECTOR_ELT: {
939 if (TypeIdx != 2)
940 return UnableToLegalize;
942 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
943 Observer.changingInstr(MI);
944 narrowScalarSrc(MI, NarrowTy, OpIdx);
945 Observer.changedInstr(MI);
946 return Legalized;
948 case TargetOpcode::G_ICMP: {
949 uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
950 if (NarrowSize * 2 != SrcSize)
951 return UnableToLegalize;
953 Observer.changingInstr(MI);
954 Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
955 Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
956 MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
958 Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
959 Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
960 MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
962 CmpInst::Predicate Pred =
963 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
964 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
966 if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
967 MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
968 MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
969 MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
970 MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
971 MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
972 } else {
973 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
974 MachineInstrBuilder CmpHEQ =
975 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
976 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
977 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
978 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
980 Observer.changedInstr(MI);
981 MI.eraseFromParent();
982 return Legalized;
984 case TargetOpcode::G_SEXT_INREG: {
985 if (TypeIdx != 0)
986 return UnableToLegalize;
988 if (!MI.getOperand(2).isImm())
989 return UnableToLegalize;
990 int64_t SizeInBits = MI.getOperand(2).getImm();
992 // So long as the new type has more bits than the bits we're extending we
993 // don't need to break it apart.
994 if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
995 Observer.changingInstr(MI);
996 // We don't lose any non-extension bits by truncating the src and
997 // sign-extending the dst.
998 MachineOperand &MO1 = MI.getOperand(1);
999 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg());
1000 MO1.setReg(TruncMIB->getOperand(0).getReg());
1002 MachineOperand &MO2 = MI.getOperand(0);
1003 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1004 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1005 MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt});
1006 MO2.setReg(DstExt);
1007 Observer.changedInstr(MI);
1008 return Legalized;
1011 // Break it apart. Components below the extension point are unmodified. The
1012 // component containing the extension point becomes a narrower SEXT_INREG.
1013 // Components above it are ashr'd from the component containing the
1014 // extension point.
1015 if (SizeOp0 % NarrowSize != 0)
1016 return UnableToLegalize;
1017 int NumParts = SizeOp0 / NarrowSize;
1019 // List the registers where the destination will be scattered.
1020 SmallVector<Register, 2> DstRegs;
1021 // List the registers where the source will be split.
1022 SmallVector<Register, 2> SrcRegs;
1024 // Create all the temporary registers.
1025 for (int i = 0; i < NumParts; ++i) {
1026 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1028 SrcRegs.push_back(SrcReg);
1031 // Explode the big arguments into smaller chunks.
1032 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg());
1034 Register AshrCstReg =
1035 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1036 ->getOperand(0)
1037 .getReg();
1038 Register FullExtensionReg = 0;
1039 Register PartialExtensionReg = 0;
1041 // Do the operation on each small part.
1042 for (int i = 0; i < NumParts; ++i) {
1043 if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1044 DstRegs.push_back(SrcRegs[i]);
1045 else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1046 assert(PartialExtensionReg &&
1047 "Expected to visit partial extension before full");
1048 if (FullExtensionReg) {
1049 DstRegs.push_back(FullExtensionReg);
1050 continue;
1052 DstRegs.push_back(MIRBuilder
1053 .buildInstr(TargetOpcode::G_ASHR, {NarrowTy},
1054 {PartialExtensionReg, AshrCstReg})
1055 ->getOperand(0)
1056 .getReg());
1057 FullExtensionReg = DstRegs.back();
1058 } else {
1059 DstRegs.push_back(
1060 MIRBuilder
1061 .buildInstr(
1062 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1063 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1064 ->getOperand(0)
1065 .getReg());
1066 PartialExtensionReg = DstRegs.back();
1070 // Gather the destination registers into the final destination.
1071 Register DstReg = MI.getOperand(0).getReg();
1072 MIRBuilder.buildMerge(DstReg, DstRegs);
1073 MI.eraseFromParent();
1074 return Legalized;
1079 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1080 unsigned OpIdx, unsigned ExtOpcode) {
1081 MachineOperand &MO = MI.getOperand(OpIdx);
1082 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
1083 MO.setReg(ExtB->getOperand(0).getReg());
1086 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1087 unsigned OpIdx) {
1088 MachineOperand &MO = MI.getOperand(OpIdx);
1089 auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
1090 {MO.getReg()});
1091 MO.setReg(ExtB->getOperand(0).getReg());
1094 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1095 unsigned OpIdx, unsigned TruncOpcode) {
1096 MachineOperand &MO = MI.getOperand(OpIdx);
1097 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1098 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1099 MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
1100 MO.setReg(DstExt);
1103 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1104 unsigned OpIdx, unsigned ExtOpcode) {
1105 MachineOperand &MO = MI.getOperand(OpIdx);
1106 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1107 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1108 MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
1109 MO.setReg(DstTrunc);
1112 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1113 unsigned OpIdx) {
1114 MachineOperand &MO = MI.getOperand(OpIdx);
1115 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1116 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1117 MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
1118 MO.setReg(DstExt);
1121 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1122 unsigned OpIdx) {
1123 MachineOperand &MO = MI.getOperand(OpIdx);
1125 LLT OldTy = MRI.getType(MO.getReg());
1126 unsigned OldElts = OldTy.getNumElements();
1127 unsigned NewElts = MoreTy.getNumElements();
1129 unsigned NumParts = NewElts / OldElts;
1131 // Use concat_vectors if the result is a multiple of the number of elements.
1132 if (NumParts * OldElts == NewElts) {
1133 SmallVector<Register, 8> Parts;
1134 Parts.push_back(MO.getReg());
1136 Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1137 for (unsigned I = 1; I != NumParts; ++I)
1138 Parts.push_back(ImpDef);
1140 auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1141 MO.setReg(Concat.getReg(0));
1142 return;
1145 Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1146 Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1147 MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1148 MO.setReg(MoreReg);
1151 LegalizerHelper::LegalizeResult
1152 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1153 LLT WideTy) {
1154 if (TypeIdx != 1)
1155 return UnableToLegalize;
1157 Register DstReg = MI.getOperand(0).getReg();
1158 LLT DstTy = MRI.getType(DstReg);
1159 if (DstTy.isVector())
1160 return UnableToLegalize;
1162 Register Src1 = MI.getOperand(1).getReg();
1163 LLT SrcTy = MRI.getType(Src1);
1164 const int DstSize = DstTy.getSizeInBits();
1165 const int SrcSize = SrcTy.getSizeInBits();
1166 const int WideSize = WideTy.getSizeInBits();
1167 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1169 unsigned NumOps = MI.getNumOperands();
1170 unsigned NumSrc = MI.getNumOperands() - 1;
1171 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1173 if (WideSize >= DstSize) {
1174 // Directly pack the bits in the target type.
1175 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1177 for (unsigned I = 2; I != NumOps; ++I) {
1178 const unsigned Offset = (I - 1) * PartSize;
1180 Register SrcReg = MI.getOperand(I).getReg();
1181 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1183 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1185 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1186 MRI.createGenericVirtualRegister(WideTy);
1188 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1189 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1190 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1191 ResultReg = NextResult;
1194 if (WideSize > DstSize)
1195 MIRBuilder.buildTrunc(DstReg, ResultReg);
1196 else if (DstTy.isPointer())
1197 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1199 MI.eraseFromParent();
1200 return Legalized;
1203 // Unmerge the original values to the GCD type, and recombine to the next
1204 // multiple greater than the original type.
1206 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1207 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1208 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1209 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1210 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1211 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1212 // %12:_(s12) = G_MERGE_VALUES %10, %11
1214 // Padding with undef if necessary:
1216 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1217 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1218 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1219 // %7:_(s2) = G_IMPLICIT_DEF
1220 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1221 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1222 // %10:_(s12) = G_MERGE_VALUES %8, %9
1224 const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1225 LLT GCDTy = LLT::scalar(GCD);
1227 SmallVector<Register, 8> Parts;
1228 SmallVector<Register, 8> NewMergeRegs;
1229 SmallVector<Register, 8> Unmerges;
1230 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1232 // Decompose the original operands if they don't evenly divide.
1233 for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1234 Register SrcReg = MI.getOperand(I).getReg();
1235 if (GCD == SrcSize) {
1236 Unmerges.push_back(SrcReg);
1237 } else {
1238 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1239 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1240 Unmerges.push_back(Unmerge.getReg(J));
1244 // Pad with undef to the next size that is a multiple of the requested size.
1245 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1246 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1247 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1248 Unmerges.push_back(UndefReg);
1251 const int PartsPerGCD = WideSize / GCD;
1253 // Build merges of each piece.
1254 ArrayRef<Register> Slicer(Unmerges);
1255 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1256 auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1257 NewMergeRegs.push_back(Merge.getReg(0));
1260 // A truncate may be necessary if the requested type doesn't evenly divide the
1261 // original result type.
1262 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1263 MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1264 } else {
1265 auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1266 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1269 MI.eraseFromParent();
1270 return Legalized;
/// Widen the destination type of a scalar G_UNMERGE_VALUES (TypeIdx 0).
///
/// Builds a zext of the source into a scalar of NumDst * WideTy bits, then
/// shifts shifted copies of it back into itself so each WideTy-sized slot
/// holds the piece its widened destination should read; finally each
/// destination is widened via widenScalarDst.
/// NOTE(review): the replication uses SizeDiff * I as the shift amount —
/// presumably chosen so the I-th slot lines up after widening; confirm
/// against the G_UNMERGE_VALUES legalization tests before relying on it.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  // Operands are [dst0, ..., dstN-1, src]; the last operand is the source.
  unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (!SrcTy.isScalar())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  // New source is wide enough to feed NumDst destinations of WideTy.
  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
  LLT NewSrcTy = LLT::scalar(NewSrcSize);
  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();

  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);

  // OR in shifted copies so each widened destination's slot contains its
  // original piece.
  for (unsigned I = 1; I != NumDst; ++I) {
    auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
    auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
    WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
  }

  Observer.changingInstr(MI);

  // Rewrite the source to the repacked wide value, then widen every
  // destination (each gets a trailing G_TRUNC back to DstTy).
  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
  for (unsigned I = 0; I != NumDst; ++I)
    widenScalarDst(MI, WideTy, I);

  Observer.changedInstr(MI);

  return Legalized;
}
1313 LegalizerHelper::LegalizeResult
1314 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1315 LLT WideTy) {
1316 Register DstReg = MI.getOperand(0).getReg();
1317 Register SrcReg = MI.getOperand(1).getReg();
1318 LLT SrcTy = MRI.getType(SrcReg);
1320 LLT DstTy = MRI.getType(DstReg);
1321 unsigned Offset = MI.getOperand(2).getImm();
1323 if (TypeIdx == 0) {
1324 if (SrcTy.isVector() || DstTy.isVector())
1325 return UnableToLegalize;
1327 SrcOp Src(SrcReg);
1328 if (SrcTy.isPointer()) {
1329 // Extracts from pointers can be handled only if they are really just
1330 // simple integers.
1331 const DataLayout &DL = MIRBuilder.getDataLayout();
1332 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1333 return UnableToLegalize;
1335 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1336 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1337 SrcTy = SrcAsIntTy;
1340 if (DstTy.isPointer())
1341 return UnableToLegalize;
1343 if (Offset == 0) {
1344 // Avoid a shift in the degenerate case.
1345 MIRBuilder.buildTrunc(DstReg,
1346 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1347 MI.eraseFromParent();
1348 return Legalized;
1351 // Do a shift in the source type.
1352 LLT ShiftTy = SrcTy;
1353 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1354 Src = MIRBuilder.buildAnyExt(WideTy, Src);
1355 ShiftTy = WideTy;
1356 } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
1357 return UnableToLegalize;
1359 auto LShr = MIRBuilder.buildLShr(
1360 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1361 MIRBuilder.buildTrunc(DstReg, LShr);
1362 MI.eraseFromParent();
1363 return Legalized;
1366 if (SrcTy.isScalar()) {
1367 Observer.changingInstr(MI);
1368 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1369 Observer.changedInstr(MI);
1370 return Legalized;
1373 if (!SrcTy.isVector())
1374 return UnableToLegalize;
1376 if (DstTy != SrcTy.getElementType())
1377 return UnableToLegalize;
1379 if (Offset % SrcTy.getScalarSizeInBits() != 0)
1380 return UnableToLegalize;
1382 Observer.changingInstr(MI);
1383 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1385 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1386 Offset);
1387 widenScalarDst(MI, WideTy.getScalarType(), 0);
1388 Observer.changedInstr(MI);
1389 return Legalized;
1392 LegalizerHelper::LegalizeResult
1393 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1394 LLT WideTy) {
1395 if (TypeIdx != 0)
1396 return UnableToLegalize;
1397 Observer.changingInstr(MI);
1398 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1399 widenScalarDst(MI, WideTy);
1400 Observer.changedInstr(MI);
1401 return Legalized;
/// Legalize \p MI by widening the scalar type indexed by \p TypeIdx to
/// \p WideTy. Dispatches per opcode: sources are extended with the opcode
/// that preserves the operand's meaning (zext for unsigned values, sext for
/// signed, anyext when high bits are irrelevant), and destinations are
/// redirected through a trailing conversion back to the original type.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    // Zero-extend both inputs so the carry/borrow is observable in the wide
    // result's high bits.
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
                         AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      // Widening only the result type needs no compensation.
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero.  This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
        WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    // Byte-swap in the wide type, then shift right by the extra bits so the
    // swapped original bytes land back in the low part.
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
      .addDef(ShrReg)
      .addUse(DstExt)
      .addUse(ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITREVERSE: {
    // Same pattern as G_BSWAP: reverse in the wide type, then shift the
    // reversed original bits down into the low part.
    Observer.changingInstr(MI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
    MIRBuilder.buildTrunc(DstReg, Shift);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    // Signed operations need sign-extension to preserve their value.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // G_ASHR needs the sign bit replicated; G_LSHR needs zeros shifted in.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // Unsigned operations need zero-extension to preserve their value.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!isPowerOf2_32(Ty.getSizeInBits()))
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // Booleans must be zero-extended so the stored memory value is canonical.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Sign-extend the immediate to the wide type.
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // Extend according to the predicate's signedness so the comparison
      // result is unchanged.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Extend each incoming value in its predecessor block, before the
    // terminator.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // Promote all FP sources with fpext and truncate the result with fptrunc.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
      MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
}
/// Lower \p MI, replacing it with a sequence of simpler, already-legal (or
/// separately legalized) instructions. \p TypeIdx / \p Ty identify the type
/// the target asked to lower. Returns UnableToLegalize when this helper has
/// no lowering for the opcode/type combination.
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  using namespace TargetOpcode;
  MIRBuilder.setInstr(MI);

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // Lower remainder as: rem = x - (x / y) * y, using the matching
    // signed/unsigned division.
    Register QuotReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
      .addDef(QuotReg)
      .addUse(MI.getOperand(1).getReg())
      .addUse(MI.getOperand(2).getReg());

    Register ProdReg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
    MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                        ProdReg);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
    // result.
    Register Res = MI.getOperand(0).getReg();
    Register Overflow = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildMul(Res, LHS, RHS);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    Register HiPart = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(Opcode)
      .addDef(HiPart)
      .addUse(LHS)
      .addUse(RHS);

    Register Zero = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildConstant(Zero, 0);

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      Register Shifted = MRI.createGenericVirtualRegister(Ty);
      Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
      MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
      MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
        .addDef(Shifted)
        .addUse(Res)
        .addUse(ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      // Unsigned multiply overflows iff the high half is non-zero.
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    // Lower fneg as a subtraction from signed floating-point zero:
    // res = -0.0 - x.
    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    Type *ZeroTy;
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // Map the LLT size onto the matching IR floating-point type so we can
    // build the zero constant.
    switch (Ty.getSizeInBits()) {
    case 16:
      ZeroTy = Type::getHalfTy(Ctx);
      break;
    case 32:
      ZeroTy = Type::getFloatTy(Ctx);
      break;
    case 64:
      ZeroTy = Type::getDoubleTy(Ctx);
      break;
    case 128:
      ZeroTy = Type::getFP128Ty(Ctx);
      break;
    default:
      llvm_unreachable("unexpected floating-point type");
    }
    ConstantFP &ZeroForNegation =
        *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
    Register SubByReg = MI.getOperand(1).getReg();
    Register ZeroReg = Zero->getOperand(0).getReg();
    MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
                          MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    Register Res = MI.getOperand(0).getReg();
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    Register Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
    MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FMAD:
    return lowerFMad(MI);
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Lower to a plain atomic cmpxchg plus an explicit compare of the loaded
    // old value against the expected value to produce the success flag.
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
    Register Addr = MI.getOperand(2).getReg();
    Register CmpVal = MI.getOperand(3).getReg();
    Register NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
    Register DstReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    auto &MMO = **MI.memoperands_begin();

    if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
      if (MI.getOpcode() == TargetOpcode::G_LOAD) {
        // This load needs splitting into power of 2 sized loads.
        if (DstTy.isVector())
          return UnableToLegalize;
        if (isPowerOf2_32(DstTy.getSizeInBits()))
          return UnableToLegalize; // Don't know what we're being asked to do.

        // Our strategy here is to generate anyextending loads for the smaller
        // types up to next power-2 result type, and then combine the two larger
        // result values together, before truncating back down to the non-pow-2
        // type.
        // E.g. v1 = i24 load =>
        // v2 = i32 load (2 byte)
        // v3 = i32 load (1 byte)
        // v4 = i32 shl v3, 16
        // v5 = i32 or v4, v2
        // v1 = i24 trunc v5
        // By doing this we generate the correct truncate which should get
        // combined away as an artifact with a matching extend.
        uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
        uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;

        MachineFunction &MF = MIRBuilder.getMF();
        MachineMemOperand *LargeMMO =
          MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
        MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
          &MMO, LargeSplitSize / 8, SmallSplitSize / 8);

        LLT PtrTy = MRI.getType(PtrReg);
        unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
        LLT AnyExtTy = LLT::scalar(AnyExtSize);
        Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
        Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
        auto LargeLoad =
          MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);

        // Address of the high piece: base + LargeSplitSize bytes.
        auto OffsetCst =
          MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
        Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
        auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
        auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
                                              *SmallMMO);

        // Combine: (small << LargeSplitSize) | large, then truncate back.
        auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
        auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
        auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
        MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
        MI.eraseFromParent();
        return Legalized;
      }
      // An extending load whose result width equals the memory width is just
      // a plain load.
      MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
      MI.eraseFromParent();
      return Legalized;
    }

    if (DstTy.isScalar()) {
      // Load at the memory width, then extend according to the original
      // opcode (any/sign/zero extension).
      Register TmpReg =
        MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      case TargetOpcode::G_LOAD:
        MIRBuilder.buildAnyExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_SEXTLOAD:
        MIRBuilder.buildSExt(DstReg, TmpReg);
        break;
      case TargetOpcode::G_ZEXTLOAD:
        MIRBuilder.buildZExt(DstReg, TmpReg);
        break;
      }
      MI.eraseFromParent();
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_STORE: {
    // Lower a non-power of 2 store into multiple pow-2 stores.
    // E.g. split an i24 store into an i16 store + i8 store.
    // We do this by first extending the stored value to the next largest power
    // of 2 type, and then using truncating stores to store the components.
    // By doing this, likewise with G_LOAD, generate an extend that can be
    // artifact-combined away instead of leaving behind extracts.
    Register SrcReg = MI.getOperand(0).getReg();
    Register PtrReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    MachineMemOperand &MMO = **MI.memoperands_begin();
    if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
      return UnableToLegalize;
    if (SrcTy.isVector())
      return UnableToLegalize;
    if (isPowerOf2_32(SrcTy.getSizeInBits()))
      return UnableToLegalize; // Don't know what we're being asked to do.

    // Extend to the next pow-2.
    const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
    auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);

    // Obtain the smaller value by shifting away the larger value.
    uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
    uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
    auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
    auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);

    // Generate the GEP and truncating stores.
    LLT PtrTy = MRI.getType(PtrReg);
    auto OffsetCst =
      MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
    Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
    auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));

    MachineFunction &MF = MIRBuilder.getMF();
    MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
    MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
    MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
    MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI, TypeIdx, Ty);
  case G_UADDO: {
    // res = lhs + rhs; carry-out is set when the result wrapped (res <u rhs).
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    // res = lhs + rhs + zext(carry-in); carry-out is computed as res <u lhs.
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register CarryIn = MI.getOperand(4).getReg();

    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
    Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);

    MIRBuilder.buildAdd(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    // res = lhs - rhs; borrow-out is set when lhs <u rhs.
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    // res = lhs - rhs - zext(borrow-in);
    // borrow-out = (lhs == rhs) ? borrow-in : (lhs <u rhs).
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();

    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
    Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
    Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
    Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));

    MIRBuilder.buildSub(TmpRes, LHS, RHS);
    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UITOFP:
    return lowerUITOFP(MI, TypeIdx, Ty);
  case G_SITOFP:
    return lowerSITOFP(MI, TypeIdx, Ty);
  case G_FPTOUI:
    return lowerFPTOUI(MI, TypeIdx, Ty);
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
    return lowerMinMax(MI, TypeIdx, Ty);
  case G_FCOPYSIGN:
    return lowerFCopySign(MI, TypeIdx, Ty);
  case G_FMINNUM:
  case G_FMAXNUM:
    return lowerFMinNumMaxNum(MI);
  case G_UNMERGE_VALUES:
    return lowerUnmergeValues(MI);
  case TargetOpcode::G_SEXT_INREG: {
    // Lower sign-extend-in-register as a left shift followed by an arithmetic
    // right shift by (scalar width - SizeInBits).
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    Register TmpRes = MRI.createGenericVirtualRegister(DstTy);

    auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
    MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()});
    MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()});
    MI.eraseFromParent();
    return Legalized;
  }
  case G_SHUFFLE_VECTOR:
    return lowerShuffleVector(MI);
  case G_DYN_STACKALLOC:
    return lowerDynStackAlloc(MI);
  }
}
2249 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
2250 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
2251 SmallVector<Register, 2> DstRegs;
2253 unsigned NarrowSize = NarrowTy.getSizeInBits();
2254 Register DstReg = MI.getOperand(0).getReg();
2255 unsigned Size = MRI.getType(DstReg).getSizeInBits();
2256 int NumParts = Size / NarrowSize;
2257 // FIXME: Don't know how to handle the situation where the small vectors
2258 // aren't all the same size yet.
2259 if (Size % NarrowSize != 0)
2260 return UnableToLegalize;
2262 for (int i = 0; i < NumParts; ++i) {
2263 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
2264 MIRBuilder.buildUndef(TmpReg);
2265 DstRegs.push_back(TmpReg);
2268 if (NarrowTy.isVector())
2269 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2270 else
2271 MIRBuilder.buildBuildVector(DstReg, DstRegs);
2273 MI.eraseFromParent();
2274 return Legalized;
/// Break a vector operation with 1-3 source operands into NarrowTy-sized
/// pieces, applying the same opcode to each piece and recombining the
/// results. Handles an uneven breakdown only when the leftover is exactly one
/// element wide.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  const unsigned Opc = MI.getOpcode();
  const unsigned NumOps = MI.getNumOperands() - 1; // Source operand count.
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned Size = DstTy.getSizeInBits();
  const int NumParts = Size / NarrowSize;
  const LLT EltTy = DstTy.getElementType();
  const unsigned EltSize = EltTy.getSizeInBits();
  const unsigned BitsForNumParts = NarrowSize * NumParts;

  // Check if we have any leftovers. If we do, then only handle the case where
  // the leftover is one element.
  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
    return UnableToLegalize;

  if (BitsForNumParts != Size) {
    // Uneven case: accumulate pieces into a register initialized to undef.
    Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(AccumDstReg);

    // Handle the pieces which evenly divide into the requested type with
    // extract/op/insert sequence.
    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
      SmallVector<SrcOp, 4> SrcOps;
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
        Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
        SrcOps.push_back(PartOpReg);
      }

      Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);

      Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
      AccumDstReg = PartInsertReg;
    }

    // Handle the remaining element sized leftover piece.
    SmallVector<SrcOp, 4> SrcOps;
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
      Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
                              BitsForNumParts);
      SrcOps.push_back(PartOpReg);
    }

    Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
    MI.eraseFromParent();

    return Legalized;
  }

  // Even case: split every source operand into NumParts pieces, apply the
  // operation piecewise, and rebuild the destination.
  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;

  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);

  if (NumOps >= 2)
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);

  if (NumOps >= 3)
    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);

  for (int i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);

    if (NumOps == 1)
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
    else if (NumOps == 2) {
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
    } else if (NumOps == 3) {
      MIRBuilder.buildInstr(Opc, {DstReg},
                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
    }

    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different element
// type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//             s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  // Instructions are built un-inserted first so each one can collect one
  // operand per loop iteration below; they are inserted at the end.
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    LLT LeftoverTy;
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    // Each operand keeps its own element type but uses the same element count
    // as the requested narrow result type.
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2466 LegalizerHelper::LegalizeResult
2467 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
2468 LLT NarrowTy) {
2469 if (TypeIdx != 0)
2470 return UnableToLegalize;
2472 Register DstReg = MI.getOperand(0).getReg();
2473 Register SrcReg = MI.getOperand(1).getReg();
2474 LLT DstTy = MRI.getType(DstReg);
2475 LLT SrcTy = MRI.getType(SrcReg);
2477 LLT NarrowTy0 = NarrowTy;
2478 LLT NarrowTy1;
2479 unsigned NumParts;
2481 if (NarrowTy.isVector()) {
2482 // Uneven breakdown not handled.
2483 NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
2484 if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
2485 return UnableToLegalize;
2487 NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
2488 } else {
2489 NumParts = DstTy.getNumElements();
2490 NarrowTy1 = SrcTy.getElementType();
2493 SmallVector<Register, 4> SrcRegs, DstRegs;
2494 extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
2496 for (unsigned I = 0; I < NumParts; ++I) {
2497 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2498 MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
2499 .addDef(DstReg)
2500 .addUse(SrcRegs[I]);
2502 NewInst->setFlags(MI.getFlags());
2503 DstRegs.push_back(DstReg);
2506 if (NarrowTy.isVector())
2507 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2508 else
2509 MIRBuilder.buildBuildVector(DstReg, DstRegs);
2511 MI.eraseFromParent();
2512 return Legalized;
/// Split a vector G_ICMP/G_FCMP into NarrowTy-sized compares and recombine
/// the boolean results. TypeIdx 0 narrows the result type; TypeIdx 1 narrows
/// the compared operand type.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1; // Per-piece result type / per-piece source type.

  if (TypeIdx == 0) {
    // Result type was narrowed; derive the matching source piece type.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    // Source type was narrowed; derive the matching result piece type.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      // FP compares carry fast-math flags; copy them to each piece.
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
/// Split a G_SELECT into NarrowTy-sized selects. The condition may be scalar
/// (shared by every piece) or a vector (split alongside the values). TypeIdx
/// 0 narrows the value type; TypeIdx 1 narrows a vector condition type.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1; // Per-piece value type / per-piece condition type.

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      // Vector NarrowTy for the condition is not implemented yet.
      return UnableToLegalize;
    } else {
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  // Only a vector condition is split; a scalar condition is reused for every
  // piece below.
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
/// Split a G_PHI into several narrower phis. New phis are created at the top
/// of the result block, the incoming values are split in each predecessor
/// (before its terminator), and the original wide value is rebuilt after the
/// phis.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first.
  for (int I = 0; I != TotalNumParts; ++I) {
    // NarrowTy pieces come first, then any leftover-typed piece.
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  // Rebuild the original wide register after the phi group, since no
  // non-phi instructions may appear among the phis.
  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
2721 LegalizerHelper::LegalizeResult
2722 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
2723 unsigned TypeIdx,
2724 LLT NarrowTy) {
2725 if (TypeIdx != 1)
2726 return UnableToLegalize;
2728 const int NumDst = MI.getNumOperands() - 1;
2729 const Register SrcReg = MI.getOperand(NumDst).getReg();
2730 LLT SrcTy = MRI.getType(SrcReg);
2732 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2734 // TODO: Create sequence of extracts.
2735 if (DstTy == NarrowTy)
2736 return UnableToLegalize;
2738 LLT GCDTy = getGCDType(SrcTy, NarrowTy);
2739 if (DstTy == GCDTy) {
2740 // This would just be a copy of the same unmerge.
2741 // TODO: Create extracts, pad with undef and create intermediate merges.
2742 return UnableToLegalize;
2745 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2746 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2747 const int PartsPerUnmerge = NumDst / NumUnmerge;
2749 for (int I = 0; I != NumUnmerge; ++I) {
2750 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2752 for (int J = 0; J != PartsPerUnmerge; ++J)
2753 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
2754 MIB.addUse(Unmerge.getReg(I));
2757 MI.eraseFromParent();
2758 return Legalized;
// Break a G_LOAD/G_STORE of a wide value into a series of NarrowTy-sized
// memory accesses at increasing offsets, with a possible odd-sized leftover
// piece. Loads are reassembled with insertParts; stores consume the pieces
// produced by extractParts.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  // NumParts == -1 signals that no valid breakdown was found.
  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // For a load we only need the part counts; registers are created later.
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    // For a store, split the value to be stored into the piece registers now.
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  // Offsets are computed in an integer type matching the pointer width.
  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    // Offset < TotalSize also terminates the leftover invocation, which has
    // fewer than NumParts pieces.
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Derive a narrowed memory operand from the original at this offset.
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // For a load, stitch the loaded pieces back into the original wide value.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
// Top-level dispatcher for the FewerElements legalization action: route each
// opcode to the helper that knows how to split that operation into
// NarrowTy-element pieces.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  // Simple operations: every operand and the result share a single type
  // index, so each piece is just the same opcode on narrower operands.
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  // Operations whose operands may use more than one type index (e.g. a
  // shift amount with its own type).
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  // Conversions: source and result element types differ.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
// Expand a shift by a known-constant amount \p Amt on a value twice the
// width of \p HalfTy: split the input into low/high halves and compute each
// result half with half-width shifts and ors, picking one of four cases
// depending on how Amt compares to the half width (same case structure as
// SelectionDAG's shift-by-constant expansion).
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  // Split the input value into its low and high halves.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // Shift by zero: just re-merge the unchanged halves.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;                          // Type of each result half.
  unsigned NVTBits = HalfTy.getSizeInBits(); // Bits per half.
  unsigned VTBits = 2 * NVTBits;             // Bits in the full value.

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Only bits from the low half survive, landing in the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Exactly one half-width: low half moves straight into the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // General case: Hi = (InH << Amt) | (InL >> (NVTBits - Amt)).
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Everything is shifted out; zero-fill both halves.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Only bits from the high half survive, landing in the low half.
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      // Exactly one half-width: high half moves straight into the low half.
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // General case: Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR but the vacated high bits take the sign of InH.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  // Recombine the computed halves into the full-width result.
  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
  MI.eraseFromParent();

  return Legalized;
}
// TODO: Optimize if constant shift amount.
//
// Narrow a scalar shift to half the destination width. A constant amount is
// handled by narrowScalarShiftByConstant; otherwise the result halves are
// computed for both the "short" (amount < half width) and "long" cases and
// chosen with selects at runtime.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  // TypeIdx 1 is the shift amount: just narrow that operand in place.
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // A known-constant amount has a much cheaper, select-free expansion.
  if (const MachineInstr *KShiftAmt =
          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // Amt - NewBitSize (used when the shift crosses the half boundary) and
  // NewBitSize - Amt (used to move bits between the halves).
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    // Select the short/long variant; a zero amount must pass InH through
    // unchanged (the "short" expression is not valid for Amt == 0).
    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
    }

    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});     // Lo from Hi part.

    // A zero amount must pass InL through unchanged.
    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
3143 LegalizerHelper::LegalizeResult
3144 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3145 LLT MoreTy) {
3146 assert(TypeIdx == 0 && "Expecting only Idx 0");
3148 Observer.changingInstr(MI);
3149 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3150 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3151 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3152 moreElementsVectorSrc(MI, MoreTy, I);
3155 MachineBasicBlock &MBB = *MI.getParent();
3156 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3157 moreElementsVectorDst(MI, MoreTy, 0);
3158 Observer.changedInstr(MI);
3159 return Legalized;
// Top-level dispatcher for the MoreElements legalization action: pad vector
// operands/results out to MoreTy (a wider vector type) for the opcodes that
// support it.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  MIRBuilder.setInstr(MI);
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Only the result needs widening; sources (if any) are unaffected.
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Only the stored value needs widening.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  // Element-wise operations: widen both sources and the result uniformly.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Vector conditions (vselect) are not handled here.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    // Widen the source vector, then rebuild the unmerge with extra defs
    // covering the padding elements.
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    // Extra (dead) defs to soak up the padding added to the source.
    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}
// Emit schoolbook long multiplication of the NarrowTy-sized pieces in
// \p Src1Regs and \p Src2Regs, writing the product pieces into \p DstRegs
// (least-significant piece first). Only as many result pieces as
// DstRegs.size() are computed; carries into pieces beyond that are dropped.
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  // The lowest result piece is just the low half of the lowest partial
  // product; it can never receive a carry.
  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      // Use overflowing adds so the carries can be propagated into the next
      // result piece.
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
3315 LegalizerHelper::LegalizeResult
3316 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
3317 Register DstReg = MI.getOperand(0).getReg();
3318 Register Src1 = MI.getOperand(1).getReg();
3319 Register Src2 = MI.getOperand(2).getReg();
3321 LLT Ty = MRI.getType(DstReg);
3322 if (Ty.isVector())
3323 return UnableToLegalize;
3325 unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
3326 unsigned DstSize = Ty.getSizeInBits();
3327 unsigned NarrowSize = NarrowTy.getSizeInBits();
3328 if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
3329 return UnableToLegalize;
3331 unsigned NumDstParts = DstSize / NarrowSize;
3332 unsigned NumSrcParts = SrcSize / NarrowSize;
3333 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
3334 unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
3336 SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
3337 extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
3338 extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
3339 DstTmpRegs.resize(DstTmpParts);
3340 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
3342 // Take only high half of registers if this is high mul.
3343 ArrayRef<Register> DstRegs(
3344 IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
3345 MIRBuilder.buildMerge(DstReg, DstRegs);
3346 MI.eraseFromParent();
3347 return Legalized;
// Narrow the source of a G_EXTRACT: split the source into NarrowTy pieces
// and keep (or sub-extract from) only the pieces that overlap the requested
// bit range, then merge them into the destination.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm(); // Bit offset being extracted.
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize; // Bit offset of this source piece.

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      // The piece begins inside the requested range.
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      // The requested range begins inside this piece.
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  // Reassemble the surviving segments into the result.
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
// Narrow the destination of a G_INSERT: split the base value into NarrowTy
// pieces, splice the inserted value into the pieces it overlaps, and merge
// the pieces back together.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(2).getReg();   // Value being inserted.
  uint64_t OpStart = MI.getOperand(3).getImm(); // Bit offset of the insert.
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned DstStart = i * NarrowSize; // Bit offset of this result piece.

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // The inserted value started in an earlier piece; take its tail.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // The inserted value starts inside this piece.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
        std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    // Insert the segment into a fresh copy of this piece.
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
3490 LegalizerHelper::LegalizeResult
3491 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
3492 LLT NarrowTy) {
3493 Register DstReg = MI.getOperand(0).getReg();
3494 LLT DstTy = MRI.getType(DstReg);
3496 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
3498 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3499 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
3500 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3501 LLT LeftoverTy;
3502 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
3503 Src0Regs, Src0LeftoverRegs))
3504 return UnableToLegalize;
3506 LLT Unused;
3507 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
3508 Src1Regs, Src1LeftoverRegs))
3509 llvm_unreachable("inconsistent extractParts result");
3511 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3512 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
3513 {Src0Regs[I], Src1Regs[I]});
3514 DstRegs.push_back(Inst->getOperand(0).getReg());
3517 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3518 auto Inst = MIRBuilder.buildInstr(
3519 MI.getOpcode(),
3520 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
3521 DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
3524 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3525 LeftoverTy, DstLeftoverRegs);
3527 MI.eraseFromParent();
3528 return Legalized;
3531 LegalizerHelper::LegalizeResult
3532 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
3533 LLT NarrowTy) {
3534 if (TypeIdx != 0)
3535 return UnableToLegalize;
3537 Register CondReg = MI.getOperand(1).getReg();
3538 LLT CondTy = MRI.getType(CondReg);
3539 if (CondTy.isVector()) // TODO: Handle vselect
3540 return UnableToLegalize;
3542 Register DstReg = MI.getOperand(0).getReg();
3543 LLT DstTy = MRI.getType(DstReg);
3545 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3546 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3547 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
3548 LLT LeftoverTy;
3549 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
3550 Src1Regs, Src1LeftoverRegs))
3551 return UnableToLegalize;
3553 LLT Unused;
3554 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
3555 Src2Regs, Src2LeftoverRegs))
3556 llvm_unreachable("inconsistent extractParts result");
3558 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3559 auto Select = MIRBuilder.buildSelect(NarrowTy,
3560 CondReg, Src1Regs[I], Src2Regs[I]);
3561 DstRegs.push_back(Select->getOperand(0).getReg());
3564 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3565 auto Select = MIRBuilder.buildSelect(
3566 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
3567 DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
3570 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3571 LeftoverTy, DstLeftoverRegs);
3573 MI.eraseFromParent();
3574 return Legalized;
// Lower bit-counting operations (G_CTLZ/G_CTTZ and their ZERO_UNDEF
// variants) into operations the target can handle, using the classic
// bit-trick expansions when no better form is supported.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // Treat Legal, Libcall and Custom all as "the target will take it" so the
  // expansion doesn't recurse back into this function.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the highest set bit down into every lower position.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse this instruction in place as a CTPOP of the masked value.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}
3690 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3691 // representation.
3692 LegalizerHelper::LegalizeResult
3693 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
3694 Register Dst = MI.getOperand(0).getReg();
3695 Register Src = MI.getOperand(1).getReg();
3696 const LLT S64 = LLT::scalar(64);
3697 const LLT S32 = LLT::scalar(32);
3698 const LLT S1 = LLT::scalar(1);
3700 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
3702 // unsigned cul2f(ulong u) {
3703 // uint lz = clz(u);
3704 // uint e = (u != 0) ? 127U + 63U - lz : 0;
3705 // u = (u << lz) & 0x7fffffffffffffffUL;
3706 // ulong t = u & 0xffffffffffUL;
3707 // uint v = (e << 23) | (uint)(u >> 40);
3708 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3709 // return as_float(v + r);
3710 // }
3712 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3713 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3715 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3717 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3718 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3720 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3721 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3723 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3724 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3726 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3728 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3729 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3731 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3732 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3733 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3735 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
3736 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
3737 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
3738 auto One = MIRBuilder.buildConstant(S32, 1);
3740 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
3741 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
3742 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
3743 MIRBuilder.buildAdd(Dst, V, R);
3745 return Legalized;
3748 LegalizerHelper::LegalizeResult
3749 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3750 Register Dst = MI.getOperand(0).getReg();
3751 Register Src = MI.getOperand(1).getReg();
3752 LLT DstTy = MRI.getType(Dst);
3753 LLT SrcTy = MRI.getType(Src);
3755 if (SrcTy != LLT::scalar(64))
3756 return UnableToLegalize;
3758 if (DstTy == LLT::scalar(32)) {
3759 // TODO: SelectionDAG has several alternative expansions to port which may
3760 // be more reasonble depending on the available instructions. If a target
3761 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3762 // intermediate type, this is probably worse.
3763 return lowerU64ToF32BitOps(MI);
3766 return UnableToLegalize;
3769 LegalizerHelper::LegalizeResult
3770 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3771 Register Dst = MI.getOperand(0).getReg();
3772 Register Src = MI.getOperand(1).getReg();
3773 LLT DstTy = MRI.getType(Dst);
3774 LLT SrcTy = MRI.getType(Src);
3776 const LLT S64 = LLT::scalar(64);
3777 const LLT S32 = LLT::scalar(32);
3778 const LLT S1 = LLT::scalar(1);
3780 if (SrcTy != S64)
3781 return UnableToLegalize;
3783 if (DstTy == S32) {
3784 // signed cl2f(long l) {
3785 // long s = l >> 63;
3786 // float r = cul2f((l + s) ^ s);
3787 // return s ? -r : r;
3788 // }
3789 Register L = Src;
3790 auto SignBit = MIRBuilder.buildConstant(S64, 63);
3791 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
3793 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
3794 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
3795 auto R = MIRBuilder.buildUITOFP(S32, Xor);
3797 auto RNeg = MIRBuilder.buildFNeg(S32, R);
3798 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
3799 MIRBuilder.buildConstant(S64, 0));
3800 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
3801 return Legalized;
3804 return UnableToLegalize;
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // Only s32/s64 sources and destinations are handled.
  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.

  // TwoPExpInt = 2^(DstSize - 1) as an integer (just the sign-bit mask);
  // TwoPExpFP is the same value converted to the source floating-point type.
  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getNullValue(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  // Result used when Src < 2^Exp (the value fits in the signed range).
  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  // ULT also routes NaN through the plain-FPTOSI path.
  // NOTE(review): the compare result type here is DstTy, while the other
  // lowerings in this file build predicates as LLT::scalar(1) — confirm this
  // is intended for the targets that reach this expansion.
  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}
3849 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
3850 switch (Opc) {
3851 case TargetOpcode::G_SMIN:
3852 return CmpInst::ICMP_SLT;
3853 case TargetOpcode::G_SMAX:
3854 return CmpInst::ICMP_SGT;
3855 case TargetOpcode::G_UMIN:
3856 return CmpInst::ICMP_ULT;
3857 case TargetOpcode::G_UMAX:
3858 return CmpInst::ICMP_UGT;
3859 default:
3860 llvm_unreachable("not in integer min/max");
3864 LegalizerHelper::LegalizeResult
3865 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3866 Register Dst = MI.getOperand(0).getReg();
3867 Register Src0 = MI.getOperand(1).getReg();
3868 Register Src1 = MI.getOperand(2).getReg();
3870 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
3871 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
3873 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
3874 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
3876 MI.eraseFromParent();
3877 return Legalized;
3880 LegalizerHelper::LegalizeResult
3881 LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3882 Register Dst = MI.getOperand(0).getReg();
3883 Register Src0 = MI.getOperand(1).getReg();
3884 Register Src1 = MI.getOperand(2).getReg();
3886 const LLT Src0Ty = MRI.getType(Src0);
3887 const LLT Src1Ty = MRI.getType(Src1);
3889 const int Src0Size = Src0Ty.getScalarSizeInBits();
3890 const int Src1Size = Src1Ty.getScalarSizeInBits();
3892 auto SignBitMask = MIRBuilder.buildConstant(
3893 Src0Ty, APInt::getSignMask(Src0Size));
3895 auto NotSignBitMask = MIRBuilder.buildConstant(
3896 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
3898 auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
3899 MachineInstr *Or;
3901 if (Src0Ty == Src1Ty) {
3902 auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
3903 Or = MIRBuilder.buildOr(Dst, And0, And1);
3904 } else if (Src0Size > Src1Size) {
3905 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
3906 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
3907 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
3908 auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
3909 Or = MIRBuilder.buildOr(Dst, And0, And1);
3910 } else {
3911 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
3912 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
3913 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
3914 auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
3915 Or = MIRBuilder.buildOr(Dst, And0, And1);
3918 // Be careful about setting nsz/nnan/ninf on every instruction, since the
3919 // constants are a nan and -0.0, but the final result should preserve
3920 // everything.
3921 if (unsigned Flags = MI.getFlags())
3922 Or->setFlags(Flags);
3924 MI.eraseFromParent();
3925 return Legalized;
3928 LegalizerHelper::LegalizeResult
3929 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
3930 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
3931 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
3933 Register Dst = MI.getOperand(0).getReg();
3934 Register Src0 = MI.getOperand(1).getReg();
3935 Register Src1 = MI.getOperand(2).getReg();
3936 LLT Ty = MRI.getType(Dst);
3938 if (!MI.getFlag(MachineInstr::FmNoNans)) {
3939 // Insert canonicalizes if it's possible we need to quiet to get correct
3940 // sNaN behavior.
3942 // Note this must be done here, and not as an optimization combine in the
3943 // absence of a dedicate quiet-snan instruction as we're using an
3944 // omni-purpose G_FCANONICALIZE.
3945 if (!isKnownNeverSNaN(Src0, MRI))
3946 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
3948 if (!isKnownNeverSNaN(Src1, MRI))
3949 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
3952 // If there are no nans, it's safe to simply replace this with the non-IEEE
3953 // version.
3954 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
3955 MI.eraseFromParent();
3956 return Legalized;
3959 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
3960 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
3961 Register DstReg = MI.getOperand(0).getReg();
3962 LLT Ty = MRI.getType(DstReg);
3963 unsigned Flags = MI.getFlags();
3965 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
3966 Flags);
3967 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
3968 MI.eraseFromParent();
3969 return Legalized;
3972 LegalizerHelper::LegalizeResult
3973 LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
3974 const unsigned NumDst = MI.getNumOperands() - 1;
3975 const Register SrcReg = MI.getOperand(NumDst).getReg();
3976 LLT SrcTy = MRI.getType(SrcReg);
3978 Register Dst0Reg = MI.getOperand(0).getReg();
3979 LLT DstTy = MRI.getType(Dst0Reg);
3982 // Expand scalarizing unmerge as bitcast to integer and shift.
3983 if (!DstTy.isVector() && SrcTy.isVector() &&
3984 SrcTy.getElementType() == DstTy) {
3985 LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
3986 Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);
3988 MIRBuilder.buildTrunc(Dst0Reg, Cast);
3990 const unsigned DstSize = DstTy.getSizeInBits();
3991 unsigned Offset = DstSize;
3992 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
3993 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
3994 auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
3995 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
3998 MI.eraseFromParent();
3999 return Legalized;
4002 return UnableToLegalize;
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  // Shuffle mask semantics: an index < NumElts selects from Src0, an index
  // >= NumElts selects from Src1, and a negative index means "undef".
  const Constant *ShufMask = MI.getOperand(3).getShuffleMask();

  SmallVector<int, 32> Mask;
  ShuffleVectorInst::getShuffleMask(ShufMask, Mask);

  if (DstTy.isScalar()) {
    // Scalar destination is only handled for scalar sources, where the
    // shuffle degenerates to choosing one of the two inputs (or undef).
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  // General case: extract each selected element and reassemble the result
  // with G_BUILD_VECTOR.
  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      // Materialize a single shared undef element lazily and reuse it.
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      // Scalar sources: index 0 is Src0, anything else is Src1.
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      // Map the combined index onto the right source vector.
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}
4064 LegalizerHelper::LegalizeResult
4065 LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
4066 Register Dst = MI.getOperand(0).getReg();
4067 Register AllocSize = MI.getOperand(1).getReg();
4068 unsigned Align = MI.getOperand(2).getImm();
4070 const auto &MF = *MI.getMF();
4071 const auto &TLI = *MF.getSubtarget().getTargetLowering();
4073 LLT PtrTy = MRI.getType(Dst);
4074 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
4076 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
4077 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
4078 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
4080 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
4081 // have to generate an extra instruction to negate the alloc and then use
4082 // G_GEP to add the negative offset.
4083 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
4084 if (Align) {
4085 APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
4086 AlignMask.negate();
4087 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
4088 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
4091 SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
4092 MIRBuilder.buildCopy(SPReg, SPTmp);
4093 MIRBuilder.buildCopy(Dst, SPTmp);
4095 MI.eraseFromParent();
4096 return Legalized;