//===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements some simple delegations needed for call lowering.
///
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "call-lowering"

using namespace llvm;

void CallLowering::anchor() {}

/// Helper function which updates \p Flags when \p AttrFn returns true.
static void
addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
                    const std::function<bool(Attribute::AttrKind)> &AttrFn) {
  // TODO: There are missing flags. Add them here.
  if (AttrFn(Attribute::SExt))
    Flags.setSExt();
  if (AttrFn(Attribute::ZExt))
    Flags.setZExt();
  if (AttrFn(Attribute::InReg))
    Flags.setInReg();
  if (AttrFn(Attribute::StructRet))
    Flags.setSRet();
  if (AttrFn(Attribute::Nest))
    Flags.setNest();
  if (AttrFn(Attribute::ByVal))
    Flags.setByVal();
  if (AttrFn(Attribute::ByRef))
    Flags.setByRef();
  if (AttrFn(Attribute::Preallocated))
    Flags.setPreallocated();
  if (AttrFn(Attribute::InAlloca))
    Flags.setInAlloca();
  if (AttrFn(Attribute::Returned))
    Flags.setReturned();
  if (AttrFn(Attribute::SwiftSelf))
    Flags.setSwiftSelf();
  if (AttrFn(Attribute::SwiftAsync))
    Flags.setSwiftAsync();
  if (AttrFn(Attribute::SwiftError))
    Flags.setSwiftError();
}

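// Derive the ISD argument flags for the ArgIdx'th operand of a call site from
// its parameter attributes.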
ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call,
                                                     unsigned ArgIdx) const {
  ISD::ArgFlagsTy Flags;
  addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) {
    return Call.paramHasAttr(ArgIdx, Attr);
  });
  return Flags;
}

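// Derive the ISD argument flags implied by the return-value attributes of a
// call site.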
ISD::ArgFlagsTy
CallLowering::getAttributesForReturn(const CallBase &Call) const {
  ISD::ArgFlagsTy Flags;
  addFlagsUsingAttrFn(Flags, [&Call](Attribute::AttrKind Attr) {
    return Call.hasRetAttr(Attr);
  });
  return Flags;
}

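// Merge the flags implied by attribute-list index OpIdx into Flags.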
void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
                                             const AttributeList &Attrs,
                                             unsigned OpIdx) const {
  addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) {
    return Attrs.hasAttributeAtIndex(OpIdx, Attr);
  });
}

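// Translate an IR call site into a CallLoweringInfo: resolve the callee,
// gather per-argument and return-value information, decide tail-call
// eligibility, and hand the result to the target-specific lowerCall.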
bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
                             ArrayRef<Register> ResRegs,
                             ArrayRef<ArrayRef<Register>> ArgRegs,
                             Register SwiftErrorVReg,
                             std::optional<PtrAuthInfo> PAI,
                             Register ConvergenceCtrlToken,
                             std::function<unsigned()> GetCalleeReg) const {
  CallLoweringInfo Info;
  const DataLayout &DL = MIRBuilder.getDataLayout();
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool CanBeTailCalled = CB.isTailCall() &&
                         isInTailCallPosition(CB, MF.getTarget()) &&
                         (MF.getFunction()
                              .getFnAttribute("disable-tail-calls")
                              .getValueAsString() != "true");

  CallingConv::ID CallConv = CB.getCallingConv();
  Type *RetTy = CB.getType();
  bool IsVarArg = CB.getFunctionType()->isVarArg();

  SmallVector<BaseArgInfo, 4> SplitArgs;
  getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL);
  Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg);

  Info.IsConvergent = CB.isConvergent();

  if (!Info.CanLowerReturn) {
    // Callee requires sret demotion.
    insertSRetOutgoingArgument(MIRBuilder, CB, Info);

    // The sret demotion isn't compatible with tail-calls, since the sret
    // argument points into the caller's stack frame.
    CanBeTailCalled = false;
  }

  // First step is to marshall all the function's parameters into the correct
  // physregs and memory locations. Gather the sequence of argument types that
  // we'll pass to the assigner function.
  unsigned i = 0;
  unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
  for (const auto &Arg : CB.args()) {
    ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i),
                    i < NumFixedArgs};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);

    // If we have an explicit sret argument that is an Instruction, (i.e., it
    // might point to function-local memory), we can't meaningfully tail-call.
    if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg))
      CanBeTailCalled = false;

    Info.OrigArgs.push_back(OrigArg);
    ++i;
  }

  // Try looking through a bitcast from one function type to another.
  // Commonly happens with calls to objc_msgSend().
  const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();

  // If IRTranslator chose to drop the ptrauth info, we can turn this into
  // a direct call.
  if (!PAI && CB.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) {
    CalleeV = cast<ConstantPtrAuth>(CalleeV)->getPointer();
    assert(isa<Function>(CalleeV));
  }

  if (const Function *F = dyn_cast<Function>(CalleeV)) {
    if (F->hasFnAttribute(Attribute::NonLazyBind)) {
      LLT Ty = getLLTForType(*F->getType(), DL);
      Register Reg = MIRBuilder.buildGlobalValue(Ty, F).getReg(0);
      Info.Callee = MachineOperand::CreateReg(Reg, false);
    } else {
      Info.Callee = MachineOperand::CreateGA(F, 0);
    }
  } else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
    // IR IFuncs and Aliases can't be forward declared (only defined), so the
    // callee must be in the same TU and therefore we can direct-call it without
    // worrying about it being out of range.
    Info.Callee = MachineOperand::CreateGA(cast<GlobalValue>(CalleeV), 0);
  } else
    Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);

  Register ReturnHintAlignReg;
  Align ReturnHintAlign;

  Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, getAttributesForReturn(CB)};

  if (!Info.OrigRet.Ty->isVoidTy()) {
    setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);

    if (MaybeAlign Alignment = CB.getRetAlign()) {
      if (*Alignment > Align(1)) {
        ReturnHintAlignReg = MRI.cloneVirtualRegister(ResRegs[0]);
        Info.OrigRet.Regs[0] = ReturnHintAlignReg;
        ReturnHintAlign = *Alignment;
      }
    }
  }

  auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi);
  if (Bundle && CB.isIndirectCall()) {
    Info.CFIType = cast<ConstantInt>(Bundle->Inputs[0]);
    assert(Info.CFIType->getType()->isIntegerTy(32) && "Invalid CFI type");
  }

  Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
  Info.CallConv = CallConv;
  Info.SwiftErrorVReg = SwiftErrorVReg;
  Info.PAI = PAI;
  Info.ConvergenceCtrlToken = ConvergenceCtrlToken;
  Info.IsMustTailCall = CB.isMustTailCall();
  Info.IsTailCall = CanBeTailCalled;
  Info.IsVarArg = IsVarArg;
  if (!lowerCall(MIRBuilder, Info))
    return false;

  if (ReturnHintAlignReg && !Info.LoweredTailCall) {
    MIRBuilder.buildAssertAlign(ResRegs[0], ReturnHintAlignReg,
                                ReturnHintAlign);
  }

  return true;
}

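// Compute the ISD flags for one argument or return value: attribute-derived
// flags, pointer address space, and the memory size/alignment needed for
// byval, byref, inalloca and preallocated arguments.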
template <typename FuncInfoTy>
void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
                               const DataLayout &DL,
                               const FuncInfoTy &FuncInfo) const {
  auto &Flags = Arg.Flags[0];
  const AttributeList &Attrs = FuncInfo.getAttributes();
  addArgFlagsFromAttributes(Flags, Attrs, OpIdx);

  PointerType *PtrTy = dyn_cast<PointerType>(Arg.Ty->getScalarType());
  if (PtrTy) {
    Flags.setPointer();
    Flags.setPointerAddrSpace(PtrTy->getPointerAddressSpace());
  }

  Align MemAlign = DL.getABITypeAlign(Arg.Ty);
  if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
      Flags.isByRef()) {
    assert(OpIdx >= AttributeList::FirstArgIndex);
    unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex;

    Type *ElementTy = FuncInfo.getParamByValType(ParamIdx);
    if (!ElementTy)
      ElementTy = FuncInfo.getParamByRefType(ParamIdx);
    if (!ElementTy)
      ElementTy = FuncInfo.getParamInAllocaType(ParamIdx);
    if (!ElementTy)
      ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx);

    assert(ElementTy && "Must have byval, inalloca or preallocated type");

    uint64_t MemSize = DL.getTypeAllocSize(ElementTy);
    if (Flags.isByRef())
      Flags.setByRefSize(MemSize);
    else
      Flags.setByValSize(MemSize);

    // For ByVal, alignment should be passed from FE. BE will guess if
    // this info is not there but there are cases it cannot get right.
    if (auto ParamAlign = FuncInfo.getParamStackAlign(ParamIdx))
      MemAlign = *ParamAlign;
    else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx)))
      MemAlign = *ParamAlign;
    else
      MemAlign = getTLI()->getByValTypeAlignment(ElementTy, DL);
  } else if (OpIdx >= AttributeList::FirstArgIndex) {
    if (auto ParamAlign =
            FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex))
      MemAlign = *ParamAlign;
  }
  Flags.setMemAlign(MemAlign);
  Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));

  // Don't try to use the returned attribute if the argument is marked as
  // swiftself, since it won't be passed in x0.
  if (Flags.isSwiftSelf())
    Flags.setReturned(false);
}

template void
CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
                                    const DataLayout &DL,
                                    const Function &FuncInfo) const;

template void
CallLowering::setArgFlags<CallBase>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
                                    const DataLayout &DL,
                                    const CallBase &FuncInfo) const;

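// Split one ArgInfo into per-EVT pieces (mirroring ComputeValueVTs) so each
// piece can be assigned to a register or stack slot independently.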
void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
                                     SmallVectorImpl<ArgInfo> &SplitArgs,
                                     const DataLayout &DL,
                                     CallingConv::ID CallConv,
                                     SmallVectorImpl<uint64_t> *Offsets) const {
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, Offsets, 0);

  if (SplitVTs.size() == 0)
    return;

  if (SplitVTs.size() == 1) {
    // No splitting to do, but we want to replace the original type (e.g. [1 x
    // double] -> double).
    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
                           OrigArg.OrigArgIndex, OrigArg.Flags[0],
                           OrigArg.IsFixed, OrigArg.OrigValue);
    return;
  }

  // Create one ArgInfo for each virtual register in the original ArgInfo.
  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");

  bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
      OrigArg.Ty, CallConv, false, DL);
  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.OrigArgIndex,
                           OrigArg.Flags[0], OrigArg.IsFixed);
    if (NeedsRegBlock)
      SplitArgs.back().Flags[0].setInConsecutiveRegs();
  }

  if (NeedsRegBlock)
    SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
}

/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
static MachineInstrBuilder
mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
                            ArrayRef<Register> SrcRegs) {
  MachineRegisterInfo &MRI = *B.getMRI();
  LLT LLTy = MRI.getType(DstRegs[0]);
  LLT PartLLT = MRI.getType(SrcRegs[0]);

  // Deal with v3s16 split into v2s16
  LLT LCMTy = getCoverTy(LLTy, PartLLT);
  if (LCMTy == LLTy) {
    // Common case where no padding is needed.
    assert(DstRegs.size() == 1);
    return B.buildConcatVectors(DstRegs[0], SrcRegs);
  }

  // We need to create an unmerge to the result registers, which may require
  // widening the original value.
  Register UnmergeSrcReg;
  if (LCMTy != PartLLT) {
    assert(DstRegs.size() == 1);
    return B.buildDeleteTrailingVectorElements(
        DstRegs[0], B.buildMergeLikeInstr(LCMTy, SrcRegs));
  } else {
    // We don't need to widen anything if we're extracting a scalar which was
    // promoted to a vector e.g. s8 -> v4s8 -> s8
    assert(SrcRegs.size() == 1);
    UnmergeSrcReg = SrcRegs[0];
  }

  int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();

  SmallVector<Register, 8> PadDstRegs(NumDst);
  std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());

  // Create the excess dead defs for the unmerge.
  for (int I = DstRegs.size(); I != NumDst; ++I)
    PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);

  if (PadDstRegs.size() == 1)
    return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg);
  return B.buildUnmerge(PadDstRegs, UnmergeSrcReg);
}

/// Create a sequence of instructions to combine pieces split into register
/// typed values to the original IR value. \p OrigRegs contains the destination
/// value registers of type \p LLTy, and \p Regs contains the legalized pieces
/// with type \p PartLLT. This is used for incoming values (physregs to vregs).
static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
                              ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT,
                              const ISD::ArgFlagsTy Flags) {
  MachineRegisterInfo &MRI = *B.getMRI();

  if (PartLLT == LLTy) {
    // We should have avoided introducing a new virtual register, and just
    // directly assigned here.
    assert(OrigRegs[0] == Regs[0]);
    return;
  }

  if (PartLLT.getSizeInBits() == LLTy.getSizeInBits() && OrigRegs.size() == 1 &&
      Regs.size() == 1) {
    B.buildBitcast(OrigRegs[0], Regs[0]);
    return;
  }

  // A vector PartLLT needs extending to LLTy's element size.
  // E.g. <2 x s64> = G_SEXT <2 x s32>.
  if (PartLLT.isVector() == LLTy.isVector() &&
      PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits() &&
      (!PartLLT.isVector() ||
       PartLLT.getElementCount() == LLTy.getElementCount()) &&
      OrigRegs.size() == 1 && Regs.size() == 1) {
    Register SrcReg = Regs[0];

    LLT LocTy = MRI.getType(SrcReg);

    if (Flags.isSExt()) {
      SrcReg = B.buildAssertSExt(LocTy, SrcReg, LLTy.getScalarSizeInBits())
                   .getReg(0);
    } else if (Flags.isZExt()) {
      SrcReg = B.buildAssertZExt(LocTy, SrcReg, LLTy.getScalarSizeInBits())
                   .getReg(0);
    }

    // Sometimes pointers are passed zero extended.
    LLT OrigTy = MRI.getType(OrigRegs[0]);
    if (OrigTy.isPointer()) {
      LLT IntPtrTy = LLT::scalar(OrigTy.getSizeInBits());
      B.buildIntToPtr(OrigRegs[0], B.buildTrunc(IntPtrTy, SrcReg));
      return;
    }

    B.buildTrunc(OrigRegs[0], SrcReg);
    return;
  }

  if (!LLTy.isVector() && !PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    LLT OrigTy = MRI.getType(OrigRegs[0]);

    unsigned SrcSize = PartLLT.getSizeInBits().getFixedValue() * Regs.size();
    if (SrcSize == OrigTy.getSizeInBits())
      B.buildMergeValues(OrigRegs[0], Regs);
    else {
      auto Widened = B.buildMergeLikeInstr(LLT::scalar(SrcSize), Regs);
      B.buildTrunc(OrigRegs[0], Widened);
    }

    return;
  }

  if (PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    SmallVector<Register> CastRegs(Regs);

    // If PartLLT is a mismatched vector in both number of elements and element
    // size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to
    // have the same elt type, i.e. v4s32.
    // TODO: Extend this coersion to element multiples other than just 2.
    if (TypeSize::isKnownGT(PartLLT.getSizeInBits(), LLTy.getSizeInBits()) &&
        PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 &&
        Regs.size() == 1) {
      LLT NewTy = PartLLT.changeElementType(LLTy.getElementType())
                      .changeElementCount(PartLLT.getElementCount() * 2);
      CastRegs[0] = B.buildBitcast(NewTy, Regs[0]).getReg(0);
      PartLLT = NewTy;
    }

    if (LLTy.getScalarType() == PartLLT.getElementType()) {
      mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs);
    } else {
      unsigned I = 0;
      LLT GCDTy = getGCDType(LLTy, PartLLT);

      // We are both splitting a vector, and bitcasting its element types. Cast
      // the source pieces into the appropriate number of pieces with the result
      // element type.
      for (Register SrcReg : CastRegs)
        CastRegs[I++] = B.buildBitcast(GCDTy, SrcReg).getReg(0);
      mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs);
    }

    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();

  // Pointer information was discarded. We'll need to coerce some register types
  // to avoid violating type constraints.
  LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();

  assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());

  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.

    if (RealDstEltTy.isPointer()) {
      for (Register Reg : Regs)
        MRI.setType(Reg, RealDstEltTy);
    }

    B.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt =
        divideCeil(DstEltTy.getSizeInBits(), PartLLT.getSizeInBits());
    LLT ExtendedPartTy = LLT::scalar(PartLLT.getSizeInBits() * PartsPerElt);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge =
          B.buildMergeLikeInstr(ExtendedPartTy, Regs.take_front(PartsPerElt));
      if (ExtendedPartTy.getSizeInBits() > RealDstEltTy.getSizeInBits())
        Merge = B.buildTrunc(RealDstEltTy, Merge);
      // Fix the type in case this is really a vector of pointers.
      MRI.setType(Merge.getReg(0), RealDstEltTy);
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    B.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    // FIXME: Should handle floating point promotions.
    unsigned NumElts = LLTy.getNumElements();
    LLT BVType = LLT::fixed_vector(NumElts, PartLLT);

    Register BuildVec;
    if (NumElts == Regs.size())
      BuildVec = B.buildBuildVector(BVType, Regs).getReg(0);
    else {
      // Vector elements are packed in the inputs.
      // e.g. we have a <4 x s16> but 2 x s32 in regs.
      assert(NumElts > Regs.size());
      LLT SrcEltTy = MRI.getType(Regs[0]);

      LLT OriginalEltTy = MRI.getType(OrigRegs[0]).getElementType();

      // Input registers contain packed elements.
      // Determine how many elements per reg.
      assert((SrcEltTy.getSizeInBits() % OriginalEltTy.getSizeInBits()) == 0);
      auto EltPerReg =
          (SrcEltTy.getSizeInBits() / OriginalEltTy.getSizeInBits());

      SmallVector<Register, 0> BVRegs;
      BVRegs.reserve(Regs.size() * EltPerReg);
      for (Register R : Regs) {
        auto Unmerge = B.buildUnmerge(OriginalEltTy, R);
        for (unsigned K = 0; K < EltPerReg; ++K)
          BVRegs.push_back(B.buildAnyExt(PartLLT, Unmerge.getReg(K)).getReg(0));
      }

      // We may have some more elements in BVRegs, e.g. if we have 2 s32 pieces
      // for a <3 x s16> vector. We should have less than EltPerReg extra items.
      if (BVRegs.size() > NumElts) {
        assert((BVRegs.size() - NumElts) < EltPerReg);
        BVRegs.truncate(NumElts);
      }
      BuildVec = B.buildBuildVector(BVType, BVRegs).getReg(0);
    }
    B.buildTrunc(OrigRegs[0], BuildVec);
  }
}

/// Create a sequence of instructions to expand the value in \p SrcReg (of type
/// \p SrcTy) to the types in \p DstRegs (of type \p PartTy). \p ExtendOp should
/// contain the type of scalar value extension if necessary.
///
/// This is used for outgoing values (vregs to physregs)
static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
                            Register SrcReg, LLT SrcTy, LLT PartTy,
                            unsigned ExtendOp = TargetOpcode::G_ANYEXT) {
  // We could just insert a regular copy, but this is unreachable at the moment.
  assert(SrcTy != PartTy && "identical part types shouldn't reach here");

  const TypeSize PartSize = PartTy.getSizeInBits();

  if (PartTy.isVector() == SrcTy.isVector() &&
      PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) {
    assert(DstRegs.size() == 1);
    B.buildInstr(ExtendOp, {DstRegs[0]}, {SrcReg});
    return;
  }

  if (SrcTy.isVector() && !PartTy.isVector() &&
      TypeSize::isKnownGT(PartSize, SrcTy.getElementType().getSizeInBits())) {
    // Vector was scalarized, and the elements extended.
    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcTy.isVector() && PartTy.isVector() &&
      PartTy.getSizeInBits() == SrcTy.getSizeInBits() &&
      ElementCount::isKnownLT(SrcTy.getElementCount(),
                              PartTy.getElementCount())) {
    // A coercion like: v2f32 -> v4f32 or nxv2f32 -> nxv4f32
    Register DstReg = DstRegs.front();
    B.buildPadVectorWithUndefElements(DstReg, SrcReg);
    return;
  }

  LLT GCDTy = getGCDType(SrcTy, PartTy);
  if (GCDTy == PartTy) {
    // If this already evenly divisible, we can create a simple unmerge.
    B.buildUnmerge(DstRegs, SrcReg);
    return;
  }

  if (SrcTy.isVector() && !PartTy.isVector() &&
      SrcTy.getScalarSizeInBits() > PartTy.getSizeInBits()) {
    LLT ExtTy =
        LLT::vector(SrcTy.getElementCount(),
                    LLT::scalar(PartTy.getScalarSizeInBits() * DstRegs.size() /
                                SrcTy.getNumElements()));
    auto Ext = B.buildAnyExt(ExtTy, SrcReg);
    B.buildUnmerge(DstRegs, Ext);
    return;
  }

  MachineRegisterInfo &MRI = *B.getMRI();
  LLT DstTy = MRI.getType(DstRegs[0]);
  LLT LCMTy = getCoverTy(SrcTy, PartTy);

  if (PartTy.isVector() && LCMTy == PartTy) {
    assert(DstRegs.size() == 1);
    B.buildPadVectorWithUndefElements(DstRegs[0], SrcReg);
    return;
  }

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  unsigned CoveringSize = LCMTy.getSizeInBits();

  Register UnmergeSrc = SrcReg;

  if (!LCMTy.isVector() && CoveringSize != SrcSize) {
    // For scalars, it's common to be able to use a simple extension.
    if (SrcTy.isScalar() && DstTy.isScalar()) {
      CoveringSize = alignTo(SrcSize, DstSize);
      LLT CoverTy = LLT::scalar(CoveringSize);
      UnmergeSrc = B.buildInstr(ExtendOp, {CoverTy}, {SrcReg}).getReg(0);
    } else {
      // Widen to the common type.
      // FIXME: This should respect the extend type
      Register Undef = B.buildUndef(SrcTy).getReg(0);
      SmallVector<Register, 8> MergeParts(1, SrcReg);
      for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize)
        MergeParts.push_back(Undef);
      UnmergeSrc = B.buildMergeLikeInstr(LCMTy, MergeParts).getReg(0);
    }
  }

  if (LCMTy.isVector() && CoveringSize != SrcSize)
    UnmergeSrc = B.buildPadVectorWithUndefElements(LCMTy, SrcReg).getReg(0);

  B.buildUnmerge(DstRegs, UnmergeSrc);
}

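// Run the assigner over all arguments and then emit the resulting assignments
// with the given value handler.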
bool CallLowering::determineAndHandleAssignments(
    ValueHandler &Handler, ValueAssigner &Assigner,
    SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder,
    CallingConv::ID CallConv, bool IsVarArg,
    ArrayRef<Register> ThisReturnRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  SmallVector<CCValAssign, 16> ArgLocs;

  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, F.getContext());
  if (!determineAssignments(Assigner, Args, CCInfo))
    return false;

  return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder,
                           ThisReturnRegs);
}

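// Map an argument's extension flags to the corresponding generic extend
// opcode.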
static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) {
  if (Flags.isSExt())
    return TargetOpcode::G_SEXT;
  if (Flags.isZExt())
    return TargetOpcode::G_ZEXT;
  return TargetOpcode::G_ANYEXT;
}

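// Ask the calling-convention logic where each argument should live. Values
// that need several registers get one flags entry per part, marked with
// split/split-end so the parts can be reassembled later.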
bool CallLowering::determineAssignments(ValueAssigner &Assigner,
                                        SmallVectorImpl<ArgInfo> &Args,
                                        CCState &CCInfo) const {
  LLVMContext &Ctx = CCInfo.getContext();
  const CallingConv::ID CallConv = CCInfo.getCallingConv();

  unsigned NumArgs = Args.size();
  for (unsigned i = 0; i != NumArgs; ++i) {
    EVT CurVT = EVT::getEVT(Args[i].Ty);

    MVT NewVT = TLI->getRegisterTypeForCallingConv(Ctx, CallConv, CurVT);

    // If we need to split the type over multiple regs, check it's a scenario
    // we currently support.
    unsigned NumParts =
        TLI->getNumRegistersForCallingConv(Ctx, CallConv, CurVT);

    if (NumParts == 1) {
      // Try to use the register type if we couldn't assign the VT.
      if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
                             Args[i].Flags[0], CCInfo))
        return false;
      continue;
    }

    // For incoming arguments (physregs to vregs), we could have values in
    // physregs (or memlocs) which we want to extract and copy to vregs.
    // During this, we might have to deal with the LLT being split across
    // multiple regs, so we have to record this information for later.
    //
    // If we have outgoing args, then we have the opposite case. We have a
    // vreg with an LLT which we want to assign to a physical location, and
    // we might have to record that the value has to be split later.

    // We're handling an incoming arg which is split over multiple regs.
    // E.g. passing an s128 on AArch64.
    ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
    Args[i].Flags.clear();

    for (unsigned Part = 0; Part < NumParts; ++Part) {
      ISD::ArgFlagsTy Flags = OrigFlags;
      if (Part == 0) {
        Flags.setSplit();
      } else {
        Flags.setOrigAlign(Align(1));
        if (Part == NumParts - 1)
          Flags.setSplitEnd();
      }

      Args[i].Flags.push_back(Flags);
      if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
                             Args[i].Flags[Part], CCInfo)) {
        // Still couldn't assign this smaller part type for some reason.
        return false;
      }
    }
  }

  return true;
}

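// Emit the copies, loads and stores that realize the CCValAssign decisions
// produced by determineAssignments, including byval copies and indirect
// (sret-style) parameter passing.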
bool CallLowering::handleAssignments(ValueHandler &Handler,
                                     SmallVectorImpl<ArgInfo> &Args,
                                     CCState &CCInfo,
                                     SmallVectorImpl<CCValAssign> &ArgLocs,
                                     MachineIRBuilder &MIRBuilder,
                                     ArrayRef<Register> ThisReturnRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getDataLayout();

  const unsigned NumArgs = Args.size();

  // Stores thunks for outgoing register assignments. This is used so we delay
  // generating register copies until mem loc assignments are done. We do this
  // so that if the target is using the delayed stack protector feature, we can
  // find the split point of the block accurately. E.g. if we have:
  //   G_STORE %val, %memloc
  //   $x0 = COPY %foo
  //   $x1 = COPY %bar
  //   CALL func
  // ... then the split point for the block will correctly be at, and including,
  // the copy to $x0. If instead the G_STORE instruction immediately precedes
  // the CALL, then we'd prematurely choose the CALL as the split point, thus
  // generating a split block with a CALL that uses undefined physregs.
  SmallVector<std::function<void()>> DelayedOutgoingRegAssignments;

  for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
    assert(j < ArgLocs.size() && "Skipped too many arg locs");
    CCValAssign &VA = ArgLocs[j];
    assert(VA.getValNo() == i && "Location doesn't correspond to current arg");

    if (VA.needsCustom()) {
      std::function<void()> Thunk;
      unsigned NumArgRegs = Handler.assignCustomValue(
          Args[i], ArrayRef(ArgLocs).slice(j), &Thunk);
      if (Thunk)
        DelayedOutgoingRegAssignments.emplace_back(Thunk);
      if (!NumArgRegs)
        return false;
      j += (NumArgRegs - 1);
      continue;
    }

    auto AllocaAddressSpace = MF.getDataLayout().getAllocaAddrSpace();

    const MVT ValVT = VA.getValVT();
    const MVT LocVT = VA.getLocVT();

    const LLT LocTy(LocVT);
    const LLT ValTy(ValVT);
    const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
    const EVT OrigVT = EVT::getEVT(Args[i].Ty);
    const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
    const LLT PointerTy = LLT::pointer(
        AllocaAddressSpace, DL.getPointerSizeInBits(AllocaAddressSpace));

    // Expected to be multiple regs for a single incoming arg.
    // There should be Regs.size() ArgLocs per argument.
    // This should be the same as getNumRegistersForCallingConv
    const unsigned NumParts = Args[i].Flags.size();

    // Now split the registers into the assigned types.
    Args[i].OrigRegs.assign(Args[i].Regs.begin(), Args[i].Regs.end());

    if (NumParts != 1 || NewLLT != OrigTy) {
      // If we can't directly assign the register, we need one or more
      // intermediate values.
      Args[i].Regs.resize(NumParts);

      // When we have indirect parameter passing we are receiving a pointer,
      // that points to the actual value, so we need one "temporary" pointer.
      if (VA.getLocInfo() == CCValAssign::Indirect) {
        if (Handler.isIncomingArgumentHandler())
          Args[i].Regs[0] = MRI.createGenericVirtualRegister(PointerTy);
      } else {
        // For each split register, create and assign a vreg that will store
        // the incoming component of the larger value. These will later be
        // merged to form the final vreg.
        for (unsigned Part = 0; Part < NumParts; ++Part)
          Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
      }
    }

    assert((j + (NumParts - 1)) < ArgLocs.size() &&
           "Too many regs for number of args");

    // Coerce into outgoing value types before register assignment.
    if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy &&
        VA.getLocInfo() != CCValAssign::Indirect) {
      assert(Args[i].OrigRegs.size() == 1);
      buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
                      ValTy, extendOpFromFlags(Args[i].Flags[0]));
    }

    bool IndirectParameterPassingHandled = false;
    bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
    for (unsigned Part = 0; Part < NumParts; ++Part) {
      assert((VA.getLocInfo() != CCValAssign::Indirect || Part == 0) &&
             "Only the first parameter should be processed when "
             "handling indirect passing!");
      Register ArgReg = Args[i].Regs[Part];
      // There should be Regs.size() ArgLocs per argument.
      unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
      CCValAssign &VA = ArgLocs[j + Idx];
      const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];

      // We found an indirect parameter passing, and we have an
      // OutgoingValueHandler as our handler (so we are at the call site or the
      // return value). In this case, start the construction of the following
      // GMIR, that is responsible for the preparation of indirect parameter
      // passing:
      //
      // %1(indirectly passed type) = The value to pass
      // %3(pointer) = G_FRAME_INDEX %stack.0
      // G_STORE %1, %3 :: (store (s128), align 8)
      //
      // After this GMIR, the remaining part of the loop body will decide how
      // to get the value to the caller and we break out of the loop.
      if (VA.getLocInfo() == CCValAssign::Indirect &&
          !Handler.isIncomingArgumentHandler()) {
        Align AlignmentForStored = DL.getPrefTypeAlign(Args[i].Ty);
        MachineFrameInfo &MFI = MF.getFrameInfo();
        // Get some space on the stack for the value, so later we can pass it
        // as a reference.
        int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
                                             AlignmentForStored, false);
        Register PointerToStackReg =
            MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
        MachinePointerInfo StackPointerMPO =
            MachinePointerInfo::getFixedStack(MF, FrameIdx);
        // Store the value in the previously created stack space.
        MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
                              StackPointerMPO,
                              inferAlignFromPtrInfo(MF, StackPointerMPO));

        ArgReg = PointerToStackReg;
        IndirectParameterPassingHandled = true;
      }

      if (VA.isMemLoc() && !Flags.isByVal()) {
        // Individual pieces may have been spilled to the stack and others
        // passed in registers.

        // TODO: The memory size may be larger than the value we need to
        // store. We may need to adjust the offset for big endian targets.
        LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);

        MachinePointerInfo MPO;
        Register StackAddr =
            Handler.getStackAddress(VA.getLocInfo() == CCValAssign::Indirect
                                        ? PointerTy.getSizeInBytes()
                                        : MemTy.getSizeInBytes(),
                                    VA.getLocMemOffset(), MPO, Flags);

        // Finish the handling of indirect passing from the passers
        // (OutgoingParameterHandler) side.
        // This branch is needed, so the pointer to the value is loaded onto the
        // stack.
        if (VA.getLocInfo() == CCValAssign::Indirect)
          Handler.assignValueToAddress(ArgReg, StackAddr, PointerTy, MPO, VA);
        else
          Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO,
                                       VA);
      } else if (VA.isMemLoc() && Flags.isByVal()) {
        assert(Args[i].Regs.size() == 1 && "didn't expect split byval pointer");

        if (Handler.isIncomingArgumentHandler()) {
          // We just need to copy the frame index value to the pointer.
          MachinePointerInfo MPO;
          Register StackAddr = Handler.getStackAddress(
              Flags.getByValSize(), VA.getLocMemOffset(), MPO, Flags);
          MIRBuilder.buildCopy(Args[i].Regs[0], StackAddr);
        } else {
          // For outgoing byval arguments, insert the implicit copy byval
          // implies, such that writes in the callee do not modify the caller's
          // value.
          uint64_t MemSize = Flags.getByValSize();
          int64_t Offset = VA.getLocMemOffset();

          MachinePointerInfo DstMPO;
          Register StackAddr =
              Handler.getStackAddress(MemSize, Offset, DstMPO, Flags);

          MachinePointerInfo SrcMPO(Args[i].OrigValue);
          if (!Args[i].OrigValue) {
            // We still need to accurately track the stack address space if we
            // don't know the underlying value.
            const LLT PtrTy = MRI.getType(StackAddr);
            SrcMPO = MachinePointerInfo(PtrTy.getAddressSpace());
          }

          Align DstAlign = std::max(Flags.getNonZeroByValAlign(),
                                    inferAlignFromPtrInfo(MF, DstMPO));

          Align SrcAlign = std::max(Flags.getNonZeroByValAlign(),
                                    inferAlignFromPtrInfo(MF, SrcMPO));

          Handler.copyArgumentMemory(Args[i], StackAddr, Args[i].Regs[0],
                                     DstMPO, DstAlign, SrcMPO, SrcAlign,
                                     MemSize, VA);
        }
      } else if (i == 0 && !ThisReturnRegs.empty() &&
                 Handler.isIncomingArgumentHandler() &&
                 isTypeIsValidForThisReturn(ValVT)) {
        Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
      } else if (Handler.isIncomingArgumentHandler()) {
        Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
      } else {
        DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
          Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
        });
      }

      // Finish the handling of indirect parameter passing when receiving
      // the value (we are in the called function or the caller when receiving
      // the return value).
      if (VA.getLocInfo() == CCValAssign::Indirect &&
          Handler.isIncomingArgumentHandler()) {
        Align Alignment = DL.getABITypeAlign(Args[i].Ty);
        MachinePointerInfo MPO = MachinePointerInfo::getUnknownStack(MF);

        // Since we are doing indirect parameter passing, we know that the value
        // in the temporary register is not the value passed to the function,
        // but rather a pointer to that value. Let's load that value into the
        // virtual register where the parameter should go.
        MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], MPO,
                             Alignment);

        IndirectParameterPassingHandled = true;
      }

      if (IndirectParameterPassingHandled)
        break;
    }

    // Now that all pieces have been assigned, re-pack the register typed values
    // into the original value typed registers. This is only necessary, when
    // the value was passed in multiple registers, not indirectly.
    if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
        !IndirectParameterPassingHandled) {
      // Merge the split registers into the expected larger result vregs of
      // the original call.
      buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
                        LocTy, Args[i].Flags[0]);
    }

    j += NumParts - 1;
  }
  for (auto &Fn : DelayedOutgoingRegAssignments)
    Fn();

  return true;
}

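// Load each piece of a demoted (sret) return value from the stack slot
// addressed by DemoteReg into the corresponding result vreg.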
void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
                                   ArrayRef<Register> VRegs, Register DemoteReg,
                                   int FI) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const DataLayout &DL = MF.getDataLayout();

  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);

  assert(VRegs.size() == SplitVTs.size());

  unsigned NumValues = SplitVTs.size();
  Align BaseAlign = DL.getPrefTypeAlign(RetTy);
  Type *RetPtrTy =
      PointerType::get(RetTy->getContext(), DL.getAllocaAddrSpace());
  LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);

  for (unsigned I = 0; I < NumValues; ++I) {
    Register Addr;
    MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
    auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                        MRI.getType(VRegs[I]),
                                        commonAlignment(BaseAlign, Offsets[I]));
    MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
  }
}

void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
                                    ArrayRef<Register> VRegs,
                                    Register DemoteReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const DataLayout &DL = MF.getDataLayout();

  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);

  assert(VRegs.size() == SplitVTs.size());

  unsigned NumValues = SplitVTs.size();
  Align BaseAlign = DL.getPrefTypeAlign(RetTy);
  unsigned AS = DL.getAllocaAddrSpace();
  LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getContext(), AS), DL);

  MachinePointerInfo PtrInfo(AS);

  for (unsigned I = 0; I < NumValues; ++I) {
    Register Addr;
    MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
    auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                        MRI.getType(VRegs[I]),
                                        commonAlignment(BaseAlign, Offsets[I]));
    MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
  }
}

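// Add the hidden sret pointer argument used when the return value of F has
// been demoted to memory, placing it first in the split argument list.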
void CallLowering::insertSRetIncomingArgument(
    const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg,
    MachineRegisterInfo &MRI, const DataLayout &DL) const {
  unsigned AS = DL.getAllocaAddrSpace();
  DemoteReg = MRI.createGenericVirtualRegister(
      LLT::pointer(AS, DL.getPointerSizeInBits(AS)));

  Type *PtrTy = PointerType::get(F.getContext(), AS);

  SmallVector<EVT, 1> ValueVTs;
  ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs);

  // NOTE: Assume that a pointer won't get split into more than one VT.
  assert(ValueVTs.size() == 1);

  ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()),
                    ArgInfo::NoArgIndex);
  setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F);
  DemoteArg.Flags[0].setSRet();
  SplitArgs.insert(SplitArgs.begin(), DemoteArg);
}

void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder,
                                              const CallBase &CB,
                                              CallLoweringInfo &Info) const {
  const DataLayout &DL = MIRBuilder.getDataLayout();
  Type *RetTy = CB.getType();
  unsigned AS = DL.getAllocaAddrSpace();
  LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS));

  int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject(
      DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false);

  Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0);
  ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy->getContext(), AS),
                    ArgInfo::NoArgIndex);
  setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB);
  DemoteArg.Flags[0].setSRet();

  Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg);
  Info.DemoteStackIndex = FI;
  Info.DemoteRegister = DemoteReg;
}

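// Return true if every piece of the return value can be assigned by the
// calling-convention function Fn.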
bool CallLowering::checkReturn(CCState &CCInfo,
                               SmallVectorImpl<BaseArgInfo> &Outs,
                               CCAssignFn *Fn) const {
  for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
    MVT VT = MVT::getVT(Outs[I].Ty);
    if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo))
      return false;
  }
  return true;
}

void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy,
                                 AttributeList Attrs,
                                 SmallVectorImpl<BaseArgInfo> &Outs,
                                 const DataLayout &DL) const {
  LLVMContext &Context = RetTy->getContext();
  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs);
  addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex);

  for (EVT VT : SplitVTs) {
    unsigned NumParts =
        TLI->getNumRegistersForCallingConv(Context, CallConv, VT);
    MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT);
    Type *PartTy = EVT(RegVT).getTypeForEVT(Context);

    for (unsigned I = 0; I < NumParts; ++I) {
      Outs.emplace_back(PartTy, Flags);
    }
  }
}

bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const {
  const auto &F = MF.getFunction();
  Type *ReturnType = F.getReturnType();
  CallingConv::ID CallConv = F.getCallingConv();

  SmallVector<BaseArgInfo, 4> SplitArgs;
  getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs,
                MF.getDataLayout());
  return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg());
}

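// For tail-call checking: every outgoing argument that lives in a
// callee-saved register must simply be a copy of that same register.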
bool CallLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &OutLocs,
    const SmallVectorImpl<ArgInfo> &OutArgs) const {
  for (unsigned i = 0; i < OutLocs.size(); ++i) {
    const auto &ArgLoc = OutLocs[i];
    // If it's not a register, it's fine.
    if (!ArgLoc.isRegLoc())
      continue;

    MCRegister PhysReg = ArgLoc.getLocReg();

    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg))
      continue;

    LLVM_DEBUG(
        dbgs()
        << "... Call has an argument passed in a callee-saved register.\n");

    // Check if it was copied from.
    const ArgInfo &OutInfo = OutArgs[i];

    if (OutInfo.Regs.size() > 1) {
      LLVM_DEBUG(
          dbgs() << "... Cannot handle arguments in multiple registers.\n");
      return false;
    }

    // Check if we copy the register, walking through copies from virtual
    // registers. Note that getDefIgnoringCopies does not ignore copies from
    // physical registers.
    MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
    if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
      LLVM_DEBUG(
          dbgs()
          << "... Parameter was not copied into a VReg, cannot tail call.\n");
      return false;
    }

    // Got a copy. Verify that it's the same as the register we want.
    Register CopyRHS = RegDef->getOperand(1).getReg();
    if (CopyRHS != PhysReg) {
      LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
                           "VReg, cannot tail call.\n");
      return false;
    }
  }

  return true;
}

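// Check whether the caller's and callee's calling conventions assign the
// given values to identical locations (registers or stack offsets).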
bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
                                     MachineFunction &MF,
                                     SmallVectorImpl<ArgInfo> &InArgs,
                                     ValueAssigner &CalleeAssigner,
                                     ValueAssigner &CallerAssigner) const {
  const Function &F = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = F.getCallingConv();

  if (CallerCC == CalleeCC)
    return true;

  SmallVector<CCValAssign, 16> ArgLocs1;
  CCState CCInfo1(CalleeCC, Info.IsVarArg, MF, ArgLocs1, F.getContext());
  if (!determineAssignments(CalleeAssigner, InArgs, CCInfo1))
    return false;

  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CallerCC, F.isVarArg(), MF, ArgLocs2, F.getContext());
  if (!determineAssignments(CallerAssigner, InArgs, CCInfo2))
    return false;

  // We need the argument locations to match up exactly. If there's more in
  // one than the other, then we are done.
  if (ArgLocs1.size() != ArgLocs2.size())
    return false;

  // Make sure that each location is passed in exactly the same way.
  for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) {
    const CCValAssign &Loc1 = ArgLocs1[i];
    const CCValAssign &Loc2 = ArgLocs2[i];

    // We need both of them to be the same. So if one is a register and one
    // isn't, we're done.
    if (Loc1.isRegLoc() != Loc2.isRegLoc())
      return false;

    if (Loc1.isRegLoc()) {
      // If they don't have the same register location, we're done.
      if (Loc1.getLocReg() != Loc2.getLocReg())
        return false;

      // They matched, so we can move to the next ArgLoc.
      continue;
    }

    // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match.
    if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset())
      return false;
  }

  return true;
}

LLT CallLowering::ValueHandler::getStackValueStoreType(
    const DataLayout &DL, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const {
  const MVT ValVT = VA.getValVT();
  if (ValVT != MVT::iPTR) {
    LLT ValTy(ValVT);

    // We lost the pointeriness going through CCValAssign, so try to restore it
    // based on the flags.
    if (Flags.isPointer()) {
      LLT PtrTy = LLT::pointer(Flags.getPointerAddrSpace(),
                               ValTy.getScalarSizeInBits());
      if (ValVT.isVector())
        return LLT::vector(ValTy.getElementCount(), PtrTy);
      return PtrTy;
    }

    return ValTy;
  }

  unsigned AddrSpace = Flags.getPointerAddrSpace();
  return LLT::pointer(AddrSpace, DL.getPointerSize(AddrSpace));
}

void CallLowering::ValueHandler::copyArgumentMemory(
    const ArgInfo &Arg, Register DstPtr, Register SrcPtr,
    const MachinePointerInfo &DstPtrInfo, Align DstAlign,
    const MachinePointerInfo &SrcPtrInfo, Align SrcAlign, uint64_t MemSize,
    CCValAssign &VA) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand *SrcMMO = MF.getMachineMemOperand(
      SrcPtrInfo,
      MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable, MemSize,
      SrcAlign);

  MachineMemOperand *DstMMO = MF.getMachineMemOperand(
      DstPtrInfo,
      MachineMemOperand::MOStore | MachineMemOperand::MODereferenceable,
      MemSize, DstAlign);

  const LLT PtrTy = MRI.getType(DstPtr);
  const LLT SizeTy = LLT::scalar(PtrTy.getSizeInBits());

  auto SizeConst = MIRBuilder.buildConstant(SizeTy, MemSize);
  MIRBuilder.buildMemCpy(DstPtr, SrcPtr, SizeConst, *DstMMO, *SrcMMO);
}

CallLowering::ValueHandler::extendRegister(Register ValReg
,
1292 const CCValAssign
&VA
,
1293 unsigned MaxSizeBits
) {
1294 LLT LocTy
{VA
.getLocVT()};
1295 LLT ValTy
{VA
.getValVT()};
1297 if (LocTy
.getSizeInBits() == ValTy
.getSizeInBits())
1300 if (LocTy
.isScalar() && MaxSizeBits
&& MaxSizeBits
< LocTy
.getSizeInBits()) {
1301 if (MaxSizeBits
<= ValTy
.getSizeInBits())
1303 LocTy
= LLT::scalar(MaxSizeBits
);
1306 const LLT ValRegTy
= MRI
.getType(ValReg
);
1307 if (ValRegTy
.isPointer()) {
1308 // The x32 ABI wants to zero extend 32-bit pointers to 64-bit registers, so
1309 // we have to cast to do the extension.
1310 LLT IntPtrTy
= LLT::scalar(ValRegTy
.getSizeInBits());
1311 ValReg
= MIRBuilder
.buildPtrToInt(IntPtrTy
, ValReg
).getReg(0);
1314 switch (VA
.getLocInfo()) {
1317 case CCValAssign::Full
:
1318 case CCValAssign::BCvt
:
1319 // FIXME: bitconverting between vector types may or may not be a
1320 // nop in big-endian situations.
1322 case CCValAssign::AExt
: {
1323 auto MIB
= MIRBuilder
.buildAnyExt(LocTy
, ValReg
);
1324 return MIB
.getReg(0);
1326 case CCValAssign::SExt
: {
1327 Register NewReg
= MRI
.createGenericVirtualRegister(LocTy
);
1328 MIRBuilder
.buildSExt(NewReg
, ValReg
);
1331 case CCValAssign::ZExt
: {
1332 Register NewReg
= MRI
.createGenericVirtualRegister(LocTy
);
1333 MIRBuilder
.buildZExt(NewReg
, ValReg
);
1337 llvm_unreachable("unable to extend register");
void CallLowering::ValueAssigner::anchor() {}

Register CallLowering::IncomingValueHandler::buildExtensionHint(
    const CCValAssign &VA, Register SrcReg, LLT NarrowTy) {
  switch (VA.getLocInfo()) {
  case CCValAssign::LocInfo::ZExt: {
    return MIRBuilder
        .buildAssertZExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
                         NarrowTy.getScalarSizeInBits())
        .getReg(0);
  }
  case CCValAssign::LocInfo::SExt: {
    return MIRBuilder
        .buildAssertSExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
                         NarrowTy.getScalarSizeInBits())
        .getReg(0);
  }
  default:
    return SrcReg;
  }
}

/// Check if we can use a basic COPY instruction between the two types.
///
/// We're currently building on top of the infrastructure using MVT, which loses
/// pointer information in the CCValAssign. We accept copies from physical
/// registers that have been reported as integers if it's to an equivalent sized
/// pointer LLT.
static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
  if (SrcTy == DstTy)
    return true;

  if (SrcTy.getSizeInBits() != DstTy.getSizeInBits())
    return false;

  SrcTy = SrcTy.getScalarType();
  DstTy = DstTy.getScalarType();

  return (SrcTy.isPointer() && DstTy.isScalar()) ||
         (DstTy.isPointer() && SrcTy.isScalar());
}

void CallLowering::IncomingValueHandler::assignValueToReg(
    Register ValVReg, Register PhysReg, const CCValAssign &VA) {
  const MVT LocVT = VA.getLocVT();
  const LLT LocTy(LocVT);
  const LLT RegTy = MRI.getType(ValVReg);

  if (isCopyCompatibleType(RegTy, LocTy)) {
    MIRBuilder.buildCopy(ValVReg, PhysReg);
    return;
  }

  auto Copy = MIRBuilder.buildCopy(LocTy, PhysReg);
  auto Hint = buildExtensionHint(VA, Copy.getReg(0), RegTy);
  MIRBuilder.buildTrunc(ValVReg, Hint);
}