//===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements some simple delegations needed for call lowering.
///
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "call-lowering"

using namespace llvm;

void CallLowering::anchor() {}

/// Helper function which updates \p Flags when \p AttrFn returns true.
static void
addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
                    const std::function<bool(Attribute::AttrKind)> &AttrFn) {
  if (AttrFn(Attribute::SExt))
    Flags.setSExt();
  if (AttrFn(Attribute::ZExt))
    Flags.setZExt();
  if (AttrFn(Attribute::InReg))
    Flags.setInReg();
  if (AttrFn(Attribute::StructRet))
    Flags.setSRet();
  if (AttrFn(Attribute::Nest))
    Flags.setNest();
  if (AttrFn(Attribute::ByVal))
    Flags.setByVal();
  if (AttrFn(Attribute::Preallocated))
    Flags.setPreallocated();
  if (AttrFn(Attribute::InAlloca))
    Flags.setInAlloca();
  if (AttrFn(Attribute::Returned))
    Flags.setReturned();
  if (AttrFn(Attribute::SwiftSelf))
    Flags.setSwiftSelf();
  if (AttrFn(Attribute::SwiftAsync))
    Flags.setSwiftAsync();
  if (AttrFn(Attribute::SwiftError))
    Flags.setSwiftError();
}

ISD::ArgFlagsTy
CallLowering::getAttributesForArgIdx(const CallBase &Call,
                                     unsigned ArgIdx) const {
  ISD::ArgFlagsTy Flags;
  addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) {
    return Call.paramHasAttr(ArgIdx, Attr);
  });
  return Flags;
}

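/// Build the ISD argument flag set for the return value of \p Call from its
/// return attributes.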
ISD::ArgFlagsTy
CallLowering::getAttributesForReturn(const CallBase &Call) const {
  ISD::ArgFlagsTy Flags;
  addFlagsUsingAttrFn(Flags, [&Call](Attribute::AttrKind Attr) {
    return Call.hasRetAttr(Attr);
  });
  return Flags;
}

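/// Transfer the attributes found at index \p OpIdx in \p Attrs onto \p Flags.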
void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
                                             const AttributeList &Attrs,
                                             unsigned OpIdx) const {
  addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) {
    return Attrs.hasAttributeAtIndex(OpIdx, Attr);
  });
}

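/// Convenience overload that packages an IR call site into a CallLoweringInfo
/// and forwards it to the target's lowerCall implementation. It gathers the
/// argument registers, resolves the callee, applies sret demotion when the
/// return cannot be lowered directly, and decides whether the call may be
/// tail-called.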
bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
                             ArrayRef<Register> ResRegs,
                             ArrayRef<ArrayRef<Register>> ArgRegs,
                             Register SwiftErrorVReg,
                             std::function<unsigned()> GetCalleeReg) const {
  CallLoweringInfo Info;
  const DataLayout &DL = MIRBuilder.getDataLayout();
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool CanBeTailCalled = CB.isTailCall() &&
                         isInTailCallPosition(CB, MF.getTarget()) &&
                         (MF.getFunction()
                              .getFnAttribute("disable-tail-calls")
                              .getValueAsString() != "true");

  CallingConv::ID CallConv = CB.getCallingConv();
  Type *RetTy = CB.getType();
  bool IsVarArg = CB.getFunctionType()->isVarArg();

  SmallVector<BaseArgInfo, 4> SplitArgs;
  getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL);
  Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg);

  Info.IsConvergent = CB.isConvergent();

  if (!Info.CanLowerReturn) {
    // Callee requires sret demotion.
    insertSRetOutgoingArgument(MIRBuilder, CB, Info);

    // The sret demotion isn't compatible with tail-calls, since the sret
    // argument points into the caller's stack frame.
    CanBeTailCalled = false;
  }

  // First step is to marshall all the function's parameters into the correct
  // physregs and memory locations. Gather the sequence of argument types that
  // we'll pass to the assigner function.
  unsigned i = 0;
  unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
  for (const auto &Arg : CB.args()) {
    ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i),
                    i < NumFixedArgs};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);

    // If we have an explicit sret argument that is an Instruction, (i.e., it
    // might point to function-local memory), we can't meaningfully tail-call.
    if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg))
      CanBeTailCalled = false;

    Info.OrigArgs.push_back(OrigArg);
    ++i;
  }

  // Try looking through a bitcast from one function type to another.
  // Commonly happens with calls to objc_msgSend().
  const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
  if (const Function *F = dyn_cast<Function>(CalleeV))
    Info.Callee = MachineOperand::CreateGA(F, 0);
  else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
    // IR IFuncs and Aliases can't be forward declared (only defined), so the
    // callee must be in the same TU and therefore we can direct-call it
    // without worrying about it being out of range.
    Info.Callee = MachineOperand::CreateGA(cast<GlobalValue>(CalleeV), 0);
  } else
    Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);

  Register ReturnHintAlignReg;
  Align ReturnHintAlign;

  Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, getAttributesForReturn(CB)};

  if (!Info.OrigRet.Ty->isVoidTy()) {
    setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);

    if (MaybeAlign Alignment = CB.getRetAlign()) {
      if (*Alignment > Align(1)) {
        ReturnHintAlignReg = MRI.cloneVirtualRegister(ResRegs[0]);
        Info.OrigRet.Regs[0] = ReturnHintAlignReg;
        ReturnHintAlign = *Alignment;
      }
    }
  }

  auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi);
  if (Bundle && CB.isIndirectCall()) {
    Info.CFIType = cast<ConstantInt>(Bundle->Inputs[0]);
    assert(Info.CFIType->getType()->isIntegerTy(32) && "Invalid CFI type");
  }

  Info.CB = &CB;
  Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
  Info.CallConv = CallConv;
  Info.SwiftErrorVReg = SwiftErrorVReg;
  Info.IsMustTailCall = CB.isMustTailCall();
  Info.IsTailCall = CanBeTailCalled;
  Info.IsVarArg = IsVarArg;
  if (!lowerCall(MIRBuilder, Info))
    return false;

  if (ReturnHintAlignReg && !Info.IsTailCall) {
    MIRBuilder.buildAssertAlign(ResRegs[0], ReturnHintAlignReg,
                                ReturnHintAlign);
  }

  return true;
}

template <typename FuncInfoTy>
void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
                               const DataLayout &DL,
                               const FuncInfoTy &FuncInfo) const {
  auto &Flags = Arg.Flags[0];
  const AttributeList &Attrs = FuncInfo.getAttributes();
  addArgFlagsFromAttributes(Flags, Attrs, OpIdx);

  PointerType *PtrTy = dyn_cast<PointerType>(Arg.Ty->getScalarType());
  if (PtrTy) {
    Flags.setPointer();
    Flags.setPointerAddrSpace(PtrTy->getPointerAddressSpace());
  }

  Align MemAlign = DL.getABITypeAlign(Arg.Ty);
  if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
    assert(OpIdx >= AttributeList::FirstArgIndex);
    unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex;

    Type *ElementTy = FuncInfo.getParamByValType(ParamIdx);
    if (!ElementTy)
      ElementTy = FuncInfo.getParamInAllocaType(ParamIdx);
    if (!ElementTy)
      ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx);
    assert(ElementTy && "Must have byval, inalloca or preallocated type");
    Flags.setByValSize(DL.getTypeAllocSize(ElementTy));

    // For ByVal, alignment should be passed from FE. BE will guess if this
    // info is not there but there are cases it cannot get right.
    if (auto ParamAlign = FuncInfo.getParamStackAlign(ParamIdx))
      MemAlign = *ParamAlign;
    else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx)))
      MemAlign = *ParamAlign;
    else
      MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
  } else if (OpIdx >= AttributeList::FirstArgIndex) {
    if (auto ParamAlign =
            FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex))
      MemAlign = *ParamAlign;
  }
  Flags.setMemAlign(MemAlign);
  Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));

  // Don't try to use the returned attribute if the argument is marked as
  // swiftself, since it won't be passed in x0.
  if (Flags.isSwiftSelf())
    Flags.setReturned(false);
}

template void
CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
                                    const DataLayout &DL,
                                    const Function &FuncInfo) const;

template void
CallLowering::setArgFlags<CallBase>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
                                    const DataLayout &DL,
                                    const CallBase &FuncInfo) const;

void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
                                     SmallVectorImpl<ArgInfo> &SplitArgs,
                                     const DataLayout &DL,
                                     CallingConv::ID CallConv,
                                     SmallVectorImpl<uint64_t> *Offsets) const {
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, Offsets, 0);

  if (SplitVTs.size() == 0)
    return;

  if (SplitVTs.size() == 1) {
    // No splitting to do, but we want to replace the original type (e.g. [1 x
    // double] -> double).
    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
                           OrigArg.OrigArgIndex, OrigArg.Flags[0],
                           OrigArg.IsFixed, OrigArg.OrigValue);
    return;
  }

  // Create one ArgInfo for each virtual register in the original ArgInfo.
  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");

  bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
      OrigArg.Ty, CallConv, false, DL);
  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.OrigArgIndex,
                           OrigArg.Flags[0], OrigArg.IsFixed);
    if (NeedsRegBlock)
      SplitArgs.back().Flags[0].setInConsecutiveRegs();
  }

  SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
}

/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
static MachineInstrBuilder
mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
                            ArrayRef<Register> SrcRegs) {
  MachineRegisterInfo &MRI = *B.getMRI();
  LLT LLTy = MRI.getType(DstRegs[0]);
  LLT PartLLT = MRI.getType(SrcRegs[0]);

  // Deal with v3s16 split into v2s16
  LLT LCMTy = getCoverTy(LLTy, PartLLT);
  if (LCMTy == LLTy) {
    // Common case where no padding is needed.
    assert(DstRegs.size() == 1);
    return B.buildConcatVectors(DstRegs[0], SrcRegs);
  }

  // We need to create an unmerge to the result registers, which may require
  // widening the original value.
  Register UnmergeSrcReg;
  if (LCMTy != PartLLT) {
    assert(DstRegs.size() == 1);
    return B.buildDeleteTrailingVectorElements(
        DstRegs[0], B.buildMergeLikeInstr(LCMTy, SrcRegs));
  } else {
    // We don't need to widen anything if we're extracting a scalar which was
    // promoted to a vector e.g. s8 -> v4s8 -> s8
    assert(SrcRegs.size() == 1);
    UnmergeSrcReg = SrcRegs[0];
  }

  int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();

  SmallVector<Register, 8> PadDstRegs(NumDst);
  std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());

  // Create the excess dead defs for the unmerge.
  for (int I = DstRegs.size(); I != NumDst; ++I)
    PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);

  if (PadDstRegs.size() == 1)
    return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg);
  return B.buildUnmerge(PadDstRegs, UnmergeSrcReg);
}

/// Create a sequence of instructions to combine pieces split into register
/// typed values to the original IR value. \p OrigRegs contains the destination
/// value registers of type \p LLTy, and \p Regs contains the legalized pieces
/// with type \p PartLLT. This is used for incoming values (physregs to vregs).
static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
                              ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT,
                              const ISD::ArgFlagsTy Flags) {
  MachineRegisterInfo &MRI = *B.getMRI();

  if (PartLLT == LLTy) {
    // We should have avoided introducing a new virtual register, and just
    // directly assigned here.
    assert(OrigRegs[0] == Regs[0]);
    return;
  }

  if (PartLLT.getSizeInBits() == LLTy.getSizeInBits() &&
      OrigRegs.size() == 1 && Regs.size() == 1) {
    B.buildBitcast(OrigRegs[0], Regs[0]);
    return;
  }

  // A vector PartLLT needs extending to LLTy's element size.
  // E.g. <2 x s64> = G_SEXT <2 x s32>.
  if (PartLLT.isVector() == LLTy.isVector() &&
      PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits() &&
      (!PartLLT.isVector() ||
       PartLLT.getElementCount() == LLTy.getElementCount()) &&
      OrigRegs.size() == 1 && Regs.size() == 1) {
    Register SrcReg = Regs[0];

    LLT LocTy = MRI.getType(SrcReg);

    if (Flags.isSExt()) {
      SrcReg = B.buildAssertSExt(LocTy, SrcReg, LLTy.getScalarSizeInBits())
                   .getReg(0);
    } else if (Flags.isZExt()) {
      SrcReg = B.buildAssertZExt(LocTy, SrcReg, LLTy.getScalarSizeInBits())
                   .getReg(0);
    }

    // Sometimes pointers are passed zero extended.
    LLT OrigTy = MRI.getType(OrigRegs[0]);
    if (OrigTy.isPointer()) {
      LLT IntPtrTy = LLT::scalar(OrigTy.getSizeInBits());
      B.buildIntToPtr(OrigRegs[0], B.buildTrunc(IntPtrTy, SrcReg));
      return;
    }

    B.buildTrunc(OrigRegs[0], SrcReg);
    return;
  }

  if (!LLTy.isVector() && !PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    LLT OrigTy = MRI.getType(OrigRegs[0]);

    unsigned SrcSize = PartLLT.getSizeInBits().getFixedValue() * Regs.size();
    if (SrcSize == OrigTy.getSizeInBits())
      B.buildMergeValues(OrigRegs[0], Regs);
    else {
      auto Widened = B.buildMergeLikeInstr(LLT::scalar(SrcSize), Regs);
      B.buildTrunc(OrigRegs[0], Widened);
    }

    return;
  }

  if (PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    SmallVector<Register> CastRegs(Regs.begin(), Regs.end());

    // If PartLLT is a mismatched vector in both number of elements and element
    // size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to
    // have the same elt type, i.e. v4s32.
    // TODO: Extend this coersion to element multiples other than just 2.
    if (PartLLT.getSizeInBits() > LLTy.getSizeInBits() &&
        PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 &&
        Regs.size() == 1) {
      LLT NewTy = PartLLT.changeElementType(LLTy.getElementType())
                      .changeElementCount(PartLLT.getElementCount() * 2);
      CastRegs[0] = B.buildBitcast(NewTy, Regs[0]).getReg(0);
      PartLLT = NewTy;
    }

    if (LLTy.getScalarType() == PartLLT.getElementType()) {
      mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs);
    } else {
      unsigned I = 0;
      LLT GCDTy = getGCDType(LLTy, PartLLT);

      // We are both splitting a vector, and bitcasting its element types. Cast
      // the source pieces into the appropriate number of pieces with the
      // result element type.
      for (Register SrcReg : CastRegs)
        CastRegs[I++] = B.buildBitcast(GCDTy, SrcReg).getReg(0);
      mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs);
    }

    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();

  // Pointer information was discarded. We'll need to coerce some register
  // types to avoid violating type constraints.
  LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();

  assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());

  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.

    if (RealDstEltTy.isPointer()) {
      for (Register Reg : Regs)
        MRI.setType(Reg, RealDstEltTy);
    }

    B.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();

    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge =
          B.buildMergeLikeInstr(RealDstEltTy, Regs.take_front(PartsPerElt));
      // Fix the type in case this is really a vector of pointers.
      MRI.setType(Merge.getReg(0), RealDstEltTy);
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    B.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    // FIXME: Should handle floating point promotions.
    unsigned NumElts = LLTy.getNumElements();
    LLT BVType = LLT::fixed_vector(NumElts, PartLLT);

    Register BuildVec;
    if (NumElts == Regs.size())
      BuildVec = B.buildBuildVector(BVType, Regs).getReg(0);
    else {
      // Vector elements are packed in the inputs.
      // e.g. we have a <4 x s16> but 2 x s32 in regs.
      assert(NumElts > Regs.size());
      LLT SrcEltTy = MRI.getType(Regs[0]);

      LLT OriginalEltTy = MRI.getType(OrigRegs[0]).getElementType();

      // Input registers contain packed elements.
      // Determine how many elements per reg.
      assert((SrcEltTy.getSizeInBits() % OriginalEltTy.getSizeInBits()) == 0);
      const unsigned EltPerReg =
          (SrcEltTy.getSizeInBits() / OriginalEltTy.getSizeInBits());

      SmallVector<Register, 0> BVRegs;
      BVRegs.reserve(Regs.size() * EltPerReg);
      for (Register R : Regs) {
        auto Unmerge = B.buildUnmerge(OriginalEltTy, R);
        for (unsigned K = 0; K < EltPerReg; ++K)
          BVRegs.push_back(B.buildAnyExt(PartLLT, Unmerge.getReg(K)).getReg(0));
      }

      // We may have some more elements in BVRegs, e.g. if we have 2 s32 pieces
      // for a <3 x s16> vector. We should have less than EltPerReg extra items.
      if (BVRegs.size() > NumElts) {
        assert((BVRegs.size() - NumElts) < EltPerReg);
        BVRegs.truncate(NumElts);
      }
      BuildVec = B.buildBuildVector(BVType, BVRegs).getReg(0);
    }
    B.buildTrunc(OrigRegs[0], BuildVec);
  }
}

/// Create a sequence of instructions to expand the value in \p SrcReg (of type
/// \p SrcTy) to the types in \p DstRegs (of type \p PartTy). \p ExtendOp should
/// contain the type of scalar value extension if necessary.
///
/// This is used for outgoing values (vregs to physregs).
static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
                            Register SrcReg, LLT SrcTy, LLT PartTy,
                            unsigned ExtendOp = TargetOpcode::G_ANYEXT) {
  // We could just insert a regular copy, but this is unreachable at the moment.
  assert(SrcTy != PartTy && "identical part types shouldn't reach here");

  const unsigned PartSize = PartTy.getSizeInBits();

  if (PartTy.isVector() == SrcTy.isVector() &&
      PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) {
    assert(DstRegs.size() == 1);
    B.buildInstr(ExtendOp, {DstRegs[0]}, {SrcReg});
    return;
  }

  if (SrcTy.isVector() && !PartTy.isVector() &&
      PartSize > SrcTy.getElementType().getSizeInBits()) {
    // Vector was scalarized, and the elements extended.
    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcTy.isVector() && PartTy.isVector() &&
      PartTy.getScalarSizeInBits() == SrcTy.getScalarSizeInBits() &&
      SrcTy.getNumElements() < PartTy.getNumElements()) {
    // A coercion like: v2f32 -> v4f32.
    Register DstReg = DstRegs.front();
    B.buildPadVectorWithUndefElements(DstReg, SrcReg);
    return;
  }

  LLT GCDTy = getGCDType(SrcTy, PartTy);
  if (GCDTy == PartTy) {
    // If this already evenly divisible, we can create a simple unmerge.
    B.buildUnmerge(DstRegs, SrcReg);
    return;
  }

  MachineRegisterInfo &MRI = *B.getMRI();
  LLT DstTy = MRI.getType(DstRegs[0]);
  LLT LCMTy = getCoverTy(SrcTy, PartTy);

  if (PartTy.isVector() && LCMTy == PartTy) {
    assert(DstRegs.size() == 1);
    B.buildPadVectorWithUndefElements(DstRegs[0], SrcReg);
    return;
  }

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  unsigned CoveringSize = LCMTy.getSizeInBits();

  Register UnmergeSrc = SrcReg;

  if (!LCMTy.isVector() && CoveringSize != SrcSize) {
    // For scalars, it's common to be able to use a simple extension.
    if (SrcTy.isScalar() && DstTy.isScalar()) {
      CoveringSize = alignTo(SrcSize, DstSize);
      LLT CoverTy = LLT::scalar(CoveringSize);
      UnmergeSrc = B.buildInstr(ExtendOp, {CoverTy}, {SrcReg}).getReg(0);
    } else {
      // Widen to the common type.
      // FIXME: This should respect the extend type
      Register Undef = B.buildUndef(SrcTy).getReg(0);
      SmallVector<Register, 8> MergeParts(1, SrcReg);
      for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize)
        MergeParts.push_back(Undef);
      UnmergeSrc = B.buildMergeLikeInstr(LCMTy, MergeParts).getReg(0);
    }
  }

  if (LCMTy.isVector() && CoveringSize != SrcSize)
    UnmergeSrc = B.buildPadVectorWithUndefElements(LCMTy, SrcReg).getReg(0);

  B.buildUnmerge(DstRegs, UnmergeSrc);
}

bool CallLowering::determineAndHandleAssignments(
    ValueHandler &Handler, ValueAssigner &Assigner,
    SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder,
    CallingConv::ID CallConv, bool IsVarArg,
    ArrayRef<Register> ThisReturnRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  SmallVector<CCValAssign, 16> ArgLocs;

  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, F.getContext());
  if (!determineAssignments(Assigner, Args, CCInfo))
    return false;

  return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder,
                           ThisReturnRegs);
}

static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) {
  if (Flags.isSExt())
    return TargetOpcode::G_SEXT;
  if (Flags.isZExt())
    return TargetOpcode::G_ZEXT;
  return TargetOpcode::G_ANYEXT;
}

bool CallLowering::determineAssignments(ValueAssigner &Assigner,
                                        SmallVectorImpl<ArgInfo> &Args,
                                        CCState &CCInfo) const {
  LLVMContext &Ctx = CCInfo.getContext();
  const CallingConv::ID CallConv = CCInfo.getCallingConv();

  unsigned NumArgs = Args.size();
  for (unsigned i = 0; i != NumArgs; ++i) {
    EVT CurVT = EVT::getEVT(Args[i].Ty);

    MVT NewVT = TLI->getRegisterTypeForCallingConv(Ctx, CallConv, CurVT);

    // If we need to split the type over multiple regs, check it's a scenario
    // we currently support.
    unsigned NumParts =
        TLI->getNumRegistersForCallingConv(Ctx, CallConv, CurVT);

    if (NumParts == 1) {
      // Try to use the register type if we couldn't assign the VT.
      if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
                             Args[i].Flags[0], CCInfo))
        return false;
      continue;
    }

    // For incoming arguments (physregs to vregs), we could have values in
    // physregs (or memlocs) which we want to extract and copy to vregs.
    // During this, we might have to deal with the LLT being split across
    // multiple regs, so we have to record this information for later.
    //
    // If we have outgoing args, then we have the opposite case. We have a
    // vreg with an LLT which we want to assign to a physical location, and
    // we might have to record that the value has to be split later.

    // We're handling an incoming arg which is split over multiple regs.
    // E.g. passing an s128 on AArch64.
    ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
    Args[i].Flags.clear();

    for (unsigned Part = 0; Part < NumParts; ++Part) {
      ISD::ArgFlagsTy Flags = OrigFlags;
      if (Part == 0) {
        Flags.setSplit();
      } else {
        Flags.setOrigAlign(Align(1));
        if (Part == NumParts - 1)
          Flags.setSplitEnd();
      }

      Args[i].Flags.push_back(Flags);
      if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
                             Args[i].Flags[Part], CCInfo)) {
        // Still couldn't assign this smaller part type for some reason.
        return false;
      }
    }
  }

  return true;
}

bool CallLowering::handleAssignments(ValueHandler &Handler,
                                     SmallVectorImpl<ArgInfo> &Args,
                                     CCState &CCInfo,
                                     SmallVectorImpl<CCValAssign> &ArgLocs,
                                     MachineIRBuilder &MIRBuilder,
                                     ArrayRef<Register> ThisReturnRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();

  const unsigned NumArgs = Args.size();

  // Stores thunks for outgoing register assignments. This is used so we delay
  // generating register copies until mem loc assignments are done. We do this
  // so that if the target is using the delayed stack protector feature, we can
  // find the split point of the block accurately. E.g. if we have:
  // G_STORE %val, %memloc
  // $x0 = COPY %foo
  // $x1 = COPY %bar
  // CALL func
  // ... then the split point for the block will correctly be at, and including,
  // the copy to $x0. If instead the G_STORE instruction immediately precedes
  // the CALL, then we'd prematurely choose the CALL as the split point, thus
  // generating a split block with a CALL that uses undefined physregs.
  SmallVector<std::function<void()>> DelayedOutgoingRegAssignments;

  for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
    assert(j < ArgLocs.size() && "Skipped too many arg locs");
    CCValAssign &VA = ArgLocs[j];
    assert(VA.getValNo() == i && "Location doesn't correspond to current arg");

    if (VA.needsCustom()) {
      std::function<void()> Thunk;
      unsigned NumArgRegs = Handler.assignCustomValue(
          Args[i], ArrayRef(ArgLocs).slice(j), &Thunk);
      if (Thunk)
        DelayedOutgoingRegAssignments.emplace_back(Thunk);
      if (!NumArgRegs)
        return false;
      j += (NumArgRegs - 1);
      continue;
    }

    const MVT ValVT = VA.getValVT();
    const MVT LocVT = VA.getLocVT();

    const LLT LocTy(LocVT);
    const LLT ValTy(ValVT);
    const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
    const EVT OrigVT = EVT::getEVT(Args[i].Ty);
    const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);

    // Expected to be multiple regs for a single incoming arg.
    // There should be Regs.size() ArgLocs per argument.
    // This should be the same as getNumRegistersForCallingConv
    const unsigned NumParts = Args[i].Flags.size();

    // Now split the registers into the assigned types.
    Args[i].OrigRegs.assign(Args[i].Regs.begin(), Args[i].Regs.end());

    if (NumParts != 1 || NewLLT != OrigTy) {
      // If we can't directly assign the register, we need one or more
      // intermediate values.
      Args[i].Regs.resize(NumParts);

      // For each split register, create and assign a vreg that will store
      // the incoming component of the larger value. These will later be
      // merged to form the final vreg.
      for (unsigned Part = 0; Part < NumParts; ++Part)
        Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
    }

    assert((j + (NumParts - 1)) < ArgLocs.size() &&
           "Too many regs for number of args");

    // Coerce into outgoing value types before register assignment.
    if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy) {
      assert(Args[i].OrigRegs.size() == 1);
      buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
                      ValTy, extendOpFromFlags(Args[i].Flags[0]));
    }

    bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
    for (unsigned Part = 0; Part < NumParts; ++Part) {
      Register ArgReg = Args[i].Regs[Part];
      // There should be Regs.size() ArgLocs per argument.
      unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
      CCValAssign &VA = ArgLocs[j + Idx];
      const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];

      if (VA.isMemLoc() && !Flags.isByVal()) {
        // Individual pieces may have been spilled to the stack and others
        // passed in registers.

        // TODO: The memory size may be larger than the value we need to
        // store. We may need to adjust the offset for big endian targets.
        LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);

        MachinePointerInfo MPO;
        Register StackAddr = Handler.getStackAddress(
            MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);

        Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA);
        continue;
      }

      if (VA.isMemLoc() && Flags.isByVal()) {
        assert(Args[i].Regs.size() == 1 &&
               "didn't expect split byval pointer");

        if (Handler.isIncomingArgumentHandler()) {
          // We just need to copy the frame index value to the pointer.
          MachinePointerInfo MPO;
          Register StackAddr = Handler.getStackAddress(
              Flags.getByValSize(), VA.getLocMemOffset(), MPO, Flags);
          MIRBuilder.buildCopy(Args[i].Regs[0], StackAddr);
        } else {
          // For outgoing byval arguments, insert the implicit copy byval
          // implies, such that writes in the callee do not modify the caller's
          // value.
          uint64_t MemSize = Flags.getByValSize();
          int64_t Offset = VA.getLocMemOffset();

          MachinePointerInfo DstMPO;
          Register StackAddr =
              Handler.getStackAddress(MemSize, Offset, DstMPO, Flags);

          MachinePointerInfo SrcMPO(Args[i].OrigValue);
          if (!Args[i].OrigValue) {
            // We still need to accurately track the stack address space if we
            // don't know the underlying value.
            const LLT PtrTy = MRI.getType(StackAddr);
            SrcMPO = MachinePointerInfo(PtrTy.getAddressSpace());
          }

          Align DstAlign = std::max(Flags.getNonZeroByValAlign(),
                                    inferAlignFromPtrInfo(MF, DstMPO));

          Align SrcAlign = std::max(Flags.getNonZeroByValAlign(),
                                    inferAlignFromPtrInfo(MF, SrcMPO));

          Handler.copyArgumentMemory(Args[i], StackAddr, Args[i].Regs[0],
                                     DstMPO, DstAlign, SrcMPO, SrcAlign,
                                     MemSize, VA);
        }
        continue;
      }

      assert(!VA.needsCustom() && "custom loc should have been handled already");

      if (i == 0 && !ThisReturnRegs.empty() &&
          Handler.isIncomingArgumentHandler() &&
          isTypeIsValidForThisReturn(ValVT)) {
        Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
        continue;
      }

      if (Handler.isIncomingArgumentHandler())
        Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
      else {
        DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
          Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
        });
      }
    }

    // Now that all pieces have been assigned, re-pack the register typed values
    // into the original value typed registers.
    if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
      // Merge the split registers into the expected larger result vregs of
      // the original call.
      buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
                        LocTy, Args[i].Flags[0]);
    }

    j += NumParts - 1;
  }
  for (auto &Fn : DelayedOutgoingRegAssignments)
    Fn();

  return true;
}

void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
                                   ArrayRef<Register> VRegs,
                                   Register DemoteReg, int FI) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const DataLayout &DL = MF.getDataLayout();

  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);

  assert(VRegs.size() == SplitVTs.size());

  unsigned NumValues = SplitVTs.size();
  Align BaseAlign = DL.getPrefTypeAlign(RetTy);
  Type *RetPtrTy =
      PointerType::get(RetTy->getContext(), DL.getAllocaAddrSpace());
  LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);

  for (unsigned I = 0; I < NumValues; ++I) {
    Register Addr;
    MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
    auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                        MRI.getType(VRegs[I]),
                                        commonAlignment(BaseAlign, Offsets[I]));
    MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
  }
}

void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
                                    ArrayRef<Register> VRegs,
                                    Register DemoteReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const DataLayout &DL = MF.getDataLayout();

  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);

  assert(VRegs.size() == SplitVTs.size());

  unsigned NumValues = SplitVTs.size();
  Align BaseAlign = DL.getPrefTypeAlign(RetTy);
  unsigned AS = DL.getAllocaAddrSpace();
  LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getPointerTo(AS)), DL);

  MachinePointerInfo PtrInfo(AS);

  for (unsigned I = 0; I < NumValues; ++I) {
    Register Addr;
    MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
    auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                        MRI.getType(VRegs[I]),
                                        commonAlignment(BaseAlign, Offsets[I]));
    MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
  }
}

void CallLowering::insertSRetIncomingArgument(
    const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg,
    MachineRegisterInfo &MRI, const DataLayout &DL) const {
  unsigned AS = DL.getAllocaAddrSpace();
  DemoteReg = MRI.createGenericVirtualRegister(
      LLT::pointer(AS, DL.getPointerSizeInBits(AS)));

  Type *PtrTy = PointerType::get(F.getReturnType(), AS);

  SmallVector<EVT, 1> ValueVTs;
  ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs);

  // NOTE: Assume that a pointer won't get split into more than one VT.
  assert(ValueVTs.size() == 1);

  ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()),
                    ArgInfo::NoArgIndex);
  setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F);
  DemoteArg.Flags[0].setSRet();
  SplitArgs.insert(SplitArgs.begin(), DemoteArg);
}

void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder,
                                              const CallBase &CB,
                                              CallLoweringInfo &Info) const {
  const DataLayout &DL = MIRBuilder.getDataLayout();
  Type *RetTy = CB.getType();
  unsigned AS = DL.getAllocaAddrSpace();
  LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS));

  int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject(
      DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false);

  Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0);
  ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS),
                    ArgInfo::NoArgIndex);
  setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB);
  DemoteArg.Flags[0].setSRet();

  Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg);
  Info.DemoteStackIndex = FI;
  Info.DemoteRegister = DemoteReg;
}

bool CallLowering::checkReturn(CCState &CCInfo,
                               SmallVectorImpl<BaseArgInfo> &Outs,
                               CCAssignFn *Fn) const {
  for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
    MVT VT = MVT::getVT(Outs[I].Ty);
    if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo))
      return false;
  }
  return true;
}

void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy,
                                 AttributeList Attrs,
                                 SmallVectorImpl<BaseArgInfo> &Outs,
                                 const DataLayout &DL) const {
  LLVMContext &Context = RetTy->getContext();
  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs);
  addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex);

  for (EVT VT : SplitVTs) {
    unsigned NumParts =
        TLI->getNumRegistersForCallingConv(Context, CallConv, VT);
    MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT);
    Type *PartTy = EVT(RegVT).getTypeForEVT(Context);

    for (unsigned I = 0; I < NumParts; ++I) {
      Outs.emplace_back(PartTy, Flags);
    }
  }
}

bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const {
  const auto &F = MF.getFunction();
  Type *ReturnType = F.getReturnType();
  CallingConv::ID CallConv = F.getCallingConv();

  SmallVector<BaseArgInfo, 4> SplitArgs;
  getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs,
                MF.getDataLayout());
  return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg());
}

bool CallLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &OutLocs,
    const SmallVectorImpl<ArgInfo> &OutArgs) const {
  for (unsigned i = 0; i < OutLocs.size(); ++i) {
    const auto &ArgLoc = OutLocs[i];
    // If it's not a register, it's fine.
    if (!ArgLoc.isRegLoc())
      continue;

    MCRegister PhysReg = ArgLoc.getLocReg();

    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg))
      continue;

    LLVM_DEBUG(
        dbgs()
        << "... Call has an argument passed in a callee-saved register.\n");

    // Check if it was copied from.
    const ArgInfo &OutInfo = OutArgs[i];

    if (OutInfo.Regs.size() > 1) {
      LLVM_DEBUG(
          dbgs() << "... Cannot handle arguments in multiple registers.\n");
      return false;
    }

    // Check if we copy the register, walking through copies from virtual
    // registers. Note that getDefIgnoringCopies does not ignore copies from
    // physical registers.
    MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
    if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
      LLVM_DEBUG(
          dbgs()
          << "... Parameter was not copied into a VReg, cannot tail call.\n");
      return false;
    }

    // Got a copy. Verify that it's the same as the register we want.
    Register CopyRHS = RegDef->getOperand(1).getReg();
    if (CopyRHS != PhysReg) {
      LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
                           "VReg, cannot tail call.\n");
      return false;
    }
  }

  return true;
}

bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
                                     MachineFunction &MF,
                                     SmallVectorImpl<ArgInfo> &InArgs,
                                     ValueAssigner &CalleeAssigner,
                                     ValueAssigner &CallerAssigner) const {
  const Function &F = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = F.getCallingConv();

  if (CallerCC == CalleeCC)
    return true;

  SmallVector<CCValAssign, 16> ArgLocs1;
  CCState CCInfo1(CalleeCC, Info.IsVarArg, MF, ArgLocs1, F.getContext());
  if (!determineAssignments(CalleeAssigner, InArgs, CCInfo1))
    return false;

  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CallerCC, F.isVarArg(), MF, ArgLocs2, F.getContext());
  if (!determineAssignments(CallerAssigner, InArgs, CCInfo2))
    return false;

  // We need the argument locations to match up exactly. If there's more in
  // one than the other, then we are done.
  if (ArgLocs1.size() != ArgLocs2.size())
    return false;

  // Make sure that each location is passed in exactly the same way.
  for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) {
    const CCValAssign &Loc1 = ArgLocs1[i];
    const CCValAssign &Loc2 = ArgLocs2[i];

    // We need both of them to be the same. So if one is a register and one
    // isn't, we're done.
    if (Loc1.isRegLoc() != Loc2.isRegLoc())
      return false;

    if (Loc1.isRegLoc()) {
      // If they don't have the same register location, we're done.
      if (Loc1.getLocReg() != Loc2.getLocReg())
        return false;

      // They matched, so we can move to the next ArgLoc.
      continue;
    }

    // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match.
    if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset())
      return false;
  }

  return true;
}

LLT CallLowering::ValueHandler::getStackValueStoreType(
    const DataLayout &DL, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const {
  const MVT ValVT = VA.getValVT();
  if (ValVT != MVT::iPTR) {
    LLT ValTy(ValVT);

    // We lost the pointeriness going through CCValAssign, so try to restore it
    // based on the flags.
    if (Flags.isPointer()) {
      LLT PtrTy = LLT::pointer(Flags.getPointerAddrSpace(),
                               ValTy.getScalarSizeInBits());
      if (ValVT.isVector())
        return LLT::vector(ValTy.getElementCount(), PtrTy);
      return PtrTy;
    }

    return ValTy;
  }

  unsigned AddrSpace = Flags.getPointerAddrSpace();
  return LLT::pointer(AddrSpace, DL.getPointerSize(AddrSpace));
}

void CallLowering::ValueHandler::copyArgumentMemory(
    const ArgInfo &Arg, Register DstPtr, Register SrcPtr,
    const MachinePointerInfo &DstPtrInfo, Align DstAlign,
    const MachinePointerInfo &SrcPtrInfo, Align SrcAlign, uint64_t MemSize,
    CCValAssign &VA) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand *SrcMMO = MF.getMachineMemOperand(
      SrcPtrInfo,
      MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable, MemSize,
      SrcAlign);

  MachineMemOperand *DstMMO = MF.getMachineMemOperand(
      DstPtrInfo,
      MachineMemOperand::MOStore | MachineMemOperand::MODereferenceable,
      MemSize, DstAlign);

  const LLT PtrTy = MRI.getType(DstPtr);
  const LLT SizeTy = LLT::scalar(PtrTy.getSizeInBits());

  auto SizeConst = MIRBuilder.buildConstant(SizeTy, MemSize);
  MIRBuilder.buildMemCpy(DstPtr, SrcPtr, SizeConst, *DstMMO, *SrcMMO);
}

Register CallLowering::ValueHandler::extendRegister(Register ValReg,
                                                    const CCValAssign &VA,
                                                    unsigned MaxSizeBits) {
  LLT LocTy{VA.getLocVT()};
  LLT ValTy{VA.getValVT()};

  if (LocTy.getSizeInBits() == ValTy.getSizeInBits())
    return ValReg;

  if (LocTy.isScalar() && MaxSizeBits && MaxSizeBits < LocTy.getSizeInBits()) {
    if (MaxSizeBits <= ValTy.getSizeInBits())
      return ValReg;
    LocTy = LLT::scalar(MaxSizeBits);
  }

  const LLT ValRegTy = MRI.getType(ValReg);
  if (ValRegTy.isPointer()) {
    // The x32 ABI wants to zero extend 32-bit pointers to 64-bit registers, so
    // we have to cast to do the extension.
    LLT IntPtrTy = LLT::scalar(ValRegTy.getSizeInBits());
    ValReg = MIRBuilder.buildPtrToInt(IntPtrTy, ValReg).getReg(0);
  }

  switch (VA.getLocInfo()) {
  default:
    break;
  case CCValAssign::Full:
  case CCValAssign::BCvt:
    // FIXME: bitconverting between vector types may or may not be a
    // nop in big-endian situations.
    return ValReg;
  case CCValAssign::AExt: {
    auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg);
    return MIB.getReg(0);
  }
  case CCValAssign::SExt: {
    Register NewReg = MRI.createGenericVirtualRegister(LocTy);
    MIRBuilder.buildSExt(NewReg, ValReg);
    return NewReg;
  }
  case CCValAssign::ZExt: {
    Register NewReg = MRI.createGenericVirtualRegister(LocTy);
    MIRBuilder.buildZExt(NewReg, ValReg);
    return NewReg;
  }
  }
  llvm_unreachable("unable to extend register");
}

void CallLowering::ValueAssigner::anchor() {}

Register CallLowering::IncomingValueHandler::buildExtensionHint(
    const CCValAssign &VA, Register SrcReg, LLT NarrowTy) {
  switch (VA.getLocInfo()) {
  case CCValAssign::LocInfo::ZExt: {
    return MIRBuilder
        .buildAssertZExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
                         NarrowTy.getScalarSizeInBits())
        .getReg(0);
  }
  case CCValAssign::LocInfo::SExt: {
    return MIRBuilder
        .buildAssertSExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
                         NarrowTy.getScalarSizeInBits())
        .getReg(0);
  }
  default:
    return SrcReg;
  }
}

/// Check if we can use a basic COPY instruction between the two types.
///
/// We're currently building on top of the infrastructure using MVT, which
/// loses pointer information in the CCValAssign. We accept copies from
/// physical registers that have been reported as integers if it's to an
/// equivalent sized pointer LLT.
static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
  if (SrcTy == DstTy)
    return true;

  if (SrcTy.getSizeInBits() != DstTy.getSizeInBits())
    return false;

  SrcTy = SrcTy.getScalarType();
  DstTy = DstTy.getScalarType();

  return (SrcTy.isPointer() && DstTy.isScalar()) ||
         (DstTy.isPointer() && SrcTy.isScalar());
}

void CallLowering::IncomingValueHandler::assignValueToReg(
    Register ValVReg, Register PhysReg, const CCValAssign &VA) {
  const MVT LocVT = VA.getLocVT();
  const LLT LocTy(LocVT);
  const LLT RegTy = MRI.getType(ValVReg);

  if (isCopyCompatibleType(RegTy, LocTy)) {
    MIRBuilder.buildCopy(ValVReg, PhysReg);
    return;
  }

  auto Copy = MIRBuilder.buildCopy(LocTy, PhysReg);
  auto Hint = buildExtensionHint(VA, Copy.getReg(0), RegTy);
  MIRBuilder.buildTrunc(ValVReg, Hint);
}