//=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of custom routines for the X86
// Calling Convention that aren't done by tablegen.
//
//===----------------------------------------------------------------------===//

#include "X86CallingConv.h"
#include "X86Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"

using namespace llvm;

/// When the regcall calling convention is compiled for a 32 bit arch, 64 bit
/// masks require special treatment: the value should be assigned to two GPRs.
/// \return true if registers were allocated and false otherwise.
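///
/// A hedged illustration (assumed example, not taken from this file): when
/// compiling for i386 with clang, a declaration such as
/// \code
///   __attribute__((regcall)) void f(__mmask64 Mask);
/// \endcode
/// passes the 64 bit mask as two 32 bit halves, which this routine places
/// into the first two free GPRs of the regcall GPR list below (e.g. EAX and
/// ECX when nothing else has been allocated).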
static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT,
                                          MVT &LocVT,
                                          CCValAssign::LocInfo &LocInfo,
                                          ISD::ArgFlagsTy &ArgFlags,
                                          CCState &State) {
  // List of GPR registers that are available to store values in the regcall
  // calling convention.
  static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,
                                      X86::ESI};

  // The vector will save all the available registers for allocation.
  SmallVector<unsigned, 5> AvailableRegs;

  // Search for the available registers.
  for (auto Reg : RegList) {
    if (!State.isAllocated(Reg))
      AvailableRegs.push_back(Reg);
  }

  const size_t RequiredGprsUponSplit = 2;
  if (AvailableRegs.size() < RequiredGprsUponSplit)
    return false; // Not enough free registers - continue the search.

  // Allocate the available registers.
  for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {

    // Mark the register as allocated.
    unsigned Reg = State.AllocateReg(AvailableRegs[I]);

    // Since we previously made sure that 2 registers are available
    // we expect that a real register number will be returned.
    assert(Reg && "Expecting a register will be available");

    // Assign the value to the allocated register.
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  }

  // Successfully allocated registers - stop scanning further rules.
  return true;
}

static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {
  if (ValVT.is512BitVector()) {
    static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,
                                           X86::ZMM3, X86::ZMM4, X86::ZMM5};
    return makeArrayRef(std::begin(RegListZMM), std::end(RegListZMM));
  }

  if (ValVT.is256BitVector()) {
    static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,
                                           X86::YMM3, X86::YMM4, X86::YMM5};
    return makeArrayRef(std::begin(RegListYMM), std::end(RegListYMM));
  }

  static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,
                                         X86::XMM3, X86::XMM4, X86::XMM5};
  return makeArrayRef(std::begin(RegListXMM), std::end(RegListXMM));
}

static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() {
  static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9};
  return makeArrayRef(std::begin(RegListGPR), std::end(RegListGPR));
}

static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
                                            MVT &LocVT,
                                            CCValAssign::LocInfo &LocInfo,
                                            ISD::ArgFlagsTy &ArgFlags,
                                            CCState &State) {
  ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT);
  bool Is64bit = static_cast<const X86Subtarget &>(
                     State.getMachineFunction().getSubtarget())
                     .is64Bit();

  for (auto Reg : RegList) {
    // If the register is not marked as allocated - assign to it.
    if (!State.isAllocated(Reg)) {
      unsigned AssignedReg = State.AllocateReg(Reg);
      assert(AssignedReg == Reg && "Expecting a valid register allocation");
      State.addLoc(
          CCValAssign::getReg(ValNo, ValVT, AssignedReg, LocVT, LocInfo));
      return true;
    }
    // If the register is marked as shadow allocated - assign to it.
    if (Is64bit && State.IsShadowAllocatedReg(Reg)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return true;
    }
  }

  llvm_unreachable("Clang should ensure that hva marked vectors will have "
                   "an available register.");
}

/// The vectorcall calling convention has special handling for vector types
/// and HVAs on 64 bit arches.
/// For HVAs, shadow registers might be allocated on the first pass
/// and actual XMM registers are allocated on the second pass.
/// For vector types, actual XMM registers are allocated on the first pass.
/// \return true if registers were allocated and false otherwise.
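///
/// A hedged illustration (assumed example): given
/// \code
///   typedef struct { __m128 V[4]; } Hva4; // a homogeneous vector aggregate
///   void __vectorcall f(int A, Hva4 B);
/// \endcode
/// B's members only reserve shadow registers on the first pass and are given
/// their actual XMM registers on the second pass handled below.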
static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                 CCValAssign::LocInfo &LocInfo,
                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    return true;
  }

  // Process only vector types as defined by the vectorcall spec:
  // "A vector type is either a floating-point type, for example,
  // a float or double, or an SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
    // If R9 was already assigned it means that we are after the fourth element
    // and because this is not an HVA / vector type, we need to allocate
    // a shadow XMM register.
    if (State.isAllocated(X86::R9)) {
      // Assign shadow XMM register.
      (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));
    }

    return false;
  }

  if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {
    // Assign shadow GPR register.
    (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());

    // Assign XMM register (shadow for HVA, non-shadow for non-HVA).
    if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
      // In the vectorcall calling convention, an additional shadow stack
      // can be created on top of the basic 32 bytes of win64.
      // It can happen if the fifth or sixth argument is a vector type or HVA.
      // In that case a shadow stack of 8 bytes is allocated for each argument.
      const TargetRegisterInfo *TRI =
          State.getMachineFunction().getSubtarget().getRegisterInfo();
      if (TRI->regsOverlap(Reg, X86::XMM4) ||
          TRI->regsOverlap(Reg, X86::XMM5))
        State.AllocateStack(8, 8);

      if (!ArgFlags.isHva()) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
        return true; // Allocated a register - stop the search.
      }
    }
  }

  // If this is an HVA - stop the search;
  // otherwise continue the search.
  return ArgFlags.isHva();
}

/// The vectorcall calling convention has special handling for vector types
/// and HVAs on 32 bit arches.
/// For HVAs, actual XMM registers are allocated on the second pass.
/// For vector types, actual XMM registers are allocated on the first pass.
/// \return true if registers were allocated and false otherwise.
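///
/// A hedged illustration (assumed example): in
/// \code
///   void __vectorcall g(__m128 V, Hva4 H); // Hva4 as sketched above
/// \endcode
/// V is assigned an XMM register on the first pass, while H's members only
/// receive theirs on the second pass.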
static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                 CCValAssign::LocInfo &LocInfo,
                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    return true;
  }

  // Process only vector types as defined by the vectorcall spec:
  // "A vector type is either a floating point type, for example,
  // a float or double, or an SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128)))
    return false;

  if (ArgFlags.isHva())
    return true; // If this is an HVA - stop the search.

  // Assign XMM register.
  if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return true;
  }

  // In case we did not find an available XMM register for a vector -
  // pass it indirectly.
  // It is similar to CCPassIndirect, with the addition of inreg.
  if (!ValVT.isFloatingPoint()) {
    LocVT = MVT::i32;
    LocInfo = CCValAssign::Indirect;
    ArgFlags.setInReg();
  }

  return false; // No register was assigned - continue the search.
}

static bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
                                CCState &) {
  llvm_unreachable("The AnyReg calling convention is only supported by the "
                   "stackmap and patchpoint intrinsics.");
  // Gracefully fall back to the X86 C calling convention on Release builds.
  return false;
}

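/// A hedged illustration for the MCU handling below (assumed example): under
/// clang's -miamcu target, an i64 argument such as
/// \code
///   void f(long long X);
/// \endcode
/// arrives split into two i32 pieces; the pending-list logic below places
/// both pieces in registers (EAX:EDX) or both on the stack, never one of
/// each.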
static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                               CCValAssign::LocInfo &LocInfo,
                               ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
  // not to split i64 and double between a register and the stack.
  static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
  static const unsigned NumRegs = sizeof(RegList) / sizeof(RegList[0]);

  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();

  // If this is the first part of a double/i64/i128, or if we're already in
  // the middle of a split, add to the pending list. If this is not the end
  // of the split, return, otherwise go on to process the pending list.
  if (ArgFlags.isSplit() || !PendingMembers.empty()) {
    PendingMembers.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    if (!ArgFlags.isSplitEnd())
      return true;
  }

  // If there are no pending members, we are not in the middle of a split,
  // so do the usual inreg stuff.
  if (PendingMembers.empty()) {
    if (unsigned Reg = State.AllocateReg(RegList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return true;
    }
    return false;
  }

  assert(ArgFlags.isSplitEnd());

  // We now have the entire original argument in PendingMembers, so decide
  // whether to use registers or the stack:
  // a) To use registers, we need to have enough of them free to contain
  // the entire argument.
  // b) We never want to use more than 2 registers for a single argument.

  unsigned FirstFree = State.getFirstUnallocated(RegList);
  bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);

  for (auto &It : PendingMembers) {
    if (UseRegs)
      It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
    else
      It.convertToMem(State.AllocateStack(4, 4));
    State.addLoc(It);
  }

  PendingMembers.clear();

  return true;
}

/// X86 interrupt handlers can only take one or two stack arguments, but if
/// there are two arguments, they are in the opposite order from the standard
/// convention. Therefore, we have to look at the argument count up front before
/// allocating stack for each argument.
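///
/// For reference, the accepted prototypes follow the GCC/Clang x86
/// 'interrupt' attribute convention (frame type name is illustrative):
/// \code
///   void handler(struct interrupt_frame *Frame);                // one arg
///   void handler(struct interrupt_frame *Frame, uword_t Code);  // two args
/// \endcode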
static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                        CCValAssign::LocInfo &LocInfo,
                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  const MachineFunction &MF = State.getMachineFunction();
  size_t ArgCount = MF.getFunction().arg_size();
  bool Is64Bit = static_cast<const X86Subtarget &>(MF.getSubtarget()).is64Bit();
  unsigned SlotSize = Is64Bit ? 8 : 4;
  unsigned Offset;

  if (ArgCount == 1 && ValNo == 0) {
    // If we have one argument, the argument is five stack slots big, at fixed
    // offset zero.
    Offset = State.AllocateStack(5 * SlotSize, 4);
  } else if (ArgCount == 2 && ValNo == 0) {
    // If we have two arguments, the stack slot is *after* the error code
    // argument. Pretend it doesn't consume stack space, and account for it
    // when we assign the second argument.
    Offset = SlotSize;
  } else if (ArgCount == 2 && ValNo == 1) {
    // If this is the second of two arguments, it must be the error code. It
    // appears first on the stack, and is then followed by the five slot
    // interrupt struct.
    Offset = 0;
    (void)State.AllocateStack(6 * SlotSize, 4);
  } else {
    report_fatal_error("unsupported x86 interrupt prototype");
  }

  // FIXME: This should be accounted for in
  // X86FrameLowering::getFrameIndexReference, not here.
  if (Is64Bit && ArgCount == 2)
    Offset += SlotSize;

329 State
.addLoc(CCValAssign::getMem(ValNo
, ValVT
, Offset
, LocVT
, LocInfo
));
// Provides entry points of CC_X86 and RetCC_X86.
#include "X86GenCallingConv.inc"