[AMDGPU] New gfx940 mfma instructions
[llvm-project.git] / llvm / lib / Target / AArch64 / AArch64CallingConvention.cpp
blobbfcafc6442d241f09ac1a3e02c0fbaf4990a8723
//=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the table-generated and custom routines for the AArch64
// Calling Convention.
//
//===----------------------------------------------------------------------===//
#include "AArch64CallingConvention.h"
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/CallingConv.h"
using namespace llvm;
23 static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
24 AArch64::X3, AArch64::X4, AArch64::X5,
25 AArch64::X6, AArch64::X7};
26 static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
27 AArch64::H3, AArch64::H4, AArch64::H5,
28 AArch64::H6, AArch64::H7};
29 static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
30 AArch64::S3, AArch64::S4, AArch64::S5,
31 AArch64::S6, AArch64::S7};
32 static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
33 AArch64::D3, AArch64::D4, AArch64::D5,
34 AArch64::D6, AArch64::D7};
35 static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
36 AArch64::Q3, AArch64::Q4, AArch64::Q5,
37 AArch64::Q6, AArch64::Q7};
38 static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
39 AArch64::Z3, AArch64::Z4, AArch64::Z5,
40 AArch64::Z6, AArch64::Z7};
42 static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
43 MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
44 CCState &State, Align SlotAlign) {
45 if (LocVT.isScalableVector()) {
46 const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
47 State.getMachineFunction().getSubtarget());
48 const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
50 // We are about to reinvoke the CCAssignFn auto-generated handler. If we
51 // don't unset these flags we will get stuck in an infinite loop forever
52 // invoking the custom handler.
53 ArgFlags.setInConsecutiveRegs(false);
54 ArgFlags.setInConsecutiveRegsLast(false);
56 // The calling convention for passing SVE tuples states that in the event
57 // we cannot allocate enough registers for the tuple we should still leave
58 // any remaining registers unallocated. However, when we call the
59 // CCAssignFn again we want it to behave as if all remaining registers are
60 // allocated. This will force the code to pass the tuple indirectly in
61 // accordance with the PCS.
62 bool RegsAllocated[8];
63 for (int I = 0; I < 8; I++) {
64 RegsAllocated[I] = State.isAllocated(ZRegList[I]);
65 State.AllocateReg(ZRegList[I]);
68 auto &It = PendingMembers[0];
69 CCAssignFn *AssignFn =
70 TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
71 if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full,
72 ArgFlags, State))
73 llvm_unreachable("Call operand has unhandled type");
75 // Return the flags to how they were before.
76 ArgFlags.setInConsecutiveRegs(true);
77 ArgFlags.setInConsecutiveRegsLast(true);
79 // Return the register state back to how it was before, leaving any
80 // unallocated registers available for other smaller types.
81 for (int I = 0; I < 8; I++)
82 if (!RegsAllocated[I])
83 State.DeallocateReg(ZRegList[I]);
85 // All pending members have now been allocated
86 PendingMembers.clear();
87 return true;
90 unsigned Size = LocVT.getSizeInBits() / 8;
91 for (auto &It : PendingMembers) {
92 It.convertToMem(State.AllocateStack(Size, SlotAlign));
93 State.addLoc(It);
94 SlotAlign = Align(1);
97 // All pending members have now been allocated
98 PendingMembers.clear();
99 return true;
102 /// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
103 /// [N x Ty] type must still be contiguous in memory though.
104 static bool CC_AArch64_Custom_Stack_Block(
105 unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
106 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
107 SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
109 // Add the argument to the list to be allocated once we know the size of the
110 // block.
111 PendingMembers.push_back(
112 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
114 if (!ArgFlags.isInConsecutiveRegsLast())
115 return true;
117 return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, Align(8));
120 /// Given an [N x Ty] block, it should be passed in a consecutive sequence of
121 /// registers. If no such sequence is available, mark the rest of the registers
122 /// of that type as used and place the argument on the stack.
123 static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
124 CCValAssign::LocInfo &LocInfo,
125 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
126 const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
127 State.getMachineFunction().getSubtarget());
128 bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();
130 // Try to allocate a contiguous block of registers, each of the correct
131 // size to hold one member.
132 ArrayRef<MCPhysReg> RegList;
133 if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
134 RegList = XRegList;
135 else if (LocVT.SimpleTy == MVT::f16)
136 RegList = HRegList;
137 else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
138 RegList = SRegList;
139 else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
140 RegList = DRegList;
141 else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
142 RegList = QRegList;
143 else if (LocVT.isScalableVector())
144 RegList = ZRegList;
145 else {
146 // Not an array we want to split up after all.
147 return false;
150 SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
152 // Add the argument to the list to be allocated once we know the size of the
153 // block.
154 PendingMembers.push_back(
155 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
157 if (!ArgFlags.isInConsecutiveRegsLast())
158 return true;
160 // [N x i32] arguments get packed into x-registers on Darwin's arm64_32
161 // because that's how the armv7k Clang front-end emits small structs.
162 unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
163 unsigned RegResult = State.AllocateRegBlock(
164 RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);
165 if (RegResult && EltsPerReg == 1) {
166 for (auto &It : PendingMembers) {
167 It.convertToReg(RegResult);
168 State.addLoc(It);
169 ++RegResult;
171 PendingMembers.clear();
172 return true;
173 } else if (RegResult) {
174 assert(EltsPerReg == 2 && "unexpected ABI");
175 bool UseHigh = false;
176 CCValAssign::LocInfo Info;
177 for (auto &It : PendingMembers) {
178 Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;
179 State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult,
180 MVT::i64, Info));
181 UseHigh = !UseHigh;
182 if (!UseHigh)
183 ++RegResult;
185 PendingMembers.clear();
186 return true;
189 if (!LocVT.isScalableVector()) {
190 // Mark all regs in the class as unavailable
191 for (auto Reg : RegList)
192 State.AllocateReg(Reg);
195 const Align StackAlign =
196 State.getMachineFunction().getDataLayout().getStackAlignment();
197 const Align MemAlign = ArgFlags.getNonZeroMemAlign();
198 Align SlotAlign = std::min(MemAlign, StackAlign);
199 if (!Subtarget.isTargetDarwin())
200 SlotAlign = std::max(SlotAlign, Align(8));
202 return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
205 // TableGen provides definitions of the calling convention analysis entry
206 // points.
207 #include "AArch64GenCallingConv.inc"