//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/TargetParser.h"
using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"
static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);

// If the OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool>
    UseNonLazyBind("aarch64-enable-nonlazybind",
                   cl::desc("Call nonlazybind functions via direct GOT load"),
                   cl::init(false), cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));
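
// Illustration (a hedged sketch, assuming a standard llc build; the triple and
// input file below are placeholders): because the options above are cl::opt
// flags, they surface as (mostly hidden) llc command-line switches, e.g.
//
//   llc -mtriple=aarch64-linux-gnu -aarch64-use-tbi -aarch64-use-aa=false foo.ll
//
// Boolean cl::opt flags accept both the bare form and the explicit =true/=false
// form shown for -aarch64-use-aa.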
AArch64Subtarget &
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
                                                  StringRef CPUString) {
  // Determine default and user-specified characteristics.

  if (CPUString.empty())
    CPUString = "generic";

  ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS);
  initializeProperties();

  return *this;
}
void AArch64Subtarget::initializeProperties() {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Others:
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
    break;
  case CortexA53:
  case CortexA55:
    PrefFunctionLogAlignment = 4;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionLogAlignment = 4;
    break;
  case CortexA65:
    PrefFunctionLogAlignment = 3;
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78C:
  case CortexR82:
  case CortexX1:
    PrefFunctionLogAlignment = 4;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionLogAlignment = 5;
    PrefLoopLogAlignment = 4;
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionLogAlignment = 3;
    break;
  case NeoverseN1:
  case NeoverseN2:
  case NeoverseV1:
    PrefFunctionLogAlignment = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 2;
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  }
}
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
                                   const std::string &FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride)
    : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
      FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)),
      TSInfo(), TLInfo(TM, *this) {
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);
}
const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}
/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
    if (GV->hasDLLImportStorageClass())
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a pc relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}
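
// Illustrative usage (a hedged sketch, not taken from this file): ISel code
// consults these flags when lowering a global address, along the lines of
//
//   unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
//   if (OpFlags & AArch64II::MO_GOT) {
//     // Materialize the address indirectly through its GOT entry.
//   }
//
// where `Subtarget` is assumed to be the current AArch64Subtarget and `GV`
// the GlobalValue being lowered.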
unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
      !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return AArch64II::MO_GOT;

  // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
  if (getTargetTriple().isOSWindows())
    return ClassifyGlobalReference(GV, TM);

  return AArch64II::MO_NO_FLAG;
}
void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling. 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help nearly no benchmark on out-of-order architectures, while on the other
  // hand it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}
bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}
bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

  if (TargetTriple.isiOS()) {
    unsigned Major, Minor, Micro;
    TargetTriple.getiOSVersion(Major, Minor, Micro);
    return Major >= 8;
  }

  return false;
}
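
// For intuition (a hedged illustration, not part of the original file): with
// TBI the hardware ignores bits [63:56] of an address on loads and stores, so
// a tag can live in the top byte without being masked off before each access:
//
//   uint64_t Tagged = Addr | (uint64_t(Tag) << 56);
//   // With TBI enabled, dereferencing Tagged reaches the same memory as Addr.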
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}
void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions; specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}
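
// Illustration (hedged, abridged): a .mir file can pin the value itself in the
// machine-function YAML, in which case the recomputation above is skipped:
//
//   frameInfo:
//     maxCallFrameSize: 16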
bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
  // Prefer NEON unless larger SVE registers are available.
  return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
}
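
// Illustration (hedged; foo.ll is a placeholder): the Min/MaxSVEVectorSizeInBits
// overrides reaching this subtarget are typically driven by llc flags, so e.g.
//
//   llc -mtriple=aarch64 -mattr=+sve -aarch64-sve-vector-bits-min=256 foo.ll
//
// should make useSVEForFixedLengthVectors() return true.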
bool AArch64Subtarget::useAA() const { return UseAA; }