//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
13 #include "AArch64Subtarget.h"
16 #include "AArch64InstrInfo.h"
17 #include "AArch64PBQPRegAlloc.h"
18 #include "AArch64TargetMachine.h"
19 #include "GISel/AArch64CallLowering.h"
20 #include "GISel/AArch64LegalizerInfo.h"
21 #include "GISel/AArch64RegisterBankInfo.h"
22 #include "MCTargetDesc/AArch64AddressingModes.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineScheduler.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/Support/AArch64TargetParser.h"
28 #include "llvm/Support/TargetParser.h"
32 #define DEBUG_TYPE "aarch64-subtarget"
34 #define GET_SUBTARGETINFO_CTOR
35 #define GET_SUBTARGETINFO_TARGET_DESC
36 #include "AArch64GenSubtargetInfo.inc"
39 EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
40 "converter pass"), cl::init(true), cl::Hidden
);
42 // If OS supports TBI, use this flag to enable it.
44 UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
45 "an address is ignored"), cl::init(false), cl::Hidden
);
48 UseNonLazyBind("aarch64-enable-nonlazybind",
49 cl::desc("Call nonlazybind functions via direct GOT load"),
50 cl::init(false), cl::Hidden
);
52 static cl::opt
<bool> UseAA("aarch64-use-aa", cl::init(true),
53 cl::desc("Enable the use of AA during codegen."));
55 AArch64Subtarget
&AArch64Subtarget::initializeSubtargetDependencies(
56 StringRef FS
, StringRef CPUString
, StringRef TuneCPUString
) {
57 // Determine default and user-specified characteristics
59 if (CPUString
.empty())
60 CPUString
= "generic";
62 if (TuneCPUString
.empty())
63 TuneCPUString
= CPUString
;
65 ParseSubtargetFeatures(CPUString
, TuneCPUString
, FS
);
66 initializeProperties();
/// Assigns the CPU-specific tuning fields of this subtarget by switching on
/// ARMProcFamily. The fields written in the visible lines are
/// PrefFunctionLogAlignment, PrefLoopLogAlignment, MaxBytesForLoopAlignment,
/// MaxInterleaveFactor, MaxJumpTableSize, PrefetchDistance, MinPrefetchStride,
/// MaxPrefetchIterationsAhead, MinVectorRegisterBitWidth and
/// VectorInsertExtractBaseCost.
///
/// NOTE(review): the case labels, break statements and closing braces of the
/// switch are not visible in this listing, so which processor family each
/// group of assignments belongs to cannot be determined from here. Confirm
/// against the complete file before changing any value.
71 void AArch64Subtarget::initializeProperties() {
72 // Initialize CPU specific properties. We should add a tablegen feature for
73 // this in the future so we can specify it together with the subtarget
75 switch (ARMProcFamily
) {
85 PrefFunctionLogAlignment
= 4;
88 MaxInterleaveFactor
= 4;
89 PrefFunctionLogAlignment
= 4;
92 PrefFunctionLogAlignment
= 3;
104 PrefFunctionLogAlignment
= 4;
109 PrefFunctionLogAlignment
= 4;
114 PrefFunctionLogAlignment
= 3;
115 PrefLoopLogAlignment
= 2;
116 MaxInterleaveFactor
= 4;
117 PrefetchDistance
= 128;
118 MinPrefetchStride
= 1024;
119 MaxPrefetchIterationsAhead
= 4;
129 PrefetchDistance
= 280;
130 MinPrefetchStride
= 2048;
131 MaxPrefetchIterationsAhead
= 3;
134 MaxInterleaveFactor
= 4;
135 MaxJumpTableSize
= 20;
136 PrefFunctionLogAlignment
= 5;
137 PrefLoopLogAlignment
= 4;
140 MaxInterleaveFactor
= 4;
141 // FIXME: remove this to enable 64-bit SLP if performance looks good.
142 MinVectorRegisterBitWidth
= 128;
144 PrefetchDistance
= 820;
145 MinPrefetchStride
= 2048;
146 MaxPrefetchIterationsAhead
= 8;
149 MaxInterleaveFactor
= 4;
150 VectorInsertExtractBaseCost
= 2;
152 PrefetchDistance
= 740;
153 MinPrefetchStride
= 1024;
154 MaxPrefetchIterationsAhead
= 11;
155 // FIXME: remove this to enable 64-bit SLP if performance looks good.
156 MinVectorRegisterBitWidth
= 128;
159 PrefFunctionLogAlignment
= 3;
162 PrefFunctionLogAlignment
= 4;
163 PrefLoopLogAlignment
= 5;
164 MaxBytesForLoopAlignment
= 16;
167 PrefFunctionLogAlignment
= 4;
168 PrefLoopLogAlignment
= 5;
169 MaxBytesForLoopAlignment
= 16;
173 PrefFunctionLogAlignment
= 4;
174 PrefLoopLogAlignment
= 5;
175 MaxBytesForLoopAlignment
= 16;
179 PrefFunctionLogAlignment
= 4;
181 MaxInterleaveFactor
= 4;
184 MaxInterleaveFactor
= 4;
185 // FIXME: remove this to enable 64-bit SLP if performance looks good.
186 MinVectorRegisterBitWidth
= 128;
190 PrefFunctionLogAlignment
= 3;
191 PrefLoopLogAlignment
= 2;
192 MaxInterleaveFactor
= 4;
193 PrefetchDistance
= 128;
194 MinPrefetchStride
= 1024;
195 MaxPrefetchIterationsAhead
= 4;
196 // FIXME: remove this to enable 64-bit SLP if performance looks good.
197 MinVectorRegisterBitWidth
= 128;
204 PrefFunctionLogAlignment
= 3;
205 PrefLoopLogAlignment
= 2;
206 // FIXME: remove this to enable 64-bit SLP if performance looks good.
207 MinVectorRegisterBitWidth
= 128;
211 PrefFunctionLogAlignment
= 4;
212 PrefLoopLogAlignment
= 2;
216 PrefFunctionLogAlignment
= 4;
217 PrefLoopLogAlignment
= 2;
218 MaxInterleaveFactor
= 4;
219 PrefetchDistance
= 128;
220 MinPrefetchStride
= 1024;
221 MaxPrefetchIterationsAhead
= 4;
222 // FIXME: remove this to enable 64-bit SLP if performance looks good.
223 MinVectorRegisterBitWidth
= 128;
/// Constructs the subtarget.
///
/// The member-initializer list forwards TT, CPU, TuneCPU and FS to
/// AArch64GenSubtargetInfo, sizes ReserveXRegister and CustomCallSavedXRegs
/// by AArch64::GPR64commonRegClass.getNumRegs(), records the endianness and
/// the two SVE vector-size overrides, copies the triple, and initializes
/// InstrInfo from initializeSubtargetDependencies(FS, CPU, TuneCPU).
///
/// The body then reserves X18 when the triple requires it and creates the
/// GlobalISel helper objects (call lowering, inline-asm lowering, legalizer,
/// register bank info and instruction selector).
///
/// NOTE(review): in this listing the initializer list ends with a trailing
/// comma and the opening brace of the body is not visible, so at least one
/// further initializer appears to be missing from view. Confirm against the
/// complete file.
228 AArch64Subtarget::AArch64Subtarget(const Triple
&TT
, const std::string
&CPU
,
229 const std::string
&TuneCPU
,
230 const std::string
&FS
,
231 const TargetMachine
&TM
, bool LittleEndian
,
232 unsigned MinSVEVectorSizeInBitsOverride
,
233 unsigned MaxSVEVectorSizeInBitsOverride
)
234 : AArch64GenSubtargetInfo(TT
, CPU
, TuneCPU
, FS
),
235 ReserveXRegister(AArch64::GPR64commonRegClass
.getNumRegs()),
236 CustomCallSavedXRegs(AArch64::GPR64commonRegClass
.getNumRegs()),
237 IsLittle(LittleEndian
),
238 MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride
),
239 MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride
), TargetTriple(TT
),
240 InstrInfo(initializeSubtargetDependencies(FS
, CPU
, TuneCPU
)),
// Mark X18 as reserved when isX18ReservedByDefault reports so for this triple.
242 if (AArch64::isX18ReservedByDefault(TT
))
243 ReserveXRegister
.set(18);
// Create the GlobalISel components; each holder takes the freshly allocated
// object through reset().
245 CallLoweringInfo
.reset(new AArch64CallLowering(*getTargetLowering()));
246 InlineAsmLoweringInfo
.reset(new InlineAsmLowering(getTargetLowering()));
247 Legalizer
.reset(new AArch64LegalizerInfo(*this));
// RBI is kept in a local pointer because the instruction selector below needs
// it before RegBankInfo has been given the object.
249 auto *RBI
= new AArch64RegisterBankInfo(*getRegisterInfo());
251 // FIXME: At this point, we can't rely on Subtarget having RBI.
252 // It's awkward to mix passing RBI and the Subtarget; should we pass
254 InstSelector
.reset(createAArch64InstructionSelector(
255 *static_cast<const AArch64TargetMachine
*>(&TM
), *this, *RBI
));
// Hand the register bank info created above to RegBankInfo.
257 RegBankInfo
.reset(RBI
);
260 const CallLowering
*AArch64Subtarget::getCallLowering() const {
261 return CallLoweringInfo
.get();
264 const InlineAsmLowering
*AArch64Subtarget::getInlineAsmLowering() const {
265 return InlineAsmLoweringInfo
.get();
268 InstructionSelector
*AArch64Subtarget::getInstructionSelector() const {
269 return InstSelector
.get();
272 const LegalizerInfo
*AArch64Subtarget::getLegalizerInfo() const {
273 return Legalizer
.get();
276 const RegisterBankInfo
*AArch64Subtarget::getRegBankInfo() const {
277 return RegBankInfo
.get();
280 /// Find the target operand flags that describe how a global value should be
281 /// referenced for the current subtarget.
283 AArch64Subtarget::ClassifyGlobalReference(const GlobalValue
*GV
,
284 const TargetMachine
&TM
) const {
285 // MachO large model always goes via a GOT, simply to get a single 8-byte
286 // absolute relocation on all global addresses.
287 if (TM
.getCodeModel() == CodeModel::Large
&& isTargetMachO())
288 return AArch64II::MO_GOT
;
290 if (!TM
.shouldAssumeDSOLocal(*GV
->getParent(), GV
)) {
291 if (GV
->hasDLLImportStorageClass())
292 return AArch64II::MO_GOT
| AArch64II::MO_DLLIMPORT
;
293 if (getTargetTriple().isOSWindows())
294 return AArch64II::MO_GOT
| AArch64II::MO_COFFSTUB
;
295 return AArch64II::MO_GOT
;
298 // The small code model's direct accesses use ADRP, which cannot
299 // necessarily produce the value 0 (if the code is above 4GB).
300 // Same for the tiny code model, where we have a pc relative LDR.
301 if ((useSmallAddressing() || TM
.getCodeModel() == CodeModel::Tiny
) &&
302 GV
->hasExternalWeakLinkage())
303 return AArch64II::MO_GOT
;
305 // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
306 // that their nominal addresses are tagged and outside of the code model. In
307 // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
308 // tag if necessary based on MO_TAGGED.
309 if (AllowTaggedGlobals
&& !isa
<FunctionType
>(GV
->getValueType()))
310 return AArch64II::MO_NC
| AArch64II::MO_TAGGED
;
312 return AArch64II::MO_NO_FLAG
;
315 unsigned AArch64Subtarget::classifyGlobalFunctionReference(
316 const GlobalValue
*GV
, const TargetMachine
&TM
) const {
317 // MachO large model always goes via a GOT, because we don't have the
318 // relocations available to do anything else..
319 if (TM
.getCodeModel() == CodeModel::Large
&& isTargetMachO() &&
320 !GV
->hasInternalLinkage())
321 return AArch64II::MO_GOT
;
323 // NonLazyBind goes via GOT unless we know it's available locally.
324 auto *F
= dyn_cast
<Function
>(GV
);
325 if (UseNonLazyBind
&& F
&& F
->hasFnAttribute(Attribute::NonLazyBind
) &&
326 !TM
.shouldAssumeDSOLocal(*GV
->getParent(), GV
))
327 return AArch64II::MO_GOT
;
329 // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
330 if (getTargetTriple().isOSWindows())
331 return ClassifyGlobalReference(GV
, TM
);
333 return AArch64II::MO_NO_FLAG
;
336 void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy
&Policy
,
337 unsigned NumRegionInstrs
) const {
338 // LNT run (at least on Cyclone) showed reasonably significant gains for
339 // bi-directional scheduling. 253.perlbmk.
340 Policy
.OnlyTopDown
= false;
341 Policy
.OnlyBottomUp
= false;
342 // Enabling or Disabling the latency heuristic is a close call: It seems to
343 // help nearly no benchmark on out-of-order architectures, on the other hand
344 // it regresses register pressure on a few benchmarking.
345 Policy
.DisableLatencyHeuristic
= DisableLatencySchedHeuristic
;
348 bool AArch64Subtarget::enableEarlyIfConversion() const {
349 return EnableEarlyIfConvert
;
/// Reports whether the top byte of an address may be treated as ignored.
/// The decision consults, in order, the UseAddressTopByteIgnored option,
/// whether the target triple is DriverKit, and, for iOS triples, whether the
/// iOS version is at least 8.
///
/// NOTE(review): the statements controlled by the first two `if`s and the
/// final return after the iOS check are not visible in this listing. The
/// values returned on those paths must be confirmed against the complete
/// file.
352 bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
// Gate on the "aarch64-use-tbi" option.
353 if (!UseAddressTopByteIgnored
)
356 if (TargetTriple
.isDriverKit())
// On iOS the answer depends on the OS version taken from the triple.
358 if (TargetTriple
.isiOS()) {
359 return TargetTriple
.getiOSVersion() >= VersionTuple(8);
365 std::unique_ptr
<PBQPRAConstraint
>
366 AArch64Subtarget::getCustomPBQPConstraints() const {
367 return balanceFPOps() ? std::make_unique
<A57ChainingConstraint
>() : nullptr;
370 void AArch64Subtarget::mirFileLoaded(MachineFunction
&MF
) const {
371 // We usually compute max call frame size after ISel. Do the computation now
372 // if the .mir file didn't specify it. Note that this will probably give you
373 // bogus values after PEI has eliminated the callframe setup/destroy pseudo
374 // instructions, specify explicitly if you need it to be correct.
375 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
376 if (!MFI
.isMaxCallFrameSizeComputed())
377 MFI
.computeMaxCallFrameSize(MF
);
380 bool AArch64Subtarget::useAA() const { return UseAA
; }