1 //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the AArch64 specific subclass of TargetSubtarget.
11 //===----------------------------------------------------------------------===//
13 #include "AArch64Subtarget.h"
16 #include "AArch64InstrInfo.h"
17 #include "AArch64PBQPRegAlloc.h"
18 #include "AArch64TargetMachine.h"
19 #include "GISel/AArch64CallLowering.h"
20 #include "GISel/AArch64LegalizerInfo.h"
21 #include "GISel/AArch64RegisterBankInfo.h"
22 #include "MCTargetDesc/AArch64AddressingModes.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
24 #include "llvm/CodeGen/MachineScheduler.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/Support/TargetParser.h"
30 #define DEBUG_TYPE "aarch64-subtarget"
32 #define GET_SUBTARGETINFO_CTOR
33 #define GET_SUBTARGETINFO_TARGET_DESC
34 #include "AArch64GenSubtargetInfo.inc"
// Command-line tuning knobs for the AArch64 subtarget.
// NOTE(review): the "static cl::opt<bool>" declarators that should precede
// the first three option names appear to have been dropped by extraction
// (the embedded original line numbers skip where they stood) -- confirm
// against the upstream file before editing.
//
// -aarch64-early-ifcvt: enable the early if-conversion pass (default on);
// read by AArch64Subtarget::enableEarlyIfConversion() below.
37 EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
38 "converter pass"), cl::init(true), cl::Hidden
);
40 // If OS supports TBI, use this flag to enable it.
// -aarch64-use-tbi: assume top-byte-ignore addressing (default off); read
// by AArch64Subtarget::supportsAddressTopByteIgnored() below.
42 UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
43 "an address is ignored"), cl::init(false), cl::Hidden
);
// -aarch64-enable-nonlazybind: route calls to nonlazybind functions through
// the GOT (default off); read by classifyGlobalFunctionReference() below.
46 UseNonLazyBind("aarch64-enable-nonlazybind",
47 cl::desc("Call nonlazybind functions via direct GOT load"),
48 cl::init(false), cl::Hidden
);
// -aarch64-use-aa: let codegen query alias analysis (default on); read by
// AArch64Subtarget::useAA() below.
50 static cl::opt
<bool> UseAA("aarch64-use-aa", cl::init(true),
51 cl::desc("Enable the use of AA during codegen."));
// Apply the CPU and feature strings to this subtarget: default the CPU to
// "generic" when none was given, parse the feature bits, then derive the
// per-CPU tuning properties.
// NOTE(review): the return-type line and the trailing "return *this;" / "}"
// are not visible in this extraction (embedded line numbers jump over them);
// upstream this returns AArch64Subtarget& so it can be chained inside the
// constructor's member-init list -- confirm before editing.
54 AArch64Subtarget::initializeSubtargetDependencies(StringRef FS
,
55 StringRef CPUString
) {
56 // Determine default and user-specified characteristics
// An empty CPU string means no explicit CPU was supplied; use "generic".
58 if (CPUString
.empty())
59 CPUString
= "generic";
// CPUString doubles as the tune CPU (no separate tune string here).
61 ParseSubtargetFeatures(CPUString
, /*TuneCPU*/ CPUString
, FS
);
// Set CPU-family-specific tuning knobs (see initializeProperties below).
62 initializeProperties();
// Set per-CPU-family tuning parameters: preferred function/loop alignment
// (log2), interleave factors, prefetch tuning, jump-table limits, and the
// minimum SLP vector register width.
// NOTE(review): extraction has dropped every 'case <Family>:' label, every
// 'break;', and the closing braces of this switch (the embedded original
// line numbers skip where they stood). Which CPU family owns each group of
// assignments below CANNOT be determined from this view; the "// --" markers
// flag where labels were elided. Confirm against the upstream file before
// editing any value.
67 void AArch64Subtarget::initializeProperties() {
68 // Initialize CPU specific properties. We should add a tablegen feature for
69 // this in the future so we can specify it together with the subtarget
71 switch (ARMProcFamily
) {
// -- elided 'case' label(s) --
81 PrefFunctionLogAlignment
= 4;
// -- elided 'case' label(s) --
84 MaxInterleaveFactor
= 4;
85 PrefFunctionLogAlignment
= 4;
// -- elided 'case' label(s) --
88 PrefFunctionLogAlignment
= 3;
// -- elided 'case' label(s) --
99 PrefFunctionLogAlignment
= 4;
// -- elided 'case' label(s) --
103 PrefFunctionLogAlignment
= 3;
104 PrefLoopLogAlignment
= 2;
105 MaxInterleaveFactor
= 4;
106 PrefetchDistance
= 128;
107 MinPrefetchStride
= 1024;
108 MaxPrefetchIterationsAhead
= 4;
// -- elided 'case' label(s) --
117 PrefetchDistance
= 280;
118 MinPrefetchStride
= 2048;
119 MaxPrefetchIterationsAhead
= 3;
// -- elided 'case' label(s) --
122 MaxInterleaveFactor
= 4;
123 MaxJumpTableSize
= 20;
124 PrefFunctionLogAlignment
= 5;
125 PrefLoopLogAlignment
= 4;
// -- elided 'case' label(s) --
128 MaxInterleaveFactor
= 4;
129 // FIXME: remove this to enable 64-bit SLP if performance looks good.
130 MinVectorRegisterBitWidth
= 128;
// -- elided line(s) --
132 PrefetchDistance
= 820;
133 MinPrefetchStride
= 2048;
134 MaxPrefetchIterationsAhead
= 8;
// -- elided 'case' label(s) --
137 MaxInterleaveFactor
= 4;
138 VectorInsertExtractBaseCost
= 2;
// -- elided line(s) --
140 PrefetchDistance
= 740;
141 MinPrefetchStride
= 1024;
142 MaxPrefetchIterationsAhead
= 11;
143 // FIXME: remove this to enable 64-bit SLP if performance looks good.
144 MinVectorRegisterBitWidth
= 128;
// -- elided 'case' label(s) --
147 PrefFunctionLogAlignment
= 3;
// -- elided 'case' label(s) --
152 PrefFunctionLogAlignment
= 4;
// -- elided 'case' label(s) --
155 MaxInterleaveFactor
= 4;
156 // FIXME: remove this to enable 64-bit SLP if performance looks good.
157 MinVectorRegisterBitWidth
= 128;
// -- elided 'case' label(s) --
161 PrefFunctionLogAlignment
= 3;
162 PrefLoopLogAlignment
= 2;
163 MaxInterleaveFactor
= 4;
164 PrefetchDistance
= 128;
165 MinPrefetchStride
= 1024;
166 MaxPrefetchIterationsAhead
= 4;
167 // FIXME: remove this to enable 64-bit SLP if performance looks good.
168 MinVectorRegisterBitWidth
= 128;
// -- elided 'case' label(s) --
175 PrefFunctionLogAlignment
= 3;
176 PrefLoopLogAlignment
= 2;
177 // FIXME: remove this to enable 64-bit SLP if performance looks good.
178 MinVectorRegisterBitWidth
= 128;
// -- elided 'case' label(s) --
182 PrefFunctionLogAlignment
= 4;
183 PrefLoopLogAlignment
= 2;
// -- elided 'case' label(s) --
187 PrefFunctionLogAlignment
= 4;
188 PrefLoopLogAlignment
= 2;
189 MaxInterleaveFactor
= 4;
190 PrefetchDistance
= 128;
191 MinPrefetchStride
= 1024;
192 MaxPrefetchIterationsAhead
= 4;
193 // FIXME: remove this to enable 64-bit SLP if performance looks good.
194 MinVectorRegisterBitWidth
= 128;
// AArch64Subtarget constructor: forwards CPU (also as TuneCPU) and the
// feature string to the generated base class, sizes the reserved and
// custom-call-saved X-register bitsets from GPR64commonRegClass, records
// endianness and the SVE vector-size overrides, runs
// initializeSubtargetDependencies() while constructing InstrInfo, and then
// wires up the GlobalISel pipeline objects.
// NOTE(review): some blank/comment lines and the closing '}' are not
// visible in this extraction (embedded line numbers skip them); the body
// otherwise appears intact.
199 AArch64Subtarget::AArch64Subtarget(const Triple
&TT
, const std::string
&CPU
,
200 const std::string
&FS
,
201 const TargetMachine
&TM
, bool LittleEndian
,
202 unsigned MinSVEVectorSizeInBitsOverride
,
203 unsigned MaxSVEVectorSizeInBitsOverride
)
204 : AArch64GenSubtargetInfo(TT
, CPU
, /*TuneCPU*/ CPU
, FS
),
205 ReserveXRegister(AArch64::GPR64commonRegClass
.getNumRegs()),
206 CustomCallSavedXRegs(AArch64::GPR64commonRegClass
.getNumRegs()),
207 IsLittle(LittleEndian
),
208 MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride
),
209 MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride
), TargetTriple(TT
),
210 FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS
, CPU
)),
211 TSInfo(), TLInfo(TM
, *this) {
// Some targets reserve x18 as a platform register by default.
212 if (AArch64::isX18ReservedByDefault(TT
))
213 ReserveXRegister
.set(18);
// Build the GlobalISel objects; the subtarget is fully initialized above.
215 CallLoweringInfo
.reset(new AArch64CallLowering(*getTargetLowering()));
216 InlineAsmLoweringInfo
.reset(new InlineAsmLowering(getTargetLowering()));
217 Legalizer
.reset(new AArch64LegalizerInfo(*this));
// RBI's ownership is transferred to RegBankInfo below; the instruction
// selector keeps a non-owning reference to it.
219 auto *RBI
= new AArch64RegisterBankInfo(*getRegisterInfo());
221 // FIXME: At this point, we can't rely on Subtarget having RBI.
222 // It's awkward to mix passing RBI and the Subtarget; should we pass
224 InstSelector
.reset(createAArch64InstructionSelector(
225 *static_cast<const AArch64TargetMachine
*>(&TM
), *this, *RBI
));
// Take ownership of the raw 'new' above.
227 RegBankInfo
.reset(RBI
);
230 const CallLowering
*AArch64Subtarget::getCallLowering() const {
231 return CallLoweringInfo
.get();
234 const InlineAsmLowering
*AArch64Subtarget::getInlineAsmLowering() const {
235 return InlineAsmLoweringInfo
.get();
238 InstructionSelector
*AArch64Subtarget::getInstructionSelector() const {
239 return InstSelector
.get();
242 const LegalizerInfo
*AArch64Subtarget::getLegalizerInfo() const {
243 return Legalizer
.get();
246 const RegisterBankInfo
*AArch64Subtarget::getRegBankInfo() const {
247 return RegBankInfo
.get();
250 /// Find the target operand flags that describe how a global value should be
251 /// referenced for the current subtarget.
// NOTE(review): the return-type line (upstream: 'unsigned char') and the
// closing braces of the DSO-local block and of the function are not visible
// in this extraction (embedded line numbers skip them) -- confirm upstream
// before editing.
253 AArch64Subtarget::ClassifyGlobalReference(const GlobalValue
*GV
,
254 const TargetMachine
&TM
) const {
255 // MachO large model always goes via a GOT, simply to get a single 8-byte
256 // absolute relocation on all global addresses.
257 if (TM
.getCodeModel() == CodeModel::Large
&& isTargetMachO())
258 return AArch64II::MO_GOT
;
// Non-DSO-local globals go through the GOT, with extra flags on Windows
// for dllimport and COFF stubs.
260 if (!TM
.shouldAssumeDSOLocal(*GV
->getParent(), GV
)) {
261 if (GV
->hasDLLImportStorageClass())
262 return AArch64II::MO_GOT
| AArch64II::MO_DLLIMPORT
;
263 if (getTargetTriple().isOSWindows())
264 return AArch64II::MO_GOT
| AArch64II::MO_COFFSTUB
;
265 return AArch64II::MO_GOT
;
// (NOTE(review): the '}' closing the block above is elided here.)
268 // The small code model's direct accesses use ADRP, which cannot
269 // necessarily produce the value 0 (if the code is above 4GB).
270 // Same for the tiny code model, where we have a pc relative LDR.
271 if ((useSmallAddressing() || TM
.getCodeModel() == CodeModel::Tiny
) &&
272 GV
->hasExternalWeakLinkage())
273 return AArch64II::MO_GOT
;
275 // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
276 // that their nominal addresses are tagged and outside of the code model. In
277 // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
278 // tag if necessary based on MO_TAGGED.
279 if (AllowTaggedGlobals
&& !isa
<FunctionType
>(GV
->getValueType()))
280 return AArch64II::MO_NC
| AArch64II::MO_TAGGED
;
// Default: direct reference, no special flags.
282 return AArch64II::MO_NO_FLAG
;
285 unsigned AArch64Subtarget::classifyGlobalFunctionReference(
286 const GlobalValue
*GV
, const TargetMachine
&TM
) const {
287 // MachO large model always goes via a GOT, because we don't have the
288 // relocations available to do anything else..
289 if (TM
.getCodeModel() == CodeModel::Large
&& isTargetMachO() &&
290 !GV
->hasInternalLinkage())
291 return AArch64II::MO_GOT
;
293 // NonLazyBind goes via GOT unless we know it's available locally.
294 auto *F
= dyn_cast
<Function
>(GV
);
295 if (UseNonLazyBind
&& F
&& F
->hasFnAttribute(Attribute::NonLazyBind
) &&
296 !TM
.shouldAssumeDSOLocal(*GV
->getParent(), GV
))
297 return AArch64II::MO_GOT
;
299 // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
300 if (getTargetTriple().isOSWindows())
301 return ClassifyGlobalReference(GV
, TM
);
303 return AArch64II::MO_NO_FLAG
;
306 void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy
&Policy
,
307 unsigned NumRegionInstrs
) const {
308 // LNT run (at least on Cyclone) showed reasonably significant gains for
309 // bi-directional scheduling. 253.perlbmk.
310 Policy
.OnlyTopDown
= false;
311 Policy
.OnlyBottomUp
= false;
312 // Enabling or Disabling the latency heuristic is a close call: It seems to
313 // help nearly no benchmark on out-of-order architectures, on the other hand
314 // it regresses register pressure on a few benchmarking.
315 Policy
.DisableLatencyHeuristic
= DisableLatencySchedHeuristic
;
318 bool AArch64Subtarget::enableEarlyIfConversion() const {
319 return EnableEarlyIfConvert
;
// Whether codegen may assume the top byte of addresses is ignored (TBI),
// gated on the -aarch64-use-tbi flag; for iOS the OS version is consulted.
// NOTE(review): this function is truncated in this extraction -- the early
// return after the flag check (presumably 'return false;'), the iOS version
// comparison, the non-iOS path, and the closing braces are all elided
// (embedded line numbers jump 323->326 and stop at 328). Do not edit
// without consulting the upstream source.
322 bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
323 if (!UseAddressTopByteIgnored
)
// (elided early-return here -- confirm upstream)
326 if (TargetTriple
.isiOS()) {
327 unsigned Major
, Minor
, Micro
;
328 TargetTriple
.getiOSVersion(Major
, Minor
, Micro
);
335 std::unique_ptr
<PBQPRAConstraint
>
336 AArch64Subtarget::getCustomPBQPConstraints() const {
337 return balanceFPOps() ? std::make_unique
<A57ChainingConstraint
>() : nullptr;
340 void AArch64Subtarget::mirFileLoaded(MachineFunction
&MF
) const {
341 // We usually compute max call frame size after ISel. Do the computation now
342 // if the .mir file didn't specify it. Note that this will probably give you
343 // bogus values after PEI has eliminated the callframe setup/destroy pseudo
344 // instructions, specify explicitly if you need it to be correct.
345 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
346 if (!MFI
.isMaxCallFrameSizeComputed())
347 MFI
.computeMaxCallFrameSize(MF
);
350 bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
351 // Prefer NEON unless larger SVE registers are available.
352 return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
355 bool AArch64Subtarget::useAA() const { return UseAA
; }