//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
12 #include "ARMTargetMachine.h"
14 #include "ARMMacroFusion.h"
15 #include "ARMSubtarget.h"
16 #include "ARMTargetObjectFile.h"
17 #include "ARMTargetTransformInfo.h"
18 #include "MCTargetDesc/ARMMCTargetDesc.h"
19 #include "TargetInfo/ARMTargetInfo.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/Triple.h"
24 #include "llvm/Analysis/TargetTransformInfo.h"
25 #include "llvm/CodeGen/ExecutionDomainFix.h"
26 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
27 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
28 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
29 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
30 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
31 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
32 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
33 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
34 #include "llvm/CodeGen/MachineFunction.h"
35 #include "llvm/CodeGen/MachineScheduler.h"
36 #include "llvm/CodeGen/Passes.h"
37 #include "llvm/CodeGen/TargetPassConfig.h"
38 #include "llvm/IR/Attributes.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/IR/Function.h"
41 #include "llvm/Pass.h"
42 #include "llvm/Support/CodeGen.h"
43 #include "llvm/Support/CommandLine.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Support/TargetParser.h"
46 #include "llvm/Support/TargetRegistry.h"
47 #include "llvm/Target/TargetLoweringObjectFile.h"
48 #include "llvm/Target/TargetOptions.h"
49 #include "llvm/Transforms/CFGuard.h"
50 #include "llvm/Transforms/IPO.h"
51 #include "llvm/Transforms/Scalar.h"
59 DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden
,
60 cl::desc("Inhibit optimization of S->D register accesses on A15"),
64 EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden
,
65 cl::desc("Run SimplifyCFG after expanding atomic operations"
66 " to make use of cmpxchg flow-based information"),
70 EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden
,
71 cl::desc("Enable ARM load/store optimization pass"),
74 // FIXME: Unify control over GlobalMerge.
75 static cl::opt
<cl::boolOrDefault
>
76 EnableGlobalMerge("arm-global-merge", cl::Hidden
,
77 cl::desc("Enable the global merge pass"));
80 void initializeARMExecutionDomainFixPass(PassRegistry
&);
83 extern "C" LLVM_EXTERNAL_VISIBILITY
void LLVMInitializeARMTarget() {
84 // Register the target.
85 RegisterTargetMachine
<ARMLETargetMachine
> X(getTheARMLETarget());
86 RegisterTargetMachine
<ARMLETargetMachine
> A(getTheThumbLETarget());
87 RegisterTargetMachine
<ARMBETargetMachine
> Y(getTheARMBETarget());
88 RegisterTargetMachine
<ARMBETargetMachine
> B(getTheThumbBETarget());
90 PassRegistry
&Registry
= *PassRegistry::getPassRegistry();
91 initializeGlobalISel(Registry
);
92 initializeARMLoadStoreOptPass(Registry
);
93 initializeARMPreAllocLoadStoreOptPass(Registry
);
94 initializeARMParallelDSPPass(Registry
);
95 initializeARMConstantIslandsPass(Registry
);
96 initializeARMExecutionDomainFixPass(Registry
);
97 initializeARMExpandPseudoPass(Registry
);
98 initializeThumb2SizeReducePass(Registry
);
99 initializeMVEVPTBlockPass(Registry
);
100 initializeMVETPAndVPTOptimisationsPass(Registry
);
101 initializeMVETailPredicationPass(Registry
);
102 initializeARMLowOverheadLoopsPass(Registry
);
103 initializeARMBlockPlacementPass(Registry
);
104 initializeMVEGatherScatterLoweringPass(Registry
);
105 initializeARMSLSHardeningPass(Registry
);
106 initializeMVELaneInterleavingPass(Registry
);
109 static std::unique_ptr
<TargetLoweringObjectFile
> createTLOF(const Triple
&TT
) {
110 if (TT
.isOSBinFormatMachO())
111 return std::make_unique
<TargetLoweringObjectFileMachO
>();
112 if (TT
.isOSWindows())
113 return std::make_unique
<TargetLoweringObjectFileCOFF
>();
114 return std::make_unique
<ARMElfTargetObjectFile
>();
117 static ARMBaseTargetMachine::ARMABI
118 computeTargetABI(const Triple
&TT
, StringRef CPU
,
119 const TargetOptions
&Options
) {
120 StringRef ABIName
= Options
.MCOptions
.getABIName();
123 ABIName
= ARM::computeDefaultTargetABI(TT
, CPU
);
125 if (ABIName
== "aapcs16")
126 return ARMBaseTargetMachine::ARM_ABI_AAPCS16
;
127 else if (ABIName
.startswith("aapcs"))
128 return ARMBaseTargetMachine::ARM_ABI_AAPCS
;
129 else if (ABIName
.startswith("apcs"))
130 return ARMBaseTargetMachine::ARM_ABI_APCS
;
132 llvm_unreachable("Unhandled/unknown ABI Name!");
133 return ARMBaseTargetMachine::ARM_ABI_UNKNOWN
;
136 static std::string
computeDataLayout(const Triple
&TT
, StringRef CPU
,
137 const TargetOptions
&Options
,
139 auto ABI
= computeTargetABI(TT
, CPU
, Options
);
149 Ret
+= DataLayout::getManglingComponent(TT
);
151 // Pointers are 32 bits and aligned to 32 bits.
154 // Function pointers are aligned to 8 bits (because the LSB stores the
158 // ABIs other than APCS have 64 bit integers with natural alignment.
159 if (ABI
!= ARMBaseTargetMachine::ARM_ABI_APCS
)
162 // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
163 // bits, others to 64 bits. We always try to align to 64 bits.
164 if (ABI
== ARMBaseTargetMachine::ARM_ABI_APCS
)
167 // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
168 // to 64. We always ty to give them natural alignment.
169 if (ABI
== ARMBaseTargetMachine::ARM_ABI_APCS
)
170 Ret
+= "-v64:32:64-v128:32:128";
171 else if (ABI
!= ARMBaseTargetMachine::ARM_ABI_AAPCS16
)
172 Ret
+= "-v128:64:128";
174 // Try to align aggregates to 32 bits (the default is 64 bits, which has no
175 // particular hardware support on 32-bit ARM).
178 // Integer registers are 32 bits.
181 // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
182 // aligned everywhere else.
183 if (TT
.isOSNaCl() || ABI
== ARMBaseTargetMachine::ARM_ABI_AAPCS16
)
185 else if (ABI
== ARMBaseTargetMachine::ARM_ABI_AAPCS
)
193 static Reloc::Model
getEffectiveRelocModel(const Triple
&TT
,
194 Optional
<Reloc::Model
> RM
) {
196 // Default relocation model on Darwin is PIC.
197 return TT
.isOSBinFormatMachO() ? Reloc::PIC_
: Reloc::Static
;
199 if (*RM
== Reloc::ROPI
|| *RM
== Reloc::RWPI
|| *RM
== Reloc::ROPI_RWPI
)
200 assert(TT
.isOSBinFormatELF() &&
201 "ROPI/RWPI currently only supported for ELF");
203 // DynamicNoPIC is only used on darwin.
204 if (*RM
== Reloc::DynamicNoPIC
&& !TT
.isOSDarwin())
205 return Reloc::Static
;
210 /// Create an ARM architecture model.
212 ARMBaseTargetMachine::ARMBaseTargetMachine(const Target
&T
, const Triple
&TT
,
213 StringRef CPU
, StringRef FS
,
214 const TargetOptions
&Options
,
215 Optional
<Reloc::Model
> RM
,
216 Optional
<CodeModel::Model
> CM
,
217 CodeGenOpt::Level OL
, bool isLittle
)
218 : LLVMTargetMachine(T
, computeDataLayout(TT
, CPU
, Options
, isLittle
), TT
,
219 CPU
, FS
, Options
, getEffectiveRelocModel(TT
, RM
),
220 getEffectiveCodeModel(CM
, CodeModel::Small
), OL
),
221 TargetABI(computeTargetABI(TT
, CPU
, Options
)),
222 TLOF(createTLOF(getTargetTriple())), isLittle(isLittle
) {
224 // Default to triple-appropriate float ABI
225 if (Options
.FloatABIType
== FloatABI::Default
) {
226 if (isTargetHardFloat())
227 this->Options
.FloatABIType
= FloatABI::Hard
;
229 this->Options
.FloatABIType
= FloatABI::Soft
;
232 // Default to triple-appropriate EABI
233 if (Options
.EABIVersion
== EABI::Default
||
234 Options
.EABIVersion
== EABI::Unknown
) {
235 // musl is compatible with glibc with regard to EABI version
236 if ((TargetTriple
.getEnvironment() == Triple::GNUEABI
||
237 TargetTriple
.getEnvironment() == Triple::GNUEABIHF
||
238 TargetTriple
.getEnvironment() == Triple::MuslEABI
||
239 TargetTriple
.getEnvironment() == Triple::MuslEABIHF
) &&
240 !(TargetTriple
.isOSWindows() || TargetTriple
.isOSDarwin()))
241 this->Options
.EABIVersion
= EABI::GNU
;
243 this->Options
.EABIVersion
= EABI::EABI5
;
246 if (TT
.isOSBinFormatMachO()) {
247 this->Options
.TrapUnreachable
= true;
248 this->Options
.NoTrapAfterNoreturn
= true;
251 // ARM supports the debug entry values.
252 setSupportsDebugEntryValues(true);
256 // ARM supports the MachineOutliner.
257 setMachineOutliner(true);
258 setSupportsDefaultOutlining(true);
261 ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
264 ARMBaseTargetMachine::getSubtargetImpl(const Function
&F
) const {
265 Attribute CPUAttr
= F
.getFnAttribute("target-cpu");
266 Attribute FSAttr
= F
.getFnAttribute("target-features");
269 CPUAttr
.isValid() ? CPUAttr
.getValueAsString().str() : TargetCPU
;
271 FSAttr
.isValid() ? FSAttr
.getValueAsString().str() : TargetFS
;
273 // FIXME: This is related to the code below to reset the target options,
274 // we need to know whether or not the soft float flag is set on the
275 // function before we can generate a subtarget. We also need to use
276 // it as a key for the subtarget since that can be the only difference
277 // between two functions.
278 bool SoftFloat
= F
.getFnAttribute("use-soft-float").getValueAsBool();
279 // If the soft float attribute is set on the function turn on the soft float
280 // subtarget feature.
282 FS
+= FS
.empty() ? "+soft-float" : ",+soft-float";
284 // Use the optminsize to identify the subtarget, but don't use it in the
286 std::string Key
= CPU
+ FS
;
290 auto &I
= SubtargetMap
[Key
];
292 // This needs to be done before we create a new subtarget since any
293 // creation will depend on the TM and the code generation flags on the
294 // function that reside in TargetOptions.
295 resetTargetOptions(F
);
296 I
= std::make_unique
<ARMSubtarget
>(TargetTriple
, CPU
, FS
, *this, isLittle
,
299 if (!I
->isThumb() && !I
->hasARMOps())
300 F
.getContext().emitError("Function '" + F
.getName() + "' uses ARM "
301 "instructions, but the target does not support ARM mode execution.");
308 ARMBaseTargetMachine::getTargetTransformInfo(const Function
&F
) {
309 return TargetTransformInfo(ARMTTIImpl(this, F
));
312 ARMLETargetMachine::ARMLETargetMachine(const Target
&T
, const Triple
&TT
,
313 StringRef CPU
, StringRef FS
,
314 const TargetOptions
&Options
,
315 Optional
<Reloc::Model
> RM
,
316 Optional
<CodeModel::Model
> CM
,
317 CodeGenOpt::Level OL
, bool JIT
)
318 : ARMBaseTargetMachine(T
, TT
, CPU
, FS
, Options
, RM
, CM
, OL
, true) {}
320 ARMBETargetMachine::ARMBETargetMachine(const Target
&T
, const Triple
&TT
,
321 StringRef CPU
, StringRef FS
,
322 const TargetOptions
&Options
,
323 Optional
<Reloc::Model
> RM
,
324 Optional
<CodeModel::Model
> CM
,
325 CodeGenOpt::Level OL
, bool JIT
)
326 : ARMBaseTargetMachine(T
, TT
, CPU
, FS
, Options
, RM
, CM
, OL
, false) {}
330 /// ARM Code Generator Pass Configuration Options.
331 class ARMPassConfig
: public TargetPassConfig
{
333 ARMPassConfig(ARMBaseTargetMachine
&TM
, PassManagerBase
&PM
)
334 : TargetPassConfig(TM
, PM
) {}
336 ARMBaseTargetMachine
&getARMTargetMachine() const {
337 return getTM
<ARMBaseTargetMachine
>();
341 createMachineScheduler(MachineSchedContext
*C
) const override
{
342 ScheduleDAGMILive
*DAG
= createGenericSchedLive(C
);
343 // add DAG Mutations here.
344 const ARMSubtarget
&ST
= C
->MF
->getSubtarget
<ARMSubtarget
>();
346 DAG
->addMutation(createARMMacroFusionDAGMutation());
351 createPostMachineScheduler(MachineSchedContext
*C
) const override
{
352 ScheduleDAGMI
*DAG
= createGenericSchedPostRA(C
);
353 // add DAG Mutations here.
354 const ARMSubtarget
&ST
= C
->MF
->getSubtarget
<ARMSubtarget
>();
356 DAG
->addMutation(createARMMacroFusionDAGMutation());
360 void addIRPasses() override
;
361 void addCodeGenPrepare() override
;
362 bool addPreISel() override
;
363 bool addInstSelector() override
;
364 bool addIRTranslator() override
;
365 bool addLegalizeMachineIR() override
;
366 bool addRegBankSelect() override
;
367 bool addGlobalInstructionSelect() override
;
368 void addPreRegAlloc() override
;
369 void addPreSched2() override
;
370 void addPreEmitPass() override
;
371 void addPreEmitPass2() override
;
373 std::unique_ptr
<CSEConfigBase
> getCSEConfig() const override
;
376 class ARMExecutionDomainFix
: public ExecutionDomainFix
{
379 ARMExecutionDomainFix() : ExecutionDomainFix(ID
, ARM::DPRRegClass
) {}
380 StringRef
getPassName() const override
{
381 return "ARM Execution Domain Fix";
384 char ARMExecutionDomainFix::ID
;
386 } // end anonymous namespace
388 INITIALIZE_PASS_BEGIN(ARMExecutionDomainFix
, "arm-execution-domain-fix",
389 "ARM Execution Domain Fix", false, false)
390 INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis
)
391 INITIALIZE_PASS_END(ARMExecutionDomainFix
, "arm-execution-domain-fix",
392 "ARM Execution Domain Fix", false, false)
394 TargetPassConfig
*ARMBaseTargetMachine::createPassConfig(PassManagerBase
&PM
) {
395 return new ARMPassConfig(*this, PM
);
398 std::unique_ptr
<CSEConfigBase
> ARMPassConfig::getCSEConfig() const {
399 return getStandardCSEConfigForOpt(TM
->getOptLevel());
402 void ARMPassConfig::addIRPasses() {
403 if (TM
->Options
.ThreadModel
== ThreadModel::Single
)
404 addPass(createLowerAtomicPass());
406 addPass(createAtomicExpandPass());
408 // Cmpxchg instructions are often used with a subsequent comparison to
409 // determine whether it succeeded. We can exploit existing control-flow in
410 // ldrex/strex loops to simplify this, but it needs tidying up.
411 if (TM
->getOptLevel() != CodeGenOpt::None
&& EnableAtomicTidy
)
412 addPass(createCFGSimplificationPass(
413 SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true),
414 [this](const Function
&F
) {
415 const auto &ST
= this->TM
->getSubtarget
<ARMSubtarget
>(F
);
416 return ST
.hasAnyDataBarrier() && !ST
.isThumb1Only();
419 addPass(createMVEGatherScatterLoweringPass());
420 addPass(createMVELaneInterleavingPass());
422 TargetPassConfig::addIRPasses();
424 // Run the parallel DSP pass.
425 if (getOptLevel() == CodeGenOpt::Aggressive
)
426 addPass(createARMParallelDSPPass());
428 // Match interleaved memory accesses to ldN/stN intrinsics.
429 if (TM
->getOptLevel() != CodeGenOpt::None
)
430 addPass(createInterleavedAccessPass());
432 // Add Control Flow Guard checks.
433 if (TM
->getTargetTriple().isOSWindows())
434 addPass(createCFGuardCheckPass());
437 void ARMPassConfig::addCodeGenPrepare() {
438 if (getOptLevel() != CodeGenOpt::None
)
439 addPass(createTypePromotionPass());
440 TargetPassConfig::addCodeGenPrepare();
443 bool ARMPassConfig::addPreISel() {
444 if ((TM
->getOptLevel() != CodeGenOpt::None
&&
445 EnableGlobalMerge
== cl::BOU_UNSET
) ||
446 EnableGlobalMerge
== cl::BOU_TRUE
) {
447 // FIXME: This is using the thumb1 only constant value for
448 // maximal global offset for merging globals. We may want
449 // to look into using the old value for non-thumb1 code of
450 // 4095 based on the TargetMachine, but this starts to become
451 // tricky when doing code gen per function.
452 bool OnlyOptimizeForSize
= (TM
->getOptLevel() < CodeGenOpt::Aggressive
) &&
453 (EnableGlobalMerge
== cl::BOU_UNSET
);
454 // Merging of extern globals is enabled by default on non-Mach-O as we
455 // expect it to be generally either beneficial or harmless. On Mach-O it
456 // is disabled as we emit the .subsections_via_symbols directive which
457 // means that merging extern globals is not safe.
458 bool MergeExternalByDefault
= !TM
->getTargetTriple().isOSBinFormatMachO();
459 addPass(createGlobalMergePass(TM
, 127, OnlyOptimizeForSize
,
460 MergeExternalByDefault
));
463 if (TM
->getOptLevel() != CodeGenOpt::None
) {
464 addPass(createHardwareLoopsPass());
465 addPass(createMVETailPredicationPass());
466 // FIXME: IR passes can delete address-taken basic blocks, deleting
467 // corresponding blockaddresses. ARMConstantPoolConstant holds references to
468 // address-taken basic blocks which can be invalidated if the function
469 // containing the blockaddress has already been codegen'd and the basic
470 // block is removed. Work around this by forcing all IR passes to run before
471 // any ISel takes place. We should have a more principled way of handling
472 // this. See D99707 for more details.
473 addPass(createBarrierNoopPass());
479 bool ARMPassConfig::addInstSelector() {
480 addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
484 bool ARMPassConfig::addIRTranslator() {
485 addPass(new IRTranslator(getOptLevel()));
489 bool ARMPassConfig::addLegalizeMachineIR() {
490 addPass(new Legalizer());
494 bool ARMPassConfig::addRegBankSelect() {
495 addPass(new RegBankSelect());
499 bool ARMPassConfig::addGlobalInstructionSelect() {
500 addPass(new InstructionSelect(getOptLevel()));
504 void ARMPassConfig::addPreRegAlloc() {
505 if (getOptLevel() != CodeGenOpt::None
) {
506 addPass(createMVETPAndVPTOptimisationsPass());
508 addPass(createMLxExpansionPass());
510 if (EnableARMLoadStoreOpt
)
511 addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true));
513 if (!DisableA15SDOptimization
)
514 addPass(createA15SDOptimizerPass());
518 void ARMPassConfig::addPreSched2() {
519 if (getOptLevel() != CodeGenOpt::None
) {
520 if (EnableARMLoadStoreOpt
)
521 addPass(createARMLoadStoreOptimizationPass());
523 addPass(new ARMExecutionDomainFix());
524 addPass(createBreakFalseDeps());
527 // Expand some pseudo instructions into multiple instructions to allow
528 // proper scheduling.
529 addPass(createARMExpandPseudoPass());
531 if (getOptLevel() != CodeGenOpt::None
) {
532 // When optimising for size, always run the Thumb2SizeReduction pass before
533 // IfConversion. Otherwise, check whether IT blocks are restricted
534 // (e.g. in v8, IfConversion depends on Thumb instruction widths)
535 addPass(createThumb2SizeReductionPass([this](const Function
&F
) {
536 return this->TM
->getSubtarget
<ARMSubtarget
>(F
).hasMinSize() ||
537 this->TM
->getSubtarget
<ARMSubtarget
>(F
).restrictIT();
540 addPass(createIfConverter([](const MachineFunction
&MF
) {
541 return !MF
.getSubtarget
<ARMSubtarget
>().isThumb1Only();
544 addPass(createMVEVPTBlockPass());
545 addPass(createThumb2ITBlockPass());
547 // Add both scheduling passes to give the subtarget an opportunity to pick
549 if (getOptLevel() != CodeGenOpt::None
) {
550 addPass(&PostMachineSchedulerID
);
551 addPass(&PostRASchedulerID
);
554 addPass(createARMIndirectThunks());
555 addPass(createARMSLSHardeningPass());
558 void ARMPassConfig::addPreEmitPass() {
559 addPass(createThumb2SizeReductionPass());
561 // Constant island pass work on unbundled instructions.
562 addPass(createUnpackMachineBundles([](const MachineFunction
&MF
) {
563 return MF
.getSubtarget
<ARMSubtarget
>().isThumb2();
566 // Don't optimize barriers or block placement at -O0.
567 if (getOptLevel() != CodeGenOpt::None
) {
568 addPass(createARMBlockPlacementPass());
569 addPass(createARMOptimizeBarriersPass());
// Very-late passes: constant islands, low-overhead loop finalization, and
// Windows CFG/EHCont Guard fixups.
// NOTE(review): this definition continues past the end of this chunk — the
// closing braces (and possibly further statements) are not visible here.
573 void ARMPassConfig::addPreEmitPass2() {
574 addPass(createARMConstantIslandPass());
575 addPass(createARMLowOverheadLoopsPass());
577 if (TM
->getTargetTriple().isOSWindows()) {
578 // Identify valid longjmp targets for Windows Control Flow Guard.
579 addPass(createCFGuardLongjmpPass());
580 // Identify valid eh continuation targets for Windows EHCont Guard.
581 addPass(createEHContGuardCatchretPass());