1 //===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains a pass that collect the Linker Optimization Hint (LOH).
10 // This pass should be run at the very end of the compilation flow, just before
12 // To be useful for the linker, the LOH must be printed into the assembly file.
14 // A LOH describes a sequence of instructions that may be optimized by the
16 // This same sequence cannot be optimized by the compiler because some of
17 // the information will be known at link time.
18 // For instance, consider the following sequence:
19 // L1: adrp xA, sym@PAGE
20 // L2: add xB, xA, sym@PAGEOFF
21 // L3: ldr xC, [xB, #imm]
22 // This sequence can be turned into:
23 // A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
24 // L3: ldr xC, sym+#imm
25 // It may also be turned into either the following more efficient
27 // - If sym@PAGEOFF + #imm fits the encoding space of L3.
28 // L1: adrp xA, sym@PAGE
29 // L3: ldr xC, [xB, sym@PAGEOFF + #imm]
30 // - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
32 // L3: ldr xC, [xB, #imm]
34 // To be valid a LOH must meet all the requirements needed by all the related
35 // possible linker transformations.
36 // For instance, using the running example, the constraints to emit
37 // ".loh AdrpAddLdr" are:
38 // - L1, L2, and L3 instructions are of the expected type, i.e.,
39 // respectively ADRP, ADD (immediate), and LD.
40 // - The result of L1 is used only by L2.
41 // - The register argument (xA) used in the ADD instruction is defined
43 // - The result of L2 is used only by L3.
44 // - The base address (xB) in L3 is defined only L2.
45 // - The ADRP in L1 and the ADD in L2 must reference the same symbol using
46 // @PAGE/@PAGEOFF with no additional constants
48 // Currently supported LOHs are:
49 // * So called non-ADRP-related:
50 // - .loh AdrpAddLdr L1, L2, L3:
51 // L1: adrp xA, sym@PAGE
52 // L2: add xB, xA, sym@PAGEOFF
53 // L3: ldr xC, [xB, #imm]
54 // - .loh AdrpLdrGotLdr L1, L2, L3:
55 // L1: adrp xA, sym@GOTPAGE
56 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
57 // L3: ldr xC, [xB, #imm]
58 // - .loh AdrpLdr L1, L3:
59 // L1: adrp xA, sym@PAGE
60 // L3: ldr xC, [xA, sym@PAGEOFF]
61 // - .loh AdrpAddStr L1, L2, L3:
62 // L1: adrp xA, sym@PAGE
63 // L2: add xB, xA, sym@PAGEOFF
64 // L3: str xC, [xB, #imm]
65 // - .loh AdrpLdrGotStr L1, L2, L3:
66 // L1: adrp xA, sym@GOTPAGE
67 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
68 // L3: str xC, [xB, #imm]
69 // - .loh AdrpAdd L1, L2:
70 // L1: adrp xA, sym@PAGE
71 // L2: add xB, xA, sym@PAGEOFF
72 // For all these LOHs, L1, L2, L3 form a simple chain:
73 // L1 result is used only by L2 and L2 result by L3.
74 // L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
76 // All these LOHs aim at using more efficient load/store patterns by folding
77 // some instructions used to compute the address directly into the load/store.
79 // * So called ADRP-related:
80 // - .loh AdrpAdrp L2, L1:
81 // L2: ADRP xA, sym1@PAGE
82 // L1: ADRP xA, sym2@PAGE
83 // L2 dominates L1 and xA is not redifined between L2 and L1
84 // This LOH aims at getting rid of redundant ADRP instructions.
86 // The overall design for emitting the LOHs is:
87 // 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
88 // 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
89 // 1. Associates them a label.
90 // 2. Emits them in a MCStreamer (EmitLOHDirective).
91 // - The MCMachOStreamer records them into the MCAssembler.
92 // - The MCAsmStreamer prints them.
93 // - Other MCStreamers ignore them.
94 // 3. Closes the MCStreamer:
95 // - The MachObjectWriter gets them from the MCAssembler and writes
96 // them in the object file.
97 // - Other ObjectWriters ignore them.
98 //===----------------------------------------------------------------------===//
101 #include "AArch64InstrInfo.h"
102 #include "AArch64MachineFunctionInfo.h"
103 #include "llvm/ADT/BitVector.h"
104 #include "llvm/ADT/DenseMap.h"
105 #include "llvm/ADT/MapVector.h"
106 #include "llvm/ADT/SmallSet.h"
107 #include "llvm/ADT/SmallVector.h"
108 #include "llvm/ADT/Statistic.h"
109 #include "llvm/CodeGen/MachineBasicBlock.h"
110 #include "llvm/CodeGen/MachineFunctionPass.h"
111 #include "llvm/CodeGen/MachineInstr.h"
112 #include "llvm/CodeGen/TargetRegisterInfo.h"
113 #include "llvm/Support/Debug.h"
114 #include "llvm/Support/ErrorHandling.h"
115 #include "llvm/Support/raw_ostream.h"
116 #include "llvm/Target/TargetMachine.h"
117 using namespace llvm
;
119 #define DEBUG_TYPE "aarch64-collect-loh"
121 STATISTIC(NumADRPSimpleCandidate
,
122 "Number of simplifiable ADRP dominate by another");
123 STATISTIC(NumADDToSTR
, "Number of simplifiable STR reachable by ADD");
124 STATISTIC(NumLDRToSTR
, "Number of simplifiable STR reachable by LDR");
125 STATISTIC(NumADDToLDR
, "Number of simplifiable LDR reachable by ADD");
126 STATISTIC(NumLDRToLDR
, "Number of simplifiable LDR reachable by LDR");
127 STATISTIC(NumADRPToLDR
, "Number of simplifiable LDR reachable by ADRP");
128 STATISTIC(NumADRSimpleCandidate
, "Number of simplifiable ADRP + ADD");
130 #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
134 struct AArch64CollectLOH
: public MachineFunctionPass
{
136 AArch64CollectLOH() : MachineFunctionPass(ID
) {}
138 bool runOnMachineFunction(MachineFunction
&MF
) override
;
140 MachineFunctionProperties
getRequiredProperties() const override
{
141 return MachineFunctionProperties().set(
142 MachineFunctionProperties::Property::NoVRegs
);
145 StringRef
getPassName() const override
{ return AARCH64_COLLECT_LOH_NAME
; }
147 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
148 MachineFunctionPass::getAnalysisUsage(AU
);
149 AU
.setPreservesAll();
153 char AArch64CollectLOH::ID
= 0;
155 } // end anonymous namespace.
157 INITIALIZE_PASS(AArch64CollectLOH
, "aarch64-collect-loh",
158 AARCH64_COLLECT_LOH_NAME
, false, false)
160 static bool canAddBePartOfLOH(const MachineInstr
&MI
) {
161 // Check immediate to see if the immediate is an address.
162 switch (MI
.getOperand(2).getType()) {
165 case MachineOperand::MO_GlobalAddress
:
166 case MachineOperand::MO_JumpTableIndex
:
167 case MachineOperand::MO_ConstantPoolIndex
:
168 case MachineOperand::MO_BlockAddress
:
173 /// Answer the following question: Can Def be one of the definition
174 /// involved in a part of a LOH?
175 static bool canDefBePartOfLOH(const MachineInstr
&MI
) {
176 // Accept ADRP, ADDLow and LOADGot.
177 switch (MI
.getOpcode()) {
182 case AArch64::ADDXri
:
183 return canAddBePartOfLOH(MI
);
184 case AArch64::LDRXui
:
185 case AArch64::LDRWui
:
186 // Check immediate to see if the immediate is an address.
187 switch (MI
.getOperand(2).getType()) {
190 case MachineOperand::MO_GlobalAddress
:
191 return MI
.getOperand(2).getTargetFlags() & AArch64II::MO_GOT
;
196 /// Check whether the given instruction can the end of a LOH chain involving a
198 static bool isCandidateStore(const MachineInstr
&MI
, const MachineOperand
&MO
) {
199 switch (MI
.getOpcode()) {
202 case AArch64::STRBBui
:
203 case AArch64::STRHHui
:
204 case AArch64::STRBui
:
205 case AArch64::STRHui
:
206 case AArch64::STRWui
:
207 case AArch64::STRXui
:
208 case AArch64::STRSui
:
209 case AArch64::STRDui
:
210 case AArch64::STRQui
:
211 // We can only optimize the index operand.
212 // In case we have str xA, [xA, #imm], this is two different uses
213 // of xA and we cannot fold, otherwise the xA stored may be wrong,
214 // even if #imm == 0.
215 return MI
.getOperandNo(&MO
) == 1 &&
216 MI
.getOperand(0).getReg() != MI
.getOperand(1).getReg();
220 /// Check whether the given instruction can be the end of a LOH chain
221 /// involving a load.
222 static bool isCandidateLoad(const MachineInstr
&MI
) {
223 switch (MI
.getOpcode()) {
226 case AArch64::LDRSBWui
:
227 case AArch64::LDRSBXui
:
228 case AArch64::LDRSHWui
:
229 case AArch64::LDRSHXui
:
230 case AArch64::LDRSWui
:
231 case AArch64::LDRBui
:
232 case AArch64::LDRHui
:
233 case AArch64::LDRWui
:
234 case AArch64::LDRXui
:
235 case AArch64::LDRSui
:
236 case AArch64::LDRDui
:
237 case AArch64::LDRQui
:
238 return !(MI
.getOperand(2).getTargetFlags() & AArch64II::MO_GOT
);
242 /// Check whether the given instruction can load a litteral.
243 static bool supportLoadFromLiteral(const MachineInstr
&MI
) {
244 switch (MI
.getOpcode()) {
247 case AArch64::LDRSWui
:
248 case AArch64::LDRWui
:
249 case AArch64::LDRXui
:
250 case AArch64::LDRSui
:
251 case AArch64::LDRDui
:
252 case AArch64::LDRQui
:
257 /// Number of GPR registers traked by mapRegToGPRIndex()
258 static const unsigned N_GPR_REGS
= 31;
259 /// Map register number to index from 0-30.
260 static int mapRegToGPRIndex(MCPhysReg Reg
) {
261 static_assert(AArch64::X28
- AArch64::X0
+ 3 == N_GPR_REGS
, "Number of GPRs");
262 static_assert(AArch64::W30
- AArch64::W0
+ 1 == N_GPR_REGS
, "Number of GPRs");
263 if (AArch64::X0
<= Reg
&& Reg
<= AArch64::X28
)
264 return Reg
- AArch64::X0
;
265 if (AArch64::W0
<= Reg
&& Reg
<= AArch64::W30
)
266 return Reg
- AArch64::W0
;
267 // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to
268 // handle them as special cases.
269 if (Reg
== AArch64::FP
)
271 if (Reg
== AArch64::LR
)
276 /// State tracked per register.
277 /// The main algorithm walks backwards over a basic block maintaining this
278 /// datastructure for each tracked general purpose register.
280 MCLOHType Type
: 8; ///< "Best" type of LOH possible.
281 bool IsCandidate
: 1; ///< Possible LOH candidate.
282 bool OneUser
: 1; ///< Found exactly one user (yet).
283 bool MultiUsers
: 1; ///< Found multiple users.
284 const MachineInstr
*MI0
; ///< First instruction involved in the LOH.
285 const MachineInstr
*MI1
; ///< Second instruction involved in the LOH
287 const MachineInstr
*LastADRP
; ///< Last ADRP in same register.
290 /// Update state \p Info given \p MI uses the tracked register.
291 static void handleUse(const MachineInstr
&MI
, const MachineOperand
&MO
,
293 // We have multiple uses if we already found one before.
294 if (Info
.MultiUsers
|| Info
.OneUser
) {
295 Info
.IsCandidate
= false;
296 Info
.MultiUsers
= true;
301 // Start new LOHInfo if applicable.
302 if (isCandidateLoad(MI
)) {
303 Info
.Type
= MCLOH_AdrpLdr
;
304 Info
.IsCandidate
= true;
306 // Note that even this is AdrpLdr now, we can switch to a Ldr variant
308 } else if (isCandidateStore(MI
, MO
)) {
309 Info
.Type
= MCLOH_AdrpAddStr
;
310 Info
.IsCandidate
= true;
313 } else if (MI
.getOpcode() == AArch64::ADDXri
) {
314 Info
.Type
= MCLOH_AdrpAdd
;
315 Info
.IsCandidate
= true;
317 } else if ((MI
.getOpcode() == AArch64::LDRXui
||
318 MI
.getOpcode() == AArch64::LDRWui
) &&
319 MI
.getOperand(2).getTargetFlags() & AArch64II::MO_GOT
) {
320 Info
.Type
= MCLOH_AdrpLdrGot
;
321 Info
.IsCandidate
= true;
326 /// Update state \p Info given the tracked register is clobbered.
327 static void handleClobber(LOHInfo
&Info
) {
328 Info
.IsCandidate
= false;
329 Info
.OneUser
= false;
330 Info
.MultiUsers
= false;
331 Info
.LastADRP
= nullptr;
334 /// Update state \p Info given that \p MI is possibly the middle instruction
335 /// of an LOH involving 3 instructions.
336 static bool handleMiddleInst(const MachineInstr
&MI
, LOHInfo
&DefInfo
,
338 if (!DefInfo
.IsCandidate
|| (&DefInfo
!= &OpInfo
&& OpInfo
.OneUser
))
340 // Copy LOHInfo for dest register to LOHInfo for source register.
341 if (&DefInfo
!= &OpInfo
) {
343 // Invalidate \p DefInfo because we track it in \p OpInfo now.
344 handleClobber(DefInfo
);
346 DefInfo
.LastADRP
= nullptr;
348 // Advance state machine.
349 assert(OpInfo
.IsCandidate
&& "Expect valid state");
350 if (MI
.getOpcode() == AArch64::ADDXri
&& canAddBePartOfLOH(MI
)) {
351 if (OpInfo
.Type
== MCLOH_AdrpLdr
) {
352 OpInfo
.Type
= MCLOH_AdrpAddLdr
;
353 OpInfo
.IsCandidate
= true;
356 } else if (OpInfo
.Type
== MCLOH_AdrpAddStr
&& OpInfo
.MI1
== nullptr) {
357 OpInfo
.Type
= MCLOH_AdrpAddStr
;
358 OpInfo
.IsCandidate
= true;
363 assert((MI
.getOpcode() == AArch64::LDRXui
||
364 MI
.getOpcode() == AArch64::LDRWui
) &&
365 "Expect LDRXui or LDRWui");
366 assert((MI
.getOperand(2).getTargetFlags() & AArch64II::MO_GOT
) &&
367 "Expected GOT relocation");
368 if (OpInfo
.Type
== MCLOH_AdrpAddStr
&& OpInfo
.MI1
== nullptr) {
369 OpInfo
.Type
= MCLOH_AdrpLdrGotStr
;
370 OpInfo
.IsCandidate
= true;
373 } else if (OpInfo
.Type
== MCLOH_AdrpLdr
) {
374 OpInfo
.Type
= MCLOH_AdrpLdrGotLdr
;
375 OpInfo
.IsCandidate
= true;
383 /// Update state when seeing and ADRP instruction.
384 static void handleADRP(const MachineInstr
&MI
, AArch64FunctionInfo
&AFI
,
386 if (Info
.LastADRP
!= nullptr) {
387 LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n"
388 << '\t' << MI
<< '\t' << *Info
.LastADRP
);
389 AFI
.addLOHDirective(MCLOH_AdrpAdrp
, {&MI
, Info
.LastADRP
});
390 ++NumADRPSimpleCandidate
;
393 // Produce LOH directive if possible.
394 if (Info
.IsCandidate
) {
397 LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n"
398 << '\t' << MI
<< '\t' << *Info
.MI0
);
399 AFI
.addLOHDirective(MCLOH_AdrpAdd
, {&MI
, Info
.MI0
});
400 ++NumADRSimpleCandidate
;
403 if (supportLoadFromLiteral(*Info
.MI0
)) {
404 LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n"
405 << '\t' << MI
<< '\t' << *Info
.MI0
);
406 AFI
.addLOHDirective(MCLOH_AdrpLdr
, {&MI
, Info
.MI0
});
410 case MCLOH_AdrpAddLdr
:
411 LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n"
412 << '\t' << MI
<< '\t' << *Info
.MI1
<< '\t'
414 AFI
.addLOHDirective(MCLOH_AdrpAddLdr
, {&MI
, Info
.MI1
, Info
.MI0
});
417 case MCLOH_AdrpAddStr
:
418 if (Info
.MI1
!= nullptr) {
419 LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n"
420 << '\t' << MI
<< '\t' << *Info
.MI1
<< '\t'
422 AFI
.addLOHDirective(MCLOH_AdrpAddStr
, {&MI
, Info
.MI1
, Info
.MI0
});
426 case MCLOH_AdrpLdrGotLdr
:
427 LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n"
428 << '\t' << MI
<< '\t' << *Info
.MI1
<< '\t'
430 AFI
.addLOHDirective(MCLOH_AdrpLdrGotLdr
, {&MI
, Info
.MI1
, Info
.MI0
});
433 case MCLOH_AdrpLdrGotStr
:
434 LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n"
435 << '\t' << MI
<< '\t' << *Info
.MI1
<< '\t'
437 AFI
.addLOHDirective(MCLOH_AdrpLdrGotStr
, {&MI
, Info
.MI1
, Info
.MI0
});
440 case MCLOH_AdrpLdrGot
:
441 LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n"
442 << '\t' << MI
<< '\t' << *Info
.MI0
);
443 AFI
.addLOHDirective(MCLOH_AdrpLdrGot
, {&MI
, Info
.MI0
});
446 llvm_unreachable("MCLOH_AdrpAdrp not used in state machine");
454 static void handleRegMaskClobber(const uint32_t *RegMask
, MCPhysReg Reg
,
456 if (!MachineOperand::clobbersPhysReg(RegMask
, Reg
))
458 int Idx
= mapRegToGPRIndex(Reg
);
460 handleClobber(LOHInfos
[Idx
]);
463 static void handleNormalInst(const MachineInstr
&MI
, LOHInfo
*LOHInfos
) {
464 // Handle defs and regmasks.
465 for (const MachineOperand
&MO
: MI
.operands()) {
466 if (MO
.isRegMask()) {
467 const uint32_t *RegMask
= MO
.getRegMask();
468 for (MCPhysReg Reg
: AArch64::GPR32RegClass
)
469 handleRegMaskClobber(RegMask
, Reg
, LOHInfos
);
470 for (MCPhysReg Reg
: AArch64::GPR64RegClass
)
471 handleRegMaskClobber(RegMask
, Reg
, LOHInfos
);
474 if (!MO
.isReg() || !MO
.isDef())
476 int Idx
= mapRegToGPRIndex(MO
.getReg());
479 handleClobber(LOHInfos
[Idx
]);
483 SmallSet
<int, 4> UsesSeen
;
484 for (const MachineOperand
&MO
: MI
.uses()) {
485 if (!MO
.isReg() || !MO
.readsReg())
487 int Idx
= mapRegToGPRIndex(MO
.getReg());
491 // Multiple uses of the same register within a single instruction don't
492 // count as MultiUser or block optimization. This is especially important on
493 // arm64_32, where any memory operation is likely to be an explicit use of
494 // xN and an implicit use of wN (the base address register).
495 if (!UsesSeen
.count(Idx
)) {
496 handleUse(MI
, MO
, LOHInfos
[Idx
]);
497 UsesSeen
.insert(Idx
);
502 bool AArch64CollectLOH::runOnMachineFunction(MachineFunction
&MF
) {
503 if (skipFunction(MF
.getFunction()))
506 LLVM_DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n"
507 << "Looking in function " << MF
.getName() << '\n');
509 LOHInfo LOHInfos
[N_GPR_REGS
];
510 AArch64FunctionInfo
&AFI
= *MF
.getInfo
<AArch64FunctionInfo
>();
511 for (const MachineBasicBlock
&MBB
: MF
) {
512 // Reset register tracking state.
513 memset(LOHInfos
, 0, sizeof(LOHInfos
));
514 // Live-out registers are used.
515 for (const MachineBasicBlock
*Succ
: MBB
.successors()) {
516 for (const auto &LI
: Succ
->liveins()) {
517 int RegIdx
= mapRegToGPRIndex(LI
.PhysReg
);
519 LOHInfos
[RegIdx
].OneUser
= true;
523 // Walk the basic block backwards and update the per register state machine
525 for (const MachineInstr
&MI
: make_range(MBB
.rbegin(), MBB
.rend())) {
526 unsigned Opcode
= MI
.getOpcode();
528 case AArch64::ADDXri
:
529 case AArch64::LDRXui
:
530 case AArch64::LDRWui
:
531 if (canDefBePartOfLOH(MI
)) {
532 const MachineOperand
&Def
= MI
.getOperand(0);
533 const MachineOperand
&Op
= MI
.getOperand(1);
534 assert(Def
.isReg() && Def
.isDef() && "Expected reg def");
535 assert(Op
.isReg() && Op
.isUse() && "Expected reg use");
536 int DefIdx
= mapRegToGPRIndex(Def
.getReg());
537 int OpIdx
= mapRegToGPRIndex(Op
.getReg());
538 if (DefIdx
>= 0 && OpIdx
>= 0 &&
539 handleMiddleInst(MI
, LOHInfos
[DefIdx
], LOHInfos
[OpIdx
]))
544 const MachineOperand
&Op0
= MI
.getOperand(0);
545 int Idx
= mapRegToGPRIndex(Op0
.getReg());
547 handleADRP(MI
, AFI
, LOHInfos
[Idx
]);
552 handleNormalInst(MI
, LOHInfos
);
556 // Return "no change": The pass only collects information.
560 FunctionPass
*llvm::createAArch64CollectLOHPass() {
561 return new AArch64CollectLOH();