1 //===- ARM64.cpp ----------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "Arch/ARM64Common.h"
10 #include "InputFiles.h"
12 #include "SyntheticSections.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "mach-o/compact_unwind_encoding.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/BinaryFormat/MachO.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/LEB128.h"
22 #include "llvm/Support/MathExtras.h"
25 using namespace llvm::MachO
;
26 using namespace llvm::support::endian
;
28 using namespace lld::macho
;
32 struct ARM64
: ARM64Common
{
34 void writeStub(uint8_t *buf
, const Symbol
&, uint64_t) const override
;
35 void writeStubHelperHeader(uint8_t *buf
) const override
;
36 void writeStubHelperEntry(uint8_t *buf
, const Symbol
&,
37 uint64_t entryAddr
) const override
;
39 void writeObjCMsgSendStub(uint8_t *buf
, Symbol
*sym
, uint64_t stubsAddr
,
40 uint64_t &stubOffset
, uint64_t selrefVA
,
41 Symbol
*objcMsgSend
) const override
;
42 void populateThunk(InputSection
*thunk
, Symbol
*funcSym
) override
;
43 void applyOptimizationHints(uint8_t *, const ObjFile
&) const override
;
45 void initICFSafeThunkBody(InputSection
*thunk
,
46 InputSection
*branchTarget
) const override
;
47 InputSection
*getThunkBranchTarget(InputSection
*thunk
) const override
;
48 uint32_t getICFSafeThunkSize() const override
;
53 // Random notes on reloc types:
54 // ADDEND always pairs with BRANCH26, PAGE21, or PAGEOFF12
55 // POINTER_TO_GOT: ld64 supports a 4-byte pc-relative form as well as an 8-byte
56 // absolute version of this relocation. The semantics of the absolute relocation
57 // are weird -- it results in the value of the GOT slot being written, instead
58 // of the address. Let's not support it unless we find a real-world use case.
59 static constexpr std::array
<RelocAttrs
, 11> relocAttrsArray
{{
60 #define B(x) RelocAttrBits::x
62 B(UNSIGNED
) | B(ABSOLUTE
) | B(EXTERN
) | B(LOCAL
) | B(BYTE4
) | B(BYTE8
)},
63 {"SUBTRACTOR", B(SUBTRAHEND
) | B(EXTERN
) | B(BYTE4
) | B(BYTE8
)},
64 {"BRANCH26", B(PCREL
) | B(EXTERN
) | B(BRANCH
) | B(BYTE4
)},
65 {"PAGE21", B(PCREL
) | B(EXTERN
) | B(BYTE4
)},
66 {"PAGEOFF12", B(ABSOLUTE
) | B(EXTERN
) | B(BYTE4
)},
67 {"GOT_LOAD_PAGE21", B(PCREL
) | B(EXTERN
) | B(GOT
) | B(BYTE4
)},
68 {"GOT_LOAD_PAGEOFF12",
69 B(ABSOLUTE
) | B(EXTERN
) | B(GOT
) | B(LOAD
) | B(BYTE4
)},
70 {"POINTER_TO_GOT", B(PCREL
) | B(EXTERN
) | B(GOT
) | B(POINTER
) | B(BYTE4
)},
71 {"TLVP_LOAD_PAGE21", B(PCREL
) | B(EXTERN
) | B(TLV
) | B(BYTE4
)},
72 {"TLVP_LOAD_PAGEOFF12",
73 B(ABSOLUTE
) | B(EXTERN
) | B(TLV
) | B(LOAD
) | B(BYTE4
)},
74 {"ADDEND", B(ADDEND
)},
// Lazy-binding stub: load the __la_symbol_ptr slot and jump through it.
static constexpr uint32_t stubCode[] = {
    0x90000010, // 00: adrp  x16, __la_symbol_ptr@page
    0xf9400210, // 04: ldr   x16, [x16, __la_symbol_ptr@pageoff]
    0xd61f0200, // 08: br    x16
};
84 void ARM64::writeStub(uint8_t *buf8
, const Symbol
&sym
,
85 uint64_t pointerVA
) const {
86 ::writeStub(buf8
, stubCode
, sym
, pointerVA
);
// Stub-helper header: push _dyld_private, then tail-call dyld_stub_binder.
static constexpr uint32_t stubHelperHeaderCode[] = {
    0x90000011, // 00: adrp  x17, _dyld_private@page
    0x91000231, // 04: add   x17, x17, _dyld_private@pageoff
    0xa9bf47f0, // 08: stp   x16/x17, [sp, #-16]!
    0x90000010, // 0c: adrp  x16, dyld_stub_binder@page
    0xf9400210, // 10: ldr   x16, [x16, dyld_stub_binder@pageoff]
    0xd61f0200, // 14: br    x16
};
98 void ARM64::writeStubHelperHeader(uint8_t *buf8
) const {
99 ::writeStubHelperHeader
<LP64
>(buf8
, stubHelperHeaderCode
);
// Per-symbol stub-helper entry: load the lazy-binding-info offset into w16
// and branch to the shared stub-helper header.
static constexpr uint32_t stubHelperEntryCode[] = {
    0x18000050, // 00: ldr  w16, l0
    0x14000000, // 04: b    stubHelperHeader
    0x00000000, // 08: l0: .long 0
};
108 void ARM64::writeStubHelperEntry(uint8_t *buf8
, const Symbol
&sym
,
109 uint64_t entryVA
) const {
110 ::writeStubHelperEntry(buf8
, stubHelperEntryCode
, sym
, entryVA
);
// "Fast" objc_msgSend stub: load the selector into x1, then jump to
// _objc_msgSend through its GOT slot. Padded with brk to the 32-byte
// stub alignment.
static constexpr uint32_t objcStubsFastCode[] = {
    0x90000001, // adrp  x1, __objc_selrefs@page
    0xf9400021, // ldr   x1, [x1, @selector("foo")@pageoff]
    0x90000010, // adrp  x16, _got@page
    0xf9400210, // ldr   x16, [x16, _objc_msgSend@pageoff]
    0xd61f0200, // br    x16
    0xd4200020, // brk   #0x1
    0xd4200020, // brk   #0x1
    0xd4200020, // brk   #0x1
};
// "Small" objc_msgSend stub: load the selector, then branch directly to
// _objc_msgSend (or its stub).
static constexpr uint32_t objcStubsSmallCode[] = {
    0x90000001, // adrp  x1, __objc_selrefs@page
    0xf9400021, // ldr   x1, [x1, @selector("foo")@pageoff]
    0x14000000, // b     _objc_msgSend
};
130 void ARM64::writeObjCMsgSendStub(uint8_t *buf
, Symbol
*sym
, uint64_t stubsAddr
,
131 uint64_t &stubOffset
, uint64_t selrefVA
,
132 Symbol
*objcMsgSend
) const {
133 uint64_t objcMsgSendAddr
;
134 uint64_t objcStubSize
;
135 uint64_t objcMsgSendIndex
;
137 if (config
->objcStubsMode
== ObjCStubsMode::fast
) {
138 objcStubSize
= target
->objcStubsFastSize
;
139 objcMsgSendAddr
= in
.got
->addr
;
140 objcMsgSendIndex
= objcMsgSend
->gotIndex
;
141 ::writeObjCMsgSendFastStub
<LP64
>(buf
, objcStubsFastCode
, sym
, stubsAddr
,
142 stubOffset
, selrefVA
, objcMsgSendAddr
,
145 assert(config
->objcStubsMode
== ObjCStubsMode::small
);
146 objcStubSize
= target
->objcStubsSmallSize
;
147 if (auto *d
= dyn_cast
<Defined
>(objcMsgSend
)) {
148 objcMsgSendAddr
= d
->getVA();
149 objcMsgSendIndex
= 0;
151 objcMsgSendAddr
= in
.stubs
->addr
;
152 objcMsgSendIndex
= objcMsgSend
->stubsIndex
;
154 ::writeObjCMsgSendSmallStub
<LP64
>(buf
, objcStubsSmallCode
, sym
, stubsAddr
,
155 stubOffset
, selrefVA
, objcMsgSendAddr
,
158 stubOffset
+= objcStubSize
;
// A thunk is the relaxed variation of stubCode. We don't need the
// extra indirection through a lazy pointer because the target address
// is known at link time.
static constexpr uint32_t thunkCode[] = {
    0x90000010, // 00: adrp  x16, <thunk.ptr>@page
    0x91000210, // 04: add   x16, [x16,<thunk.ptr>@pageoff]
    0xd61f0200, // 08: br    x16
};
170 void ARM64::populateThunk(InputSection
*thunk
, Symbol
*funcSym
) {
172 thunk
->data
= {reinterpret_cast<const uint8_t *>(thunkCode
),
174 thunk
->relocs
.emplace_back(/*type=*/ARM64_RELOC_PAGEOFF12
,
175 /*pcrel=*/false, /*length=*/2,
176 /*offset=*/4, /*addend=*/0,
177 /*referent=*/funcSym
);
178 thunk
->relocs
.emplace_back(/*type=*/ARM64_RELOC_PAGE21
,
179 /*pcrel=*/true, /*length=*/2,
180 /*offset=*/0, /*addend=*/0,
181 /*referent=*/funcSym
);
// Just a single direct branch to the target function.
static constexpr uint32_t icfSafeThunkCode[] = {
    0x14000000, // 08: b target
};
188 void ARM64::initICFSafeThunkBody(InputSection
*thunk
,
189 InputSection
*branchTarget
) const {
190 // The base data here will not be itself modified, we'll just be adding a
191 // reloc below. So we can directly use the constexpr above as the data.
192 thunk
->data
= {reinterpret_cast<const uint8_t *>(icfSafeThunkCode
),
193 sizeof(icfSafeThunkCode
)};
195 thunk
->relocs
.emplace_back(/*type=*/ARM64_RELOC_BRANCH26
,
196 /*pcrel=*/true, /*length=*/2,
197 /*offset=*/0, /*addend=*/0,
198 /*referent=*/branchTarget
);
201 InputSection
*ARM64::getThunkBranchTarget(InputSection
*thunk
) const {
202 assert(thunk
->relocs
.size() == 1 &&
203 "expected a single reloc on ARM64 ICF thunk");
204 auto &reloc
= thunk
->relocs
[0];
205 assert(reloc
.referent
.is
<InputSection
*>() &&
206 "ARM64 thunk reloc is expected to point to an InputSection");
208 return reloc
.referent
.dyn_cast
<InputSection
*>();
211 uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode
); }
213 ARM64::ARM64() : ARM64Common(LP64()) {
214 cpuType
= CPU_TYPE_ARM64
;
215 cpuSubtype
= CPU_SUBTYPE_ARM64_ALL
;
217 stubSize
= sizeof(stubCode
);
218 thunkSize
= sizeof(thunkCode
);
220 objcStubsFastSize
= sizeof(objcStubsFastCode
);
221 objcStubsFastAlignment
= 32;
222 objcStubsSmallSize
= sizeof(objcStubsSmallCode
);
223 objcStubsSmallAlignment
= 4;
225 // Branch immediate is two's complement 26 bits, which is implicitly
226 // multiplied by 4 (since all functions are 4-aligned: The branch range
227 // is -4*(2**(26-1))..4*(2**(26-1) - 1).
228 backwardBranchRange
= 128 * 1024 * 1024;
229 forwardBranchRange
= backwardBranchRange
- 4;
231 modeDwarfEncoding
= UNWIND_ARM64_MODE_DWARF
;
232 subtractorRelocType
= ARM64_RELOC_SUBTRACTOR
;
233 unsignedRelocType
= ARM64_RELOC_UNSIGNED
;
235 stubHelperHeaderSize
= sizeof(stubHelperHeaderCode
);
236 stubHelperEntrySize
= sizeof(stubHelperEntryCode
);
238 relocAttrs
= {relocAttrsArray
.data(), relocAttrsArray
.size()};
// Decoded forms of the instructions the LOH pass inspects.

// adrp xD, <page>
struct Adrp {
  uint32_t destRegister;
  int64_t addend = 0; // page delta encoded in the immediate, in bytes
};

// add xD, xS, #imm
struct Add {
  uint8_t destRegister;
  uint8_t srcRegister;
  uint32_t addend = 0;
};

// How a load extends the loaded value into the destination register.
enum ExtendType { ZeroExtend = 1, Sign64 = 2, Sign32 = 3 };

// ldr-family load: ldr/ldrb/ldrh/ldrs*/ldr (SIMD&FP)
struct Ldr {
  uint8_t destRegister;
  uint8_t baseRegister;
  uint8_t p2Size; // log2 of the load size in bytes
  bool isFloat;   // SIMD&FP register destination
  ExtendType extendType;
  int64_t offset; // immediate offset, in bytes
};
265 static bool parseAdrp(uint32_t insn
, Adrp
&adrp
) {
266 if ((insn
& 0x9f000000) != 0x90000000)
268 adrp
.destRegister
= insn
& 0x1f;
269 uint64_t immHi
= (insn
>> 5) & 0x7ffff;
270 uint64_t immLo
= (insn
>> 29) & 0x3;
271 adrp
.addend
= SignExtend64
<21>(immLo
| (immHi
<< 2)) * 4096;
275 static bool parseAdd(uint32_t insn
, Add
&add
) {
276 if ((insn
& 0xffc00000) != 0x91000000)
278 add
.destRegister
= insn
& 0x1f;
279 add
.srcRegister
= (insn
>> 5) & 0x1f;
280 add
.addend
= (insn
>> 10) & 0xfff;
284 static bool parseLdr(uint32_t insn
, Ldr
&ldr
) {
285 ldr
.destRegister
= insn
& 0x1f;
286 ldr
.baseRegister
= (insn
>> 5) & 0x1f;
287 uint8_t size
= insn
>> 30;
288 uint8_t opc
= (insn
>> 22) & 3;
290 if ((insn
& 0x3fc00000) == 0x39400000) {
291 // LDR (immediate), LDRB (immediate), LDRH (immediate)
293 ldr
.extendType
= ZeroExtend
;
295 } else if ((insn
& 0x3f800000) == 0x39800000) {
296 // LDRSB (immediate), LDRSH (immediate), LDRSW (immediate)
298 ldr
.extendType
= static_cast<ExtendType
>(opc
);
300 } else if ((insn
& 0x3f400000) == 0x3d400000) {
301 // LDR (immediate, SIMD&FP)
302 ldr
.extendType
= ZeroExtend
;
306 else if (size
== 0 && opc
== 3)
313 ldr
.offset
= ((insn
>> 10) & 0xfff) << ldr
.p2Size
;
317 static bool isValidAdrOffset(int32_t delta
) { return isInt
<21>(delta
); }
319 static void writeAdr(void *loc
, uint32_t dest
, int32_t delta
) {
320 assert(isValidAdrOffset(delta
));
321 uint32_t opcode
= 0x10000000;
322 uint32_t immHi
= (delta
& 0x001ffffc) << 3;
323 uint32_t immLo
= (delta
& 0x00000003) << 29;
324 write32le(loc
, opcode
| immHi
| immLo
| dest
);
327 static void writeNop(void *loc
) { write32le(loc
, 0xd503201f); }
329 static bool isLiteralLdrEligible(const Ldr
&ldr
) {
330 return ldr
.p2Size
> 1 && isShiftedInt
<19, 2>(ldr
.offset
);
333 static void writeLiteralLdr(void *loc
, const Ldr
&ldr
) {
334 assert(isLiteralLdrEligible(ldr
));
335 uint32_t imm19
= (ldr
.offset
/ 4 & maskTrailingOnes
<uint32_t>(19)) << 5;
337 switch (ldr
.p2Size
) {
342 opcode
= ldr
.extendType
== Sign64
? 0x98000000 : 0x18000000;
345 opcode
= ldr
.isFloat
? 0x5c000000 : 0x58000000;
351 llvm_unreachable("Invalid literal ldr size");
353 write32le(loc
, opcode
| imm19
| ldr
.destRegister
);
356 static bool isImmediateLdrEligible(const Ldr
&ldr
) {
357 // Note: We deviate from ld64's behavior, which converts to immediate loads
358 // only if ldr.offset < 4096, even though the offset is divided by the load's
359 // size in the 12-bit immediate operand. Only the unsigned offset variant is
362 uint32_t size
= 1 << ldr
.p2Size
;
363 return ldr
.offset
>= 0 && (ldr
.offset
% size
) == 0 &&
364 isUInt
<12>(ldr
.offset
>> ldr
.p2Size
);
367 static void writeImmediateLdr(void *loc
, const Ldr
&ldr
) {
368 assert(isImmediateLdrEligible(ldr
));
369 uint32_t opcode
= 0x39000000;
371 opcode
|= 0x04000000;
372 assert(ldr
.extendType
== ZeroExtend
);
374 opcode
|= ldr
.destRegister
;
375 opcode
|= ldr
.baseRegister
<< 5;
377 if (ldr
.p2Size
== 4) {
381 opc
= ldr
.extendType
;
384 uint32_t immBits
= ldr
.offset
>> ldr
.p2Size
;
385 write32le(loc
, opcode
| (immBits
<< 10) | (opc
<< 22) | (size
<< 30));
388 // Transforms a pair of adrp+add instructions into an adr instruction if the
389 // target is within the +/- 1 MiB range allowed by the adr's 21 bit signed
392 // adrp xN, _foo@PAGE
393 // add xM, xN, _foo@PAGEOFF
397 static void applyAdrpAdd(uint8_t *buf
, const ConcatInputSection
*isec
,
398 uint64_t offset1
, uint64_t offset2
) {
399 uint32_t ins1
= read32le(buf
+ offset1
);
400 uint32_t ins2
= read32le(buf
+ offset2
);
403 if (!parseAdrp(ins1
, adrp
) || !parseAdd(ins2
, add
))
405 if (adrp
.destRegister
!= add
.srcRegister
)
408 uint64_t addr1
= isec
->getVA() + offset1
;
409 uint64_t referent
= pageBits(addr1
) + adrp
.addend
+ add
.addend
;
410 int64_t delta
= referent
- addr1
;
411 if (!isValidAdrOffset(delta
))
414 writeAdr(buf
+ offset1
, add
.destRegister
, delta
);
415 writeNop(buf
+ offset2
);
418 // Transforms two adrp instructions into a single adrp if their referent
419 // addresses are located on the same 4096 byte page.
421 // adrp xN, _foo@PAGE
422 // adrp xN, _bar@PAGE
424 // adrp xN, _foo@PAGE
426 static void applyAdrpAdrp(uint8_t *buf
, const ConcatInputSection
*isec
,
427 uint64_t offset1
, uint64_t offset2
) {
428 uint32_t ins1
= read32le(buf
+ offset1
);
429 uint32_t ins2
= read32le(buf
+ offset2
);
431 if (!parseAdrp(ins1
, adrp1
) || !parseAdrp(ins2
, adrp2
))
433 if (adrp1
.destRegister
!= adrp2
.destRegister
)
436 uint64_t page1
= pageBits(offset1
+ isec
->getVA()) + adrp1
.addend
;
437 uint64_t page2
= pageBits(offset2
+ isec
->getVA()) + adrp2
.addend
;
441 writeNop(buf
+ offset2
);
444 // Transforms a pair of adrp+ldr (immediate) instructions into an ldr (literal)
445 // load from a PC-relative address if it is 4-byte aligned and within +/- 1 MiB,
446 // as ldr can encode a signed 19-bit offset that gets multiplied by 4.
448 // adrp xN, _foo@PAGE
449 // ldr xM, [xN, _foo@PAGEOFF]
453 static void applyAdrpLdr(uint8_t *buf
, const ConcatInputSection
*isec
,
454 uint64_t offset1
, uint64_t offset2
) {
455 uint32_t ins1
= read32le(buf
+ offset1
);
456 uint32_t ins2
= read32le(buf
+ offset2
);
459 if (!parseAdrp(ins1
, adrp
) || !parseLdr(ins2
, ldr
))
461 if (adrp
.destRegister
!= ldr
.baseRegister
)
464 uint64_t addr1
= isec
->getVA() + offset1
;
465 uint64_t addr2
= isec
->getVA() + offset2
;
466 uint64_t referent
= pageBits(addr1
) + adrp
.addend
+ ldr
.offset
;
467 ldr
.offset
= referent
- addr2
;
468 if (!isLiteralLdrEligible(ldr
))
471 writeNop(buf
+ offset1
);
472 writeLiteralLdr(buf
+ offset2
, ldr
);
475 // GOT loads are emitted by the compiler as a pair of adrp and ldr instructions,
476 // but they may be changed to adrp+add by relaxGotLoad(). This hint performs
477 // the AdrpLdr or AdrpAdd transformation depending on whether it was relaxed.
478 static void applyAdrpLdrGot(uint8_t *buf
, const ConcatInputSection
*isec
,
479 uint64_t offset1
, uint64_t offset2
) {
480 uint32_t ins2
= read32le(buf
+ offset2
);
483 if (parseAdd(ins2
, add
))
484 applyAdrpAdd(buf
, isec
, offset1
, offset2
);
485 else if (parseLdr(ins2
, ldr
))
486 applyAdrpLdr(buf
, isec
, offset1
, offset2
);
489 // Optimizes an adrp+add+ldr sequence used for loading from a local symbol's
490 // address by loading directly if it's close enough, or to an adrp(p)+ldr
491 // sequence if it's not.
493 // adrp x0, _foo@PAGE
494 // add x1, x0, _foo@PAGEOFF
495 // ldr x2, [x1, #off]
496 static void applyAdrpAddLdr(uint8_t *buf
, const ConcatInputSection
*isec
,
497 uint64_t offset1
, uint64_t offset2
,
499 uint32_t ins1
= read32le(buf
+ offset1
);
501 if (!parseAdrp(ins1
, adrp
))
503 uint32_t ins2
= read32le(buf
+ offset2
);
505 if (!parseAdd(ins2
, add
))
507 uint32_t ins3
= read32le(buf
+ offset3
);
509 if (!parseLdr(ins3
, ldr
))
511 if (adrp
.destRegister
!= add
.srcRegister
)
513 if (add
.destRegister
!= ldr
.baseRegister
)
516 // Load from the target address directly.
519 // ldr x2, [_foo + #off]
520 uint64_t addr1
= isec
->getVA() + offset1
;
521 uint64_t addr3
= isec
->getVA() + offset3
;
522 uint64_t referent
= pageBits(addr1
) + adrp
.addend
+ add
.addend
;
523 Ldr literalLdr
= ldr
;
524 literalLdr
.offset
+= referent
- addr3
;
525 if (isLiteralLdrEligible(literalLdr
)) {
526 writeNop(buf
+ offset1
);
527 writeNop(buf
+ offset2
);
528 writeLiteralLdr(buf
+ offset3
, literalLdr
);
532 // Load the target address into a register and load from there indirectly.
535 // ldr x2, [x1, #off]
536 int64_t adrOffset
= referent
- addr1
;
537 if (isValidAdrOffset(adrOffset
)) {
538 writeAdr(buf
+ offset1
, ldr
.baseRegister
, adrOffset
);
539 // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but
540 // not for AdrpLdrGotLdr. Its effect is the same either way.
541 writeNop(buf
+ offset2
);
545 // Move the target's page offset into the ldr's immediate offset.
546 // adrp x0, _foo@PAGE
548 // ldr x2, [x0, _foo@PAGEOFF + #off]
549 Ldr immediateLdr
= ldr
;
550 immediateLdr
.baseRegister
= adrp
.destRegister
;
551 immediateLdr
.offset
+= add
.addend
;
552 if (isImmediateLdrEligible(immediateLdr
)) {
553 writeNop(buf
+ offset2
);
554 writeImmediateLdr(buf
+ offset3
, immediateLdr
);
559 // Relaxes a GOT-indirect load.
560 // If the referenced symbol is external and its GOT entry is within +/- 1 MiB,
561 // the GOT entry can be loaded with a single literal ldr instruction.
562 // If the referenced symbol is local and thus has been relaxed to adrp+add+ldr,
563 // we perform the AdrpAddLdr transformation.
564 static void applyAdrpLdrGotLdr(uint8_t *buf
, const ConcatInputSection
*isec
,
565 uint64_t offset1
, uint64_t offset2
,
567 uint32_t ins2
= read32le(buf
+ offset2
);
571 if (parseAdd(ins2
, add
)) {
572 applyAdrpAddLdr(buf
, isec
, offset1
, offset2
, offset3
);
573 } else if (parseLdr(ins2
, ldr2
)) {
574 // adrp x1, _foo@GOTPAGE
575 // ldr x2, [x1, _foo@GOTPAGEOFF]
576 // ldr x3, [x2, #off]
578 uint32_t ins1
= read32le(buf
+ offset1
);
580 if (!parseAdrp(ins1
, adrp
))
582 uint32_t ins3
= read32le(buf
+ offset3
);
584 if (!parseLdr(ins3
, ldr3
))
587 if (ldr2
.baseRegister
!= adrp
.destRegister
)
589 if (ldr3
.baseRegister
!= ldr2
.destRegister
)
591 // Loads from the GOT must be pointer sized.
592 if (ldr2
.p2Size
!= 3 || ldr2
.isFloat
)
595 uint64_t addr1
= isec
->getVA() + offset1
;
596 uint64_t addr2
= isec
->getVA() + offset2
;
597 uint64_t referent
= pageBits(addr1
) + adrp
.addend
+ ldr2
.offset
;
598 // Load the GOT entry's address directly.
600 // ldr x2, _foo@GOTPAGE + _foo@GOTPAGEOFF
601 // ldr x3, [x2, #off]
602 Ldr literalLdr
= ldr2
;
603 literalLdr
.offset
= referent
- addr2
;
604 if (isLiteralLdrEligible(literalLdr
)) {
605 writeNop(buf
+ offset1
);
606 writeLiteralLdr(buf
+ offset2
, literalLdr
);
611 static uint64_t readValue(const uint8_t *&ptr
, const uint8_t *end
) {
613 uint64_t value
= decodeULEB128(ptr
, &n
, end
);
618 template <typename Callback
>
619 static void forEachHint(ArrayRef
<uint8_t> data
, Callback callback
) {
620 std::array
<uint64_t, 3> args
;
622 for (const uint8_t *p
= data
.begin(), *end
= data
.end(); p
< end
;) {
623 uint64_t type
= readValue(p
, end
);
627 uint64_t argCount
= readValue(p
, end
);
628 // All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
630 for (unsigned i
= 0; i
< argCount
; ++i
)
635 for (unsigned i
= 0; i
< argCount
; ++i
)
636 args
[i
] = readValue(p
, end
);
637 callback(type
, ArrayRef
<uint64_t>(args
.data(), argCount
));
641 // On RISC architectures like arm64, materializing a memory address generally
642 // takes multiple instructions. If the referenced symbol is located close enough
643 // in memory, fewer instructions are needed.
645 // Linker optimization hints record where addresses are computed. After
646 // addresses have been assigned, if possible, we change them to a shorter
647 // sequence of instructions. The size of the binary is not modified; the
648 // eliminated instructions are replaced with NOPs. This still leads to faster
649 // code as the CPU can skip over NOPs quickly.
651 // LOHs are specified by the LC_LINKER_OPTIMIZATION_HINTS load command, which
652 // points to a sequence of ULEB128-encoded numbers. Each entry specifies a
653 // transformation kind, and 2 or 3 addresses where the instructions are located.
654 void ARM64::applyOptimizationHints(uint8_t *outBuf
, const ObjFile
&obj
) const {
655 ArrayRef
<uint8_t> data
= obj
.getOptimizationHints();
659 const ConcatInputSection
*section
= nullptr;
660 uint64_t sectionAddr
= 0;
661 uint8_t *buf
= nullptr;
663 auto findSection
= [&](uint64_t addr
) {
664 if (section
&& addr
>= sectionAddr
&&
665 addr
< sectionAddr
+ section
->getSize())
668 if (obj
.sections
.empty())
670 auto secIt
= std::prev(llvm::upper_bound(
672 [](uint64_t off
, const Section
*sec
) { return off
< sec
->addr
; }));
673 const Section
*sec
= *secIt
;
675 if (sec
->subsections
.empty())
677 auto subsecIt
= std::prev(llvm::upper_bound(
678 sec
->subsections
, addr
- sec
->addr
,
679 [](uint64_t off
, Subsection subsec
) { return off
< subsec
.offset
; }));
680 const Subsection
&subsec
= *subsecIt
;
681 const ConcatInputSection
*isec
=
682 dyn_cast_or_null
<ConcatInputSection
>(subsec
.isec
);
683 if (!isec
|| isec
->shouldOmitFromOutput())
687 sectionAddr
= subsec
.offset
+ sec
->addr
;
688 buf
= outBuf
+ section
->outSecOff
+ section
->parent
->fileOff
;
692 auto isValidOffset
= [&](uint64_t offset
) {
693 if (offset
< sectionAddr
|| offset
>= sectionAddr
+ section
->getSize()) {
694 error(toString(&obj
) +
695 ": linker optimization hint spans multiple sections");
701 bool hasAdrpAdrp
= false;
702 forEachHint(data
, [&](uint64_t kind
, ArrayRef
<uint64_t> args
) {
703 if (kind
== LOH_ARM64_ADRP_ADRP
) {
708 if (!findSection(args
[0]))
711 case LOH_ARM64_ADRP_ADD
:
712 if (isValidOffset(args
[1]))
713 applyAdrpAdd(buf
, section
, args
[0] - sectionAddr
,
714 args
[1] - sectionAddr
);
716 case LOH_ARM64_ADRP_LDR
:
717 if (isValidOffset(args
[1]))
718 applyAdrpLdr(buf
, section
, args
[0] - sectionAddr
,
719 args
[1] - sectionAddr
);
721 case LOH_ARM64_ADRP_LDR_GOT
:
722 if (isValidOffset(args
[1]))
723 applyAdrpLdrGot(buf
, section
, args
[0] - sectionAddr
,
724 args
[1] - sectionAddr
);
726 case LOH_ARM64_ADRP_ADD_LDR
:
727 if (isValidOffset(args
[1]) && isValidOffset(args
[2]))
728 applyAdrpAddLdr(buf
, section
, args
[0] - sectionAddr
,
729 args
[1] - sectionAddr
, args
[2] - sectionAddr
);
731 case LOH_ARM64_ADRP_LDR_GOT_LDR
:
732 if (isValidOffset(args
[1]) && isValidOffset(args
[2]))
733 applyAdrpLdrGotLdr(buf
, section
, args
[0] - sectionAddr
,
734 args
[1] - sectionAddr
, args
[2] - sectionAddr
);
736 case LOH_ARM64_ADRP_ADD_STR
:
737 case LOH_ARM64_ADRP_LDR_GOT_STR
:
738 // TODO: Implement these
746 // AdrpAdrp optimization hints are performed in a second pass because they
747 // might interfere with other transformations. For instance, consider the
750 // adrp x0, _foo@PAGE
751 // add x1, x0, _foo@PAGEOFF
752 // adrp x0, _bar@PAGE
753 // add x2, x0, _bar@PAGEOFF
755 // If we perform the AdrpAdrp relaxation first, we get:
757 // adrp x0, _foo@PAGE
758 // add x1, x0, _foo@PAGEOFF
760 // add x2, x0, _bar@PAGEOFF
762 // If we then apply AdrpAdd to the first two instructions, the add will have a
763 // garbage value in x0:
768 // add x2, x0, _bar@PAGEOFF
769 forEachHint(data
, [&](uint64_t kind
, ArrayRef
<uint64_t> args
) {
770 if (kind
!= LOH_ARM64_ADRP_ADRP
)
772 if (!findSection(args
[0]))
774 if (isValidOffset(args
[1]))
775 applyAdrpAdrp(buf
, section
, args
[0] - sectionAddr
, args
[1] - sectionAddr
);
779 TargetInfo
*macho::createARM64TargetInfo() {