//===- AArch64.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "OutputSections.h"
#include "SyntheticSections.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"

using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld::elf;
// Page(Expr) is the page address of the expression Expr, defined
// as (Expr & ~0xFFF). (This applies even if the machine page size
// supported by the platform has a different value.)
uint64_t elf::getAArch64Page(uint64_t expr) {
  return expr & ~static_cast<uint64_t>(0xFFF);
}
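// Illustrative example (editorial note, not part of the original source):
// every address inside the same 4 KiB page maps to the same page base, e.g.
//   getAArch64Page(0x12345ABC) == 0x12345000
//   getAArch64Page(0x12345FFF) == 0x12345000
// which is what lets an ADRP/ADD or ADRP/LDR pair materialize an address as
// "page base + 12-bit offset".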
// A BTI landing pad is a valid target for an indirect branch when the Branch
// Target Identification has been enabled. As linker generated branches are
// via x16 the BTI landing pads are defined as: BTI C, BTI J, BTI JC, PACIASP,
// PACIBSP.
bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) {
  // PLT entries accessed indirectly have a BTI c.
  if (s.isInPlt(ctx))
    return true;
  Defined *d = dyn_cast<Defined>(&s);
  if (!isa_and_nonnull<InputSection>(d->section))
    // All places that we cannot disassemble are responsible for making
    // the target a BTI landing pad.
    return true;
  InputSection *isec = cast<InputSection>(d->section);
  uint64_t off = d->value + a;
  // Likely user error, but protect ourselves against out of bounds
  // access.
  if (off >= isec->getSize())
    return true;
  const uint8_t *buf = isec->content().begin();
  const uint32_t instr = read32le(buf + off);
  // All BTI instructions are HINT instructions which all have the same
  // encoding apart from bits [11:5].
  if ((instr & 0xd503201f) == 0xd503201f &&
      is_contained({/*PACIASP*/ 0xd503233f, /*PACIBSP*/ 0xd503237f,
                    /*BTI C*/ 0xd503245f, /*BTI J*/ 0xd503249f,
                    /*BTI JC*/ 0xd50324df},
                   instr))
    return true;
  return false;
}
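// Encoding note (editorial addition): BTI, PACIASP and PACIBSP all live in
// the HINT space, so they share the fixed bits 0xd503201f and differ only in
// bits [11:5]. For example BTI C is HINT #34:
//   0xd503245f & 0xd503201f == 0xd503201f   (fixed HINT bits)
//   (0xd503245f >> 5) & 0x7f == 0x22 == 34  (hint number)
// which is also why cores without BTI simply execute these landing pads as
// NOPs.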
class AArch64 : public TargetInfo {
public:
  AArch64(Ctx &);
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  bool usesOnlyLowPageBits(RelType type) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;

private:
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
};
struct AArch64Relaxer {
  bool safeToRelaxAdrpLdr = false;

  AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs);
  bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
                       uint64_t secAddr, uint8_t *buf) const;
  bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
                       uint64_t secAddr, uint8_t *buf) const;
};
// Return the bits [Start, End] from Val shifted Start bits.
// For instance, getBits(0xF0, 4, 8) returns 0xF.
static uint64_t getBits(uint64_t val, int start, int end) {
  uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1;
  return (val >> start) & mask;
}
AArch64::AArch64(Ctx &ctx) : TargetInfo(ctx) {
  copyRel = R_AARCH64_COPY;
  relativeRel = R_AARCH64_RELATIVE;
  iRelativeRel = R_AARCH64_IRELATIVE;
  gotRel = R_AARCH64_GLOB_DAT;
  pltRel = R_AARCH64_JUMP_SLOT;
  symbolicRel = R_AARCH64_ABS64;
  tlsDescRel = R_AARCH64_TLSDESC;
  tlsGotRel = R_AARCH64_TLS_TPREL64;
  defaultMaxPageSize = 65536;

  // Align to the 2 MiB page size (known as a superpage or huge page).
  // FreeBSD automatically promotes 2 MiB-aligned allocations.
  defaultImageBase = 0x200000;
}
RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
                            const uint8_t *loc) const {
  switch (type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_ABS32:
  case R_AARCH64_ABS64:
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return R_ABS;
  case R_AARCH64_AUTH_ABS64:
    return RE_AARCH64_AUTH;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    return RE_AARCH64_TLSDESC_PAGE;
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSDESC_ADD_LO12:
    return R_TLSDESC;
  case R_AARCH64_TLSDESC_CALL:
    return R_TLSDESC_CALL;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    return R_TPREL;
  case R_AARCH64_CALL26:
  case R_AARCH64_CONDBR19:
  case R_AARCH64_JUMP26:
  case R_AARCH64_TSTBR14:
    return R_PLT_PC;
  case R_AARCH64_PLT32:
    const_cast<Symbol &>(s).thunkAccessed = true;
    return R_PLT_PC;
  case R_AARCH64_PREL16:
  case R_AARCH64_PREL32:
  case R_AARCH64_PREL64:
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_PREL_G2_NC:
  case R_AARCH64_MOVW_PREL_G3:
    return R_PC;
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return RE_AARCH64_PAGE_PC;
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return R_GOT;
  case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
  case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
    return RE_AARCH64_AUTH_GOT;
  case R_AARCH64_AUTH_GOT_LD_PREL19:
  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
    return RE_AARCH64_AUTH_GOT_PC;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    return RE_AARCH64_GOT_PAGE;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
    return RE_AARCH64_GOT_PAGE_PC;
  case R_AARCH64_AUTH_ADR_GOT_PAGE:
    return RE_AARCH64_AUTH_GOT_PAGE_PC;
  case R_AARCH64_GOTPCREL32:
  case R_AARCH64_GOT_LD_PREL19:
    return R_GOT_PC;
  default:
    Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
             << ") against symbol " << &s;
    return R_NONE;
  }
}
RelExpr AArch64::adjustTlsExpr(RelType type, RelExpr expr) const {
  if (expr == R_RELAX_TLS_GD_TO_IE) {
    if (type == R_AARCH64_TLSDESC_ADR_PAGE21)
      return RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC;
    return R_RELAX_TLS_GD_TO_IE_ABS;
  }
  return expr;
}
bool AArch64::usesOnlyLowPageBits(RelType type) const {
  switch (type) {
  default:
    return false;
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return true;
  }
}
RelType AArch64::getDynRel(RelType type) const {
  if (type == R_AARCH64_ABS64 || type == R_AARCH64_AUTH_ABS64)
    return type;
  return R_AARCH64_NONE;
}
int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_AARCH64_TLSDESC:
    return read64(ctx, buf + 8);
  case R_AARCH64_GLOB_DAT:
  case R_AARCH64_AUTH_GLOB_DAT:
  case R_AARCH64_JUMP_SLOT:
    return 0;
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    return SignExtend64<16>(read16(ctx, buf));
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    return SignExtend64<32>(read32(ctx, buf));
  case R_AARCH64_ABS64:
  case R_AARCH64_PREL64:
  case R_AARCH64_RELATIVE:
  case R_AARCH64_IRELATIVE:
  case R_AARCH64_TLS_TPREL64:
    return read64(ctx, buf);

  // The following relocation types all point at instructions, and
  // relocate an immediate field in the instruction.
  //
  // The general rule, from AAELF64 §5.7.2 "Addends and PC-bias",
  // says: "If the relocation relocates an instruction the immediate
  // field of the instruction is extracted, scaled as required by
  // the instruction field encoding, and sign-extended to 64 bits".
  //
  // The R_AARCH64_MOVW family operates on wide MOV/MOVK/MOVZ
  // instructions, which have a 16-bit immediate field with its low
  // bit in bit 5 of the instruction encoding. When the immediate
  // field is used as an implicit addend for REL-type relocations,
  // it is treated as added to the low bits of the output value, not
  // shifted depending on the relocation type.
  //
  // This allows REL relocations to express the requirement 'please
  // add 12345 to this symbol value and give me the four 16-bit
  // chunks of the result', by putting the same addend 12345 in all
  // four instructions. Carries between the 16-bit chunks are
  // handled correctly, because the whole 64-bit addition is done
  // once per relocation.
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return SignExtend64<16>(getBits(read32le(buf), 5, 20));
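  // Worked example (editorial addition): with REL-style relocations a producer
  // can store the same addend, say 12345 (0x3039), in all four MOVZ/MOVK
  // instructions of a 64-bit materialization. The linker extracts 0x3039 from
  // each place, performs the full 64-bit "symbol + 0x3039" addition once per
  // relocation, and writes back only the 16-bit chunk selected by G0..G3, so
  // carries between chunks come out correctly.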
  // R_AARCH64_TSTBR14 points at a TBZ or TBNZ instruction, which
  // has a 14-bit offset measured in instructions, i.e. shifted left
  // by 2.
  case R_AARCH64_TSTBR14:
    return SignExtend64<16>(getBits(read32le(buf), 5, 18) << 2);

  // R_AARCH64_CONDBR19 operates on the ordinary B.cond instruction,
  // which has a 19-bit offset measured in instructions.
  //
  // R_AARCH64_LD_PREL_LO19 operates on the LDR (literal)
  // instruction, which also has a 19-bit offset, measured in 4-byte
  // chunks. So the calculation is the same as for
  // R_AARCH64_CONDBR19.
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
    return SignExtend64<21>(getBits(read32le(buf), 5, 23) << 2);

  // R_AARCH64_ADD_ABS_LO12_NC operates on ADD (immediate). The
  // immediate can optionally be shifted left by 12 bits, but this
  // relocation is intended for the case where it is not.
  case R_AARCH64_ADD_ABS_LO12_NC:
    return SignExtend64<12>(getBits(read32le(buf), 10, 21));

  // R_AARCH64_ADR_PREL_LO21 operates on an ADR instruction, whose
  // 21-bit immediate is split between two bits high up in the word
  // (in fact the two _lowest_ order bits of the value) and 19 bits
  // lower down.
  //
  // R_AARCH64_ADR_PREL_PG_HI21[_NC] operate on an ADRP instruction,
  // which encodes the immediate in the same way, but will shift it
  // left by 12 bits when the instruction executes. For the same
  // reason as the MOVW family, we don't apply that left shift here.
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return SignExtend64<21>((getBits(read32le(buf), 5, 23) << 2) |
                            getBits(read32le(buf), 29, 30));

  // R_AARCH64_{JUMP,CALL}26 operate on B and BL, which have a
  // 26-bit offset measured in instructions.
  case R_AARCH64_JUMP26:
  case R_AARCH64_CALL26:
    return SignExtend64<28>(getBits(read32le(buf), 0, 25) << 2);
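  // Scaling note (editorial addition): B and BL encode a word offset in imm26,
  // so an encoded value of 1 means "+4 bytes". getBits(insn, 0, 25) << 2
  // recovers the byte offset, and sign extension from bit 27 gives the
  // +/-128 MiB reach that inBranchRange() below relies on.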
  default:
    InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
    return 0;
  }
}
void AArch64::writeGotPlt(uint8_t *buf, const Symbol &) const {
  write64(ctx, buf, ctx.in.plt->getVA());
}
void AArch64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  if (ctx.arg.writeAddends)
    write64(ctx, buf, s.getVA(ctx));
}
void AArch64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp  x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br   x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  memcpy(buf, pltData, sizeof(pltData));

  uint64_t got = ctx.in.gotPlt->getVA();
  uint64_t plt = ctx.in.plt->getVA();
  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 4));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
}
void AArch64::writePlt(uint8_t *buf, const Symbol &sym,
                       uint64_t pltEntryAddr) const {
  const uint8_t inst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.got.plt[n]))
      0x20, 0x02, 0x1f, 0xd6  // br   x17
  };
  memcpy(buf, inst, sizeof(inst));

  uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
  relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr));
  relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr);
  relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr);
}
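// Worked example (editorial addition, addresses invented for illustration):
// if this PLT entry sits at 0x210010 and its .got.plt slot at 0x230018, the
// adrp receives Page(0x230018) - Page(0x210010) = 0x20000 and the ldr/add
// receive the low 12 bits 0x018, so at run time x16 = 0x230000 + 0x18 and x17
// holds the loaded target, exactly as the instruction comments above describe.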
bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
                         uint64_t branchAddr, const Symbol &s,
                         int64_t a) const {
  // If s is an undefined weak symbol and does not have a PLT entry then it
  // will be resolved as a branch to the next instruction. If it is hidden, its
  // binding has been converted to local, so we just check isUndefined() here.
  // An undefined non-weak symbol will have been errored.
  if (s.isUndefined() && !s.isInPlt(ctx))
    return false;
  // ELF for the ARM 64-bit architecture, section Call and Jump relocations
  // only permits range extension thunks for R_AARCH64_CALL26 and
  // R_AARCH64_JUMP26 relocation types.
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return false;

  uint64_t dst = expr == R_PLT_PC ? s.getPltVA(ctx) : s.getVA(ctx, a);
  return !inBranchRange(type, branchAddr, dst);
}
uint32_t AArch64::getThunkSectionSpacing() const {
  // See comment in Arch/ARM.cpp for a more detailed explanation of
  // getThunkSectionSpacing(). For AArch64 the only branches we are permitted
  // to thunk have a range of +/- 128 MiB.
  return (128 * 1024 * 1024) - 0x30000;
}
bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return true;
  // The AArch64 call and unconditional branch instructions have a range of
  // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
  uint64_t range =
      type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024);
  if (dst > src) {
    // Immediate of branch is signed.
    range -= 4;
    return dst - src <= range;
  }
  return src - dst <= range;
}
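// Range example (editorial addition): for a BL at src = 0x08000000 targeting
// dst = 0x0FFFFFFC the forward distance is 0x07FFFFFC, i.e. 128 MiB - 4,
// which is the largest encodable forward displacement; one instruction
// further and needsThunk() above would request a range-extension thunk.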
static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
  uint32_t immLo = (imm & 0x3) << 29;
  uint32_t immHi = (imm & 0x1FFFFC) << 3;
  uint64_t mask = (0x3 << 29) | (0x1FFFFC << 3);
  write32le(l, (read32le(l) & ~mask) | immLo | immHi);
}
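// Bit-layout note (editorial addition): ADR and ADRP keep a 21-bit immediate
// split into immlo (bits 30:29) and immhi (bits 23:5). For imm = 0x54321 the
// low two bits (0b01) land in bits 30:29 and the remaining 19 bits
// (0x54321 >> 2 == 0x150C8) land in bits 23:5; (imm & 0x1FFFFC) << 3 performs
// exactly that placement, mirroring how getImplicitAddend() reassembles the
// same field.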
static void writeMaskedBits32le(uint8_t *p, int32_t v, uint32_t mask) {
  write32le(p, (read32le(p) & ~mask) | v);
}
// Update the immediate field in an AArch64 ldr, str, and add instruction.
static void write32Imm12(uint8_t *l, uint64_t imm) {
  writeMaskedBits32le(l, (imm & 0xFFF) << 10, 0xFFF << 10);
}
// Update the immediate field in an AArch64 movk, movn or movz instruction
// for a signed relocation, and update the opcode of a movn or movz instruction
// to match the sign of the operand.
static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
  uint32_t inst = read32le(loc);
  // Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
  if (!(inst & (1 << 29))) {
    // movn or movz.
    if (imm & 0x10000) {
      // Change opcode to movn, which takes an inverted operand.
      imm ^= 0xFFFF;
      inst &= ~(1 << 30);
    } else {
      // Change opcode to movz.
      inst |= 1 << 30;
    }
  }
  write32le(loc, inst | ((imm & 0xFFFF) << 5));
}
void AArch64::relocate(uint8_t *loc, const Relocation &rel,
                       uint64_t val) const {
  switch (rel.type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    checkIntUInt(ctx, loc, val, 16, rel);
    write16(ctx, loc, val);
    break;
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    checkIntUInt(ctx, loc, val, 32, rel);
    write32(ctx, loc, val);
    break;
  case R_AARCH64_PLT32:
  case R_AARCH64_GOTPCREL32:
    checkInt(ctx, loc, val, 32, rel);
    write32(ctx, loc, val);
    break;
  case R_AARCH64_ABS64:
    // AArch64 relocations to tagged symbols have extended semantics, as
    // described here:
    // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative.
    // tl;dr: encode the symbol's special addend in the place, which is an
    // offset to the point where the logical tag is derived from. Quick hack, if
    // the addend is within the symbol's bounds, no need to encode the tag
    // derivation offset.
    if (rel.sym && rel.sym->isTagged() &&
        (rel.addend < 0 ||
         rel.addend >= static_cast<int64_t>(rel.sym->getSize())))
      write64(ctx, loc, -rel.addend);
    else
      write64(ctx, loc, val);
    break;
  case R_AARCH64_PREL64:
    write64(ctx, loc, val);
    break;
  case R_AARCH64_AUTH_ABS64:
    // If val is wider than 32 bits, the relocation must have been moved from
    // .relr.auth.dyn to .rela.dyn, and the addend write is not needed.
    //
    // If val fits in 32 bits, we have two potential scenarios:
    // * True RELR: Write the 32-bit `val`.
    // * RELA: Even if the value now fits in 32 bits, it might have been
    //   converted from RELR during an iteration in
    //   finalizeAddressDependentContent(). Writing the value is harmless
    //   because dynamic linking ignores it.
    if (isInt<32>(val))
      write32(ctx, loc, val);
    break;
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
    write32Imm12(loc, val);
    break;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_AUTH_ADR_GOT_PAGE:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    checkInt(ctx, loc, val, 33, rel);
    [[fallthrough]];
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    write32AArch64Addr(loc, val >> 12);
    break;
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
    checkInt(ctx, loc, val, 21, rel);
    write32AArch64Addr(loc, val);
    break;
  case R_AARCH64_JUMP26:
    // Normally we would just write the bits of the immediate field, however
    // when patching instructions for the cpu errata fix -fix-cortex-a53-843419
    // we want to replace a non-branch instruction with a branch immediate
    // instruction. By writing all the bits of the instruction including the
    // opcode and the immediate (0 001 | 01 imm26) we can do this
    // transformation by placing a R_AARCH64_JUMP26 relocation at the offset of
    // the instruction we want to patch.
    write32le(loc, 0x14000000);
    [[fallthrough]];
  case R_AARCH64_CALL26:
    checkInt(ctx, loc, val, 28, rel);
    writeMaskedBits32le(loc, (val & 0x0FFFFFFC) >> 2, 0x0FFFFFFC >> 2);
    break;
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_GOT_LD_PREL19:
  case R_AARCH64_AUTH_GOT_LD_PREL19:
    checkAlignment(ctx, loc, val, 4, rel);
    checkInt(ctx, loc, val, 21, rel);
    writeMaskedBits32le(loc, (val & 0x1FFFFC) << 3, 0x1FFFFC << 3);
    break;
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
    write32Imm12(loc, getBits(val, 0, 11));
    break;
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
    checkAlignment(ctx, loc, val, 2, rel);
    write32Imm12(loc, getBits(val, 1, 11));
    break;
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
    checkAlignment(ctx, loc, val, 4, rel);
    write32Imm12(loc, getBits(val, 2, 11));
    break;
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_LD64_LO12:
    checkAlignment(ctx, loc, val, 8, rel);
    write32Imm12(loc, getBits(val, 3, 11));
    break;
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
    checkAlignment(ctx, loc, val, 16, rel);
    write32Imm12(loc, getBits(val, 4, 11));
    break;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    checkAlignment(ctx, loc, val, 8, rel);
    write32Imm12(loc, getBits(val, 3, 14));
    break;
  case R_AARCH64_MOVW_UABS_G0:
    checkUInt(ctx, loc, val, 16, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G0_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF) << 5, 0xFFFF << 5);
    break;
  case R_AARCH64_MOVW_UABS_G1:
    checkUInt(ctx, loc, val, 32, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G1_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF0000) >> 11, 0xFFFF0000 >> 11);
    break;
  case R_AARCH64_MOVW_UABS_G2:
    checkUInt(ctx, loc, val, 48, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G2_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF00000000) >> 27,
                        0xFFFF00000000 >> 27);
    break;
  case R_AARCH64_MOVW_UABS_G3:
    writeMaskedBits32le(loc, (val & 0xFFFF000000000000) >> 43,
                        0xFFFF000000000000 >> 43);
    break;
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
    checkInt(ctx, loc, val, 17, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
    writeSMovWImm(loc, val);
    break;
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
    checkInt(ctx, loc, val, 33, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
    writeSMovWImm(loc, val >> 16);
    break;
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    checkInt(ctx, loc, val, 49, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G2_NC:
    writeSMovWImm(loc, val >> 32);
    break;
  case R_AARCH64_MOVW_PREL_G3:
    writeSMovWImm(loc, val >> 48);
    break;
  case R_AARCH64_TSTBR14:
    checkInt(ctx, loc, val, 16, rel);
    writeMaskedBits32le(loc, (val & 0xFFFC) << 3, 0xFFFC << 3);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    checkUInt(ctx, loc, val, 24, rel);
    write32Imm12(loc, val >> 12);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
    write32Imm12(loc, val);
    break;
  case R_AARCH64_TLSDESC:
    // For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
    write64(ctx, loc + 8, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}
void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are in the form:
  //   adrp    x0, :tlsdesc:v             [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr     x1, [x0, #:tlsdesc_lo12:v  [R_AARCH64_TLSDESC_LD64_LO12]
  //   add     x0, x0, :tlsdesc_los:v     [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                       [R_AARCH64_TLSDESC_CALL]
  //   blr     x1
  // And it can be optimized to:
  //   movz    x0, #0x0, lsl #16
  //   movk    x0, #0x10
  //   nop
  //   nop
  checkUInt(ctx, loc, val, 32, rel);

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    break;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0xd2a00000 | (((val >> 16) & 0xffff) << 5)); // movz
    break;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf2800000 | ((val & 0xffff) << 5)); // movk
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
  }
}
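// Concrete example (editorial addition): for a thread-pointer offset of
// 0x10010 the relaxed sequence written above is
//   movz x0, #0x1, lsl #16   // 0xd2a00000 | (((0x10010 >> 16) & 0xffff) << 5)
//   movk x0, #0x10           // 0xf2800000 | ((0x10010 & 0xffff) << 5)
// with the add/call slots turned into nops, so no TLS descriptor call remains
// at run time.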
void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are in the form:
  //   adrp    x0, :tlsdesc:v             [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr     x1, [x0, #:tlsdesc_lo12:v  [R_AARCH64_TLSDESC_LD64_LO12]
  //   add     x0, x0, :tlsdesc_los:v     [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                       [R_AARCH64_TLSDESC_CALL]
  //   blr     x1
  // And it can be optimized to:
  //   adrp    x0, :gottprel:v
  //   ldr     x0, [x0, :gottprel_lo12:v]
  //   nop
  //   nop
  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    break;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0x90000000); // adrp
    relocateNoSym(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
    break;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf9400000); // ldr
    relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
  }
}
void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  checkUInt(ctx, loc, val, 32, rel);

  if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
    // Generate MOVZ.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5));
    return;
  }
  if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
    // Generate MOVK.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xf2800000 | regNo) | ((val & 0xffff) << 5));
    return;
  }
  llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
}
AArch64Relaxer::AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs) {
  // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
  // always appear in pairs.
  size_t i = 0;
  const size_t size = relocs.size();
  for (; i != size; ++i) {
    if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
      if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
        ++i;
        continue;
      }
      break;
    } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
      break;
    }
  }
  safeToRelaxAdrpLdr = i == size;
}
bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
                                     const Relocation &addRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  // When the address of sym is within the range of ADR then
  // we may relax
  //   ADRP xn, sym
  //   ADD  xn, xn, :lo12: sym
  // to
  //   NOP
  //   ADR  xn, sym
  if (!ctx.arg.relax || adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 ||
      addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != addRel.offset)
    return false;
  if (adrpRel.sym != addRel.sym)
    return false;
  if (adrpRel.addend != 0 || addRel.addend != 0)
    return false;

  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  uint32_t addInstr = read32le(buf + addRel.offset);
  // Check if the first instruction is ADRP and the second instruction is ADD.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (addInstr & 0xffc00000) != 0x91000000)
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t addDestReg = addInstr & 0x1f;
  uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
  if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // Check if the address difference is within 1MiB range.
  int64_t val = sym.getVA(ctx) - (secAddr + addRel.offset);
  if (val < -1024 * 1024 || val >= 1024 * 1024)
    return false;

  Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset,
                       /*addend=*/0, &sym};
  // nop
  write32le(buf + adrpRel.offset, 0xd503201f);
  // adr x_<dest reg>
  write32le(buf + adrRel.offset, 0x10000000 | adrpDestReg);
  ctx.target->relocate(buf + adrRel.offset, adrRel, val);
  return true;
}
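// Before/after sketch (editorial addition): when sym ends up within +/-1 MiB
// of the ADD, the pair
//   adrp x1, sym            ->   nop
//   add  x1, x1, :lo12:sym  ->   adr  x1, sym
// computes the same address in x1; the first slot is padded with a nop so
// that offsets and section sizes are unchanged.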
bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
                                     const Relocation &ldrRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  if (!safeToRelaxAdrpLdr)
    return false;

  // When the definition of sym is not preemptible then we may
  // be able to relax
  //   ADRP xn, :got: sym
  //   LDR  xn, [ xn :got_lo12: sym]
  // to
  //   ADRP xn, sym
  //   ADD  xn, xn, :lo_12: sym
  if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
      ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != ldrRel.offset)
    return false;
  // Check if the relocations reference the same symbol and
  // skip undefined, preemptible and STT_GNU_IFUNC symbols.
  if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
      adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
    return false;
  // Check if the addends of the both relocations are zero.
  if (adrpRel.addend != 0 || ldrRel.addend != 0)
    return false;
  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  uint32_t ldrInstr = read32le(buf + ldrRel.offset);
  // Check if the first instruction is ADRP and the second instruction is LDR.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (ldrInstr & 0x3b000000) != 0x39000000)
    return false;
  // Check the value of the sf bit.
  if (!(ldrInstr >> 31))
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t ldrDestReg = ldrInstr & 0x1f;
  uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
  // Check if ADPR and LDR use the same register.
  if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // GOT references to absolute symbols can't be relaxed to use ADRP/ADD in
  // position-independent code because these instructions produce a relative
  // address.
  if (ctx.arg.isPic && !cast<Defined>(sym).section)
    return false;
  // Check if the address difference is within 4GB range.
  int64_t val =
      getAArch64Page(sym.getVA(ctx)) - getAArch64Page(secAddr + adrpRel.offset);
  if (val != llvm::SignExtend64(val, 33))
    return false;

  Relocation adrpSymRel = {RE_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
                           adrpRel.offset, /*addend=*/0, &sym};
  Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
                       /*addend=*/0, &sym};

  // adrp x_<dest reg>
  write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
  // add x_<dest reg>, x_<dest reg>
  write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));

  ctx.target->relocate(
      buf + adrpSymRel.offset, adrpSymRel,
      SignExtend64(getAArch64Page(sym.getVA(ctx)) -
                       getAArch64Page(secAddr + adrpSymRel.offset),
                   64));
  ctx.target->relocate(buf + addRel.offset, addRel,
                       SignExtend64(sym.getVA(ctx), 64));
  tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf);
  return true;
}
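// Before/after sketch (editorial addition): for a non-preemptible symbol the
// GOT round trip
//   adrp x2, :got:sym             ->   adrp x2, sym
//   ldr  x2, [x2, :got_lo12:sym]  ->   add  x2, x2, :lo12:sym
// produces the symbol's address directly instead of loading it from the GOT;
// tryRelaxAdrpAdd() is then attempted on the rewritten pair so it can shrink
// further to nop + adr when the target is within +/-1 MiB.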
// Tagged symbols have upper address bits that are added by the dynamic loader,
// and thus need the full 64-bit GOT entry. Do not relax such symbols.
static bool needsGotForMemtag(const Relocation &rel) {
  return rel.sym->isTagged() && needsGot(rel.expr);
}
void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
  uint64_t secAddr = sec.getOutputSection()->addr;
  if (auto *s = dyn_cast<InputSection>(&sec))
    secAddr += s->outSecOff;
  else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
    secAddr += ehIn->getParent()->outSecOff;
  AArch64Relaxer relaxer(ctx, sec.relocs());
  for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) {
    const Relocation &rel = sec.relocs()[i];
    uint8_t *loc = buf + rel.offset;
    const uint64_t val = sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset);

    if (needsGotForMemtag(rel)) {
      relocate(loc, rel, val);
      continue;
    }

    switch (rel.expr) {
    case RE_AARCH64_GOT_PAGE_PC:
      if (i + 1 < size &&
          relaxer.tryRelaxAdrpLdr(rel, sec.relocs()[i + 1], secAddr, buf)) {
        ++i;
        continue;
      }
      break;
    case RE_AARCH64_PAGE_PC:
      if (i + 1 < size &&
          relaxer.tryRelaxAdrpAdd(rel, sec.relocs()[i + 1], secAddr, buf)) {
        ++i;
        continue;
      }
      break;
    case RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC:
    case R_RELAX_TLS_GD_TO_IE_ABS:
      relaxTlsGdToIe(loc, rel, val);
      continue;
    case R_RELAX_TLS_GD_TO_LE:
      relaxTlsGdToLe(loc, rel, val);
      continue;
    case R_RELAX_TLS_IE_TO_LE:
      relaxTlsIeToLe(loc, rel, val);
      continue;
    default:
      break;
    }
    relocate(loc, rel, val);
  }
}
// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
// Indicator (BTI) introduced in armv8.5-a. The additional instructions used
// in the variant Plt sequences are encoded in the Hint space so they can be
// deployed on older architectures, which treat the instructions as a nop.
// PAC and BTI can be combined leading to the following combinations:
// writePltHeader
// writePltHeaderBti (no PAC Header needed)
// writePlt
// writePltBti (BTI only)
// writePltPac (PAC only)
// writePltBtiPac (BTI and PAC)
// When PAC is enabled the dynamic loader encrypts the address that it places
// in the .got.plt using the pacia1716 instruction which encrypts the value in
// x17 using the modifier in x16. The static linker places autia1716 before the
// indirect branch to x17 to authenticate the address in x17 with the modifier
// in x16. This makes it more difficult for an attacker to modify the value in
// the .got.plt.
//
// When BTI is enabled all indirect branches must land on a bti instruction.
// The static linker must place a bti instruction at the start of any PLT entry
// that may be the target of an indirect branch. As the PLT entries call the
// lazy resolver indirectly this must have a bti instruction at start. In
// general a bti instruction is not needed for a PLT entry as indirect calls
// are resolved to the function address and not the PLT entry for the function.
// There are a small number of cases where the PLT address can escape, such as
// taking the address of a function or ifunc via a non got-generating
// relocation, and a shared library refers to that symbol.
//
// We use the bti c variant of the instruction which permits indirect branches
// (br) via x16/x17 and indirect function calls (blr) via any register. The ABI
// guarantees that all indirect branches from code requiring BTI protection
// will go via x16/x17.
class AArch64BtiPac final : public AArch64 {
public:
  AArch64BtiPac(Ctx &);
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;

private:
  bool btiHeader; // bti instruction needed in PLT Header and Entry
  enum {
    PEK_NoAuth,
    PEK_AuthHint, // use autia1716 instr for authenticated branch in PLT entry
    PEK_Auth,     // use braa instr for authenticated branch in PLT entry
  } pacEntryKind;
};
AArch64BtiPac::AArch64BtiPac(Ctx &ctx) : AArch64(ctx) {
  btiHeader = (ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
  // A BTI (Branch Target Indicator) Plt Entry is only required if the
  // address of the PLT entry can be taken by the program, which permits an
  // indirect jump to the PLT entry. This can happen when the address
  // of the PLT entry for a function is canonicalised due to the address of
  // the function in an executable being taken by a shared library, or
  // non-preemptible ifunc referenced by non-GOT-generating, non-PLT-generating
  // relocations.
  // The PAC PLT entries require dynamic loader support and this isn't known
  // from properties in the objects, so we use the command line flag.
  // By default we only use hint-space instructions, but if we detect the
  // PAuthABI, which requires v8.3-A, we can use the non-hint space
  // instructions.
  if (ctx.arg.zPacPlt) {
    if (llvm::any_of(ctx.aarch64PauthAbiCoreInfo,
                     [](uint8_t c) { return c != 0; }))
      pacEntryKind = PEK_Auth;
    else
      pacEntryKind = PEK_AuthHint;
  } else {
    pacEntryKind = PEK_NoAuth;
  }

  if (btiHeader || (pacEntryKind != PEK_NoAuth)) {
    pltEntrySize = 24;
    ipltEntrySize = 24;
  }
}
void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
  const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp  x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br   x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop

  uint64_t got = ctx.in.gotPlt->getVA();
  uint64_t plt = ctx.in.plt->getVA();

  if (btiHeader) {
    // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
    // instruction.
    memcpy(buf, btiData, sizeof(btiData));
    buf += sizeof(btiData);
    plt += sizeof(btiData);
  }
  memcpy(buf, pltData, sizeof(pltData));

  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 4));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
  if (!btiHeader)
    // We didn't add the BTI c instruction so round out size with NOP.
    memcpy(buf + sizeof(pltData), nopData, sizeof(nopData));
}
, const Symbol
&sym
,
1089 uint64_t pltEntryAddr
) const {
1090 // The PLT entry is of the form:
1091 // [btiData] addrInst (pacBr | stdBr) [nopData]
1092 const uint8_t btiData
[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
1093 const uint8_t addrInst
[] = {
1094 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
1095 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
1096 0x10, 0x02, 0x00, 0x91 // add x16, x16, Offset(&(.got.plt[n]))
1098 const uint8_t pacHintBr
[] = {
1099 0x9f, 0x21, 0x03, 0xd5, // autia1716
1100 0x20, 0x02, 0x1f, 0xd6 // br x17
1102 const uint8_t pacBr
[] = {
1103 0x30, 0x0a, 0x1f, 0xd7, // braa x17, x16
1104 0x1f, 0x20, 0x03, 0xd5 // nop
1106 const uint8_t stdBr
[] = {
1107 0x20, 0x02, 0x1f, 0xd6, // br x17
1108 0x1f, 0x20, 0x03, 0xd5 // nop
1110 const uint8_t nopData
[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop
1112 // NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may
1113 // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
1114 // address may escape if referenced by a direct relocation. If relative
1115 // vtables are used then if the vtable is in a shared object the offsets will
1116 // be to the PLT entry. The condition is conservative.
1117 bool hasBti
= btiHeader
&&
1118 (sym
.hasFlag(NEEDS_COPY
) || sym
.isInIplt
|| sym
.thunkAccessed
);
1120 memcpy(buf
, btiData
, sizeof(btiData
));
1121 buf
+= sizeof(btiData
);
1122 pltEntryAddr
+= sizeof(btiData
);
1125 uint64_t gotPltEntryAddr
= sym
.getGotPltVA(ctx
);
1126 memcpy(buf
, addrInst
, sizeof(addrInst
));
1127 relocateNoSym(buf
, R_AARCH64_ADR_PREL_PG_HI21
,
1128 getAArch64Page(gotPltEntryAddr
) - getAArch64Page(pltEntryAddr
));
1129 relocateNoSym(buf
+ 4, R_AARCH64_LDST64_ABS_LO12_NC
, gotPltEntryAddr
);
1130 relocateNoSym(buf
+ 8, R_AARCH64_ADD_ABS_LO12_NC
, gotPltEntryAddr
);
1132 if (pacEntryKind
!= PEK_NoAuth
)
1133 memcpy(buf
+ sizeof(addrInst
),
1134 pacEntryKind
== PEK_AuthHint
? pacHintBr
: pacBr
,
1135 sizeof(pacEntryKind
== PEK_AuthHint
? pacHintBr
: pacBr
));
1137 memcpy(buf
+ sizeof(addrInst
), stdBr
, sizeof(stdBr
));
1139 // We didn't add the BTI c instruction so round out size with NOP.
1140 memcpy(buf
+ sizeof(addrInst
) + sizeof(stdBr
), nopData
, sizeof(nopData
));
template <class ELFT>
static void
addTaggedSymbolReferences(Ctx &ctx, InputSectionBase &sec,
                          DenseMap<Symbol *, unsigned> &referenceCount) {
  assert(sec.type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC);

  const RelsOrRelas<ELFT> rels = sec.relsOrRelas<ELFT>();
  if (rels.areRelocsRel())
    ErrAlways(ctx)
        << "non-RELA relocations are not allowed with memtag globals";

  for (const typename ELFT::Rela &rel : rels.relas) {
    Symbol &sym = sec.file->getRelocTargetSym(rel);
    // Linker-synthesized symbols such as __executable_start may be referenced
    // as tagged in input objfiles, and we don't want them to be tagged. A
    // cheap way to exclude them is the type check, but their type is
    // STT_NOTYPE. In addition, this saves us from checking untaggable symbols,
    // like functions or TLS symbols.
    if (sym.type != STT_OBJECT)
      continue;
    // STB_LOCAL symbols can't be referenced from outside the object file, and
    // thus don't need to be checked for references from other object files.
    if (sym.binding == STB_LOCAL) {
      sym.setIsTagged(true);
      continue;
    }
    ++referenceCount[&sym];
  }
}
// A tagged symbol must be denoted as being tagged by all references and the
// chosen definition. For simplicity, here, it must also be denoted as tagged
// for all definitions. Otherwise:
//
// 1. A tagged definition can be used by an untagged declaration, in which case
//    the untagged access may be PC-relative, causing a tag mismatch at
//    runtime.
// 2. An untagged definition can be used by a tagged declaration, where the
//    compiler has taken advantage of the increased alignment of the tagged
//    declaration, but the alignment at runtime is wrong, causing a fault.
//
// Ideally, this isn't a problem, as any TU that imports or exports tagged
// symbols should also be built with tagging. But, to handle these cases, we
// demote the symbol to be untagged.
void elf::createTaggedSymbols(Ctx &ctx) {
  assert(hasMemtag(ctx));

  // First, collect all symbols that are marked as tagged, and count how many
  // times they're marked as tagged.
  DenseMap<Symbol *, unsigned> taggedSymbolReferenceCount;
  for (InputFile *file : ctx.objectFiles) {
    if (file->kind() != InputFile::ObjKind)
      continue;
    for (InputSectionBase *section : file->getSections()) {
      if (!section || section->type != SHT_AARCH64_MEMTAG_GLOBALS_STATIC ||
          section == &InputSection::discarded)
        continue;
      invokeELFT(addTaggedSymbolReferences, ctx, *section,
                 taggedSymbolReferenceCount);
    }
  }

  // Now, go through all the symbols. If the number of declarations +
  // definitions to a symbol exceeds the amount of times they're marked as
  // tagged, it means we have an objfile that uses the untagged variant of the
  // symbol.
  for (InputFile *file : ctx.objectFiles) {
    if (file->kind() != InputFile::BinaryKind &&
        file->kind() != InputFile::ObjKind)
      continue;

    for (Symbol *symbol : file->getSymbols()) {
      // See `addTaggedSymbolReferences` for more details.
      if (symbol->type != STT_OBJECT ||
          symbol->binding == STB_LOCAL)
        continue;
      auto it = taggedSymbolReferenceCount.find(symbol);
      if (it == taggedSymbolReferenceCount.end())
        continue;
      unsigned &remainingAllowedTaggedRefs = it->second;
      if (remainingAllowedTaggedRefs == 0) {
        taggedSymbolReferenceCount.erase(it);
        continue;
      }
      --remainingAllowedTaggedRefs;
    }
  }

  // `addTaggedSymbolReferences` has already checked that we have RELA
  // relocations, the only other way to get written addends is with
  // --apply-dynamic-relocs.
  if (!taggedSymbolReferenceCount.empty() && ctx.arg.writeAddends)
    ErrAlways(ctx) << "--apply-dynamic-relocs cannot be used with MTE globals";

  // Now, `taggedSymbolReferenceCount` should only contain symbols that are
  // defined as tagged exactly the same amount as it's referenced, meaning all
  // uses are tagged.
  for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) {
    assert(remainingTaggedRefs == 0 &&
           "Symbol is defined as tagged more times than it's used");
    symbol->setIsTagged(true);
  }
}
void elf::setAArch64TargetInfo(Ctx &ctx) {
  if ((ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ||
      ctx.arg.zPacPlt)
    ctx.target.reset(new AArch64BtiPac(ctx));
  else
    ctx.target.reset(new AArch64(ctx));
}