//===- AArch64.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

// Page(Expr) is the page address of the expression Expr, defined
// as (Expr & ~0xFFF). (This applies even if the machine page size
// supported by the platform has a different value.)
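// For example, getAArch64Page(0x20F49) is 0x20000: only the low 12 bits are
// cleared, independent of the runtime page size.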
uint64_t elf::getAArch64Page(uint64_t expr) {
  return expr & ~static_cast<uint64_t>(0xFFF);
}

// A BTI landing pad is a valid target for an indirect branch when the Branch
// Target Identification has been enabled. As linker-generated branches are
// via x16, the BTI landing pads are defined as: BTI C, BTI J, BTI JC, PACIASP,
// PACIBSP.
bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) {
  // PLT entries accessed indirectly have a BTI c.
  if (s.isInPlt(ctx))
    return true;
  Defined *d = dyn_cast<Defined>(&s);
  if (!isa_and_nonnull<InputSection>(d->section))
    // All places that we cannot disassemble are responsible for making
    // the target a BTI landing pad.
    return true;
  InputSection *isec = cast<InputSection>(d->section);
  uint64_t off = d->value + a;
  // Likely user error, but protect ourselves against out of bounds
  // access.
  if (off >= isec->getSize())
    return true;
  const uint8_t *buf = isec->content().begin();
  const uint32_t instr = read32le(buf + off);
  // All BTI instructions are HINT instructions which all have the same
  // encoding apart from bits [11:5].
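  // Each of the encodings checked below is HINT #imm, i.e. 0xd503201f with
  // the hint number in bits [11:5] (e.g. BTI C is HINT #34).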
  if ((instr & 0xd503201f) == 0xd503201f &&
      is_contained({/*PACIASP*/ 0xd503233f, /*PACIBSP*/ 0xd503237f,
                    /*BTI C*/ 0xd503245f, /*BTI J*/ 0xd503249f,
                    /*BTI JC*/ 0xd50324df},
                   instr))
    return true;
  return false;
}

namespace {
class AArch64 : public TargetInfo {
public:
  AArch64(Ctx &);
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  bool usesOnlyLowPageBits(RelType type) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;

private:
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
};

struct AArch64Relaxer {
  Ctx &ctx;
  bool safeToRelaxAdrpLdr = false;

  AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs);
  bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
                       uint64_t secAddr, uint8_t *buf) const;
  bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
                       uint64_t secAddr, uint8_t *buf) const;
};
} // namespace

// Return the bits [Start, End] from Val shifted Start bits.
// For instance, getBits(0xF0, 4, 8) returns 0xF.
static uint64_t getBits(uint64_t val, int start, int end) {
  uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1;
  return (val >> start) & mask;
}

AArch64::AArch64(Ctx &ctx) : TargetInfo(ctx) {
  copyRel = R_AARCH64_COPY;
  relativeRel = R_AARCH64_RELATIVE;
  iRelativeRel = R_AARCH64_IRELATIVE;
  gotRel = R_AARCH64_GLOB_DAT;
  pltRel = R_AARCH64_JUMP_SLOT;
  symbolicRel = R_AARCH64_ABS64;
  tlsDescRel = R_AARCH64_TLSDESC;
  tlsGotRel = R_AARCH64_TLS_TPREL64;
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  defaultMaxPageSize = 65536;

  // Align to the 2 MiB page size (known as a superpage or huge page).
  // FreeBSD automatically promotes 2 MiB-aligned allocations.
  defaultImageBase = 0x200000;

  needsThunks = true;
}

RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
                            const uint8_t *loc) const {
  switch (type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_ABS32:
  case R_AARCH64_ABS64:
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return R_ABS;
  case R_AARCH64_AUTH_ABS64:
    return RE_AARCH64_AUTH;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    return RE_AARCH64_TLSDESC_PAGE;
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSDESC_ADD_LO12:
    return R_TLSDESC;
  case R_AARCH64_TLSDESC_CALL:
    return R_TLSDESC_CALL;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    return R_TPREL;
  case R_AARCH64_CALL26:
  case R_AARCH64_CONDBR19:
  case R_AARCH64_JUMP26:
  case R_AARCH64_TSTBR14:
    return R_PLT_PC;
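  // R_AARCH64_PLT32 can expose the address of a PLT entry or thunk to the
  // program (e.g. via relative vtables), so record the access;
  // AArch64BtiPac::writePlt below uses thunkAccessed to decide whether the
  // entry needs a BTI landing pad.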
  case R_AARCH64_PLT32:
    const_cast<Symbol &>(s).thunkAccessed = true;
    return R_PLT_PC;
  case R_AARCH64_PREL16:
  case R_AARCH64_PREL32:
  case R_AARCH64_PREL64:
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_PREL_G2_NC:
  case R_AARCH64_MOVW_PREL_G3:
    return R_PC;
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return RE_AARCH64_PAGE_PC;
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return R_GOT;
  case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
  case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
    return RE_AARCH64_AUTH_GOT;
  case R_AARCH64_AUTH_GOT_LD_PREL19:
  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
    return RE_AARCH64_AUTH_GOT_PC;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    return RE_AARCH64_GOT_PAGE;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
    return RE_AARCH64_GOT_PAGE_PC;
  case R_AARCH64_AUTH_ADR_GOT_PAGE:
    return RE_AARCH64_AUTH_GOT_PAGE_PC;
  case R_AARCH64_GOTPCREL32:
  case R_AARCH64_GOT_LD_PREL19:
    return R_GOT_PC;
  case R_AARCH64_NONE:
    return R_NONE;
  default:
    Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
             << ") against symbol " << &s;
    return R_NONE;
  }
}

RelExpr AArch64::adjustTlsExpr(RelType type, RelExpr expr) const {
  if (expr == R_RELAX_TLS_GD_TO_IE) {
    if (type == R_AARCH64_TLSDESC_ADR_PAGE21)
      return RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC;
    return R_RELAX_TLS_GD_TO_IE_ABS;
  }
  return expr;
}

bool AArch64::usesOnlyLowPageBits(RelType type) const {
  switch (type) {
  default:
    return false;
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return true;
  }
}

RelType AArch64::getDynRel(RelType type) const {
  if (type == R_AARCH64_ABS64 || type == R_AARCH64_AUTH_ABS64)
    return type;
  return R_AARCH64_NONE;
}

int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_AARCH64_TLSDESC:
    return read64(ctx, buf + 8);
  case R_AARCH64_NONE:
  case R_AARCH64_GLOB_DAT:
  case R_AARCH64_AUTH_GLOB_DAT:
  case R_AARCH64_JUMP_SLOT:
    return 0;
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    return SignExtend64<16>(read16(ctx, buf));
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    return SignExtend64<32>(read32(ctx, buf));
  case R_AARCH64_ABS64:
  case R_AARCH64_PREL64:
  case R_AARCH64_RELATIVE:
  case R_AARCH64_IRELATIVE:
  case R_AARCH64_TLS_TPREL64:
    return read64(ctx, buf);

    // The following relocation types all point at instructions, and
    // relocate an immediate field in the instruction.
    //
    // The general rule, from AAELF64 §5.7.2 "Addends and PC-bias",
    // says: "If the relocation relocates an instruction the immediate
    // field of the instruction is extracted, scaled as required by
    // the instruction field encoding, and sign-extended to 64 bits".
    //
    // The R_AARCH64_MOVW family operates on wide MOV/MOVK/MOVZ
    // instructions, which have a 16-bit immediate field with its low
    // bit in bit 5 of the instruction encoding. When the immediate
    // field is used as an implicit addend for REL-type relocations,
    // it is treated as added to the low bits of the output value, not
    // shifted depending on the relocation type.
    //
    // This allows REL relocations to express the requirement 'please
    // add 12345 to this symbol value and give me the four 16-bit
    // chunks of the result', by putting the same addend 12345 in all
    // four instructions. Carries between the 16-bit chunks are
    // handled correctly, because the whole 64-bit addition is done
    // once per relocation.
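    //
    // For example, to add 12345 a producer stores 12345 in the immediate
    // field of each of the four MOVZ/MOVK instructions; the code below
    // extracts 12345 from each, the 64-bit addition happens once per
    // relocation, and relocate() then picks out the 16-bit chunk that the
    // particular G0..G3 relocation type covers.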
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return SignExtend64<16>(getBits(read32le(buf), 5, 20));

    // R_AARCH64_TSTBR14 points at a TBZ or TBNZ instruction, which
    // has a 14-bit offset measured in instructions, i.e. shifted left
    // by 2.
  case R_AARCH64_TSTBR14:
    return SignExtend64<16>(getBits(read32le(buf), 5, 18) << 2);

    // R_AARCH64_CONDBR19 operates on the ordinary B.cond instruction,
    // which has a 19-bit offset measured in instructions.
    //
    // R_AARCH64_LD_PREL_LO19 operates on the LDR (literal)
    // instruction, which also has a 19-bit offset, measured in 4-byte
    // chunks. So the calculation is the same as for
    // R_AARCH64_CONDBR19.
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
    return SignExtend64<21>(getBits(read32le(buf), 5, 23) << 2);

    // R_AARCH64_ADD_ABS_LO12_NC operates on ADD (immediate). The
    // immediate can optionally be shifted left by 12 bits, but this
    // relocation is intended for the case where it is not.
  case R_AARCH64_ADD_ABS_LO12_NC:
    return SignExtend64<12>(getBits(read32le(buf), 10, 21));

    // R_AARCH64_ADR_PREL_LO21 operates on an ADR instruction, whose
    // 21-bit immediate is split between two bits high up in the word
    // (in fact the two _lowest_ order bits of the value) and 19 bits
    // lower down.
    //
    // R_AARCH64_ADR_PREL_PG_HI21[_NC] operate on an ADRP instruction,
    // which encodes the immediate in the same way, but will shift it
    // left by 12 bits when the instruction executes. For the same
    // reason as the MOVW family, we don't apply that left shift here.
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return SignExtend64<21>((getBits(read32le(buf), 5, 23) << 2) |
                            getBits(read32le(buf), 29, 30));

    // R_AARCH64_{JUMP,CALL}26 operate on B and BL, which have a
    // 26-bit offset measured in instructions.
  case R_AARCH64_JUMP26:
  case R_AARCH64_CALL26:
    return SignExtend64<28>(getBits(read32le(buf), 0, 25) << 2);

  default:
    InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
    return 0;
  }
}

void AArch64::writeGotPlt(uint8_t *buf, const Symbol &) const {
  write64(ctx, buf, ctx.in.plt->getVA());
}

void AArch64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  if (ctx.arg.writeAddends)
    write64(ctx, buf, s.getVA(ctx));
}

void AArch64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  memcpy(buf, pltData, sizeof(pltData));

  uint64_t got = ctx.in.gotPlt->getVA();
  uint64_t plt = ctx.in.plt->getVA();
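  // got + 16 is &.got.plt[2]: the first three .got.plt slots are reserved,
  // and slot 2 conventionally receives the dynamic loader's lazy-resolver
  // address, which the header loads into x17 and branches to.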
  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 4));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
}

void AArch64::writePlt(uint8_t *buf, const Symbol &sym,
                       uint64_t pltEntryAddr) const {
  const uint8_t inst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[n]))
      0x20, 0x02, 0x1f, 0xd6  // br x17
  };
  memcpy(buf, inst, sizeof(inst));

  uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
  relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr));
  relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr);
  relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr);
}

bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
                         uint64_t branchAddr, const Symbol &s,
                         int64_t a) const {
  // If s is an undefined weak symbol and does not have a PLT entry then it
  // will be resolved as a branch to the next instruction. If it is hidden, its
  // binding has been converted to local, so we just check isUndefined() here.
  // An undefined non-weak symbol will have been reported as an error.
  if (s.isUndefined() && !s.isInPlt(ctx))
    return false;
  // ELF for the Arm 64-bit Architecture, section "Call and Jump relocations",
  // only permits range extension thunks for R_AARCH64_CALL26 and
  // R_AARCH64_JUMP26 relocation types.
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return false;
  uint64_t dst = expr == R_PLT_PC ? s.getPltVA(ctx) : s.getVA(ctx, a);
  return !inBranchRange(type, branchAddr, dst);
}

uint32_t AArch64::getThunkSectionSpacing() const {
  // See comment in Arch/ARM.cpp for a more detailed explanation of
  // getThunkSectionSpacing(). For AArch64 the only branches we are permitted
  // to thunk have a range of +/- 128 MiB.
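  // The 0x30000 below the full 128 MiB is headroom; it presumably keeps the
  // thunk sections themselves (and any alignment padding) within range of the
  // branches they serve, as discussed in Arch/ARM.cpp.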
  return (128 * 1024 * 1024) - 0x30000;
}

bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return true;
  // The AArch64 call and unconditional branch instructions have a range of
  // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
  uint64_t range =
      type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024);
  if (dst > src) {
    // Immediate of branch is signed.
    range -= 4;
    return dst - src <= range;
  }
  return src - dst <= range;
}
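
// ADR and ADRP encode a 21-bit immediate split across the instruction word:
// the two low bits (immlo) sit in bits [30:29] and the remaining 19 bits
// (immhi) in bits [23:5]; the helper below patches that split field in place.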
static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
  uint32_t immLo = (imm & 0x3) << 29;
  uint32_t immHi = (imm & 0x1FFFFC) << 3;
  uint64_t mask = (0x3 << 29) | (0x1FFFFC << 3);
  write32le(l, (read32le(l) & ~mask) | immLo | immHi);
}

static void writeMaskedBits32le(uint8_t *p, int32_t v, uint32_t mask) {
  write32le(p, (read32le(p) & ~mask) | v);
}

// Update the immediate field in an AArch64 ldr, str, and add instruction.
static void write32Imm12(uint8_t *l, uint64_t imm) {
  writeMaskedBits32le(l, (imm & 0xFFF) << 10, 0xFFF << 10);
}

// Update the immediate field in an AArch64 movk, movn or movz instruction
// for a signed relocation, and update the opcode of a movn or movz instruction
// to match the sign of the operand.
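// For example, writing the value -3 through a signed G0 relocation turns a
// MOVZ into MOVN #2, since MOVN materializes the bitwise NOT of its 16-bit
// operand.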
static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
  uint32_t inst = read32le(loc);
  // Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
  if (!(inst & (1 << 29))) {
    // movn or movz.
    if (imm & 0x10000) {
      // Change opcode to movn, which takes an inverted operand.
      imm ^= 0xFFFF;
      inst &= ~(1 << 30);
    } else {
      // Change opcode to movz.
      inst |= 1 << 30;
    }
  }
  write32le(loc, inst | ((imm & 0xFFFF) << 5));
}

void AArch64::relocate(uint8_t *loc, const Relocation &rel,
                       uint64_t val) const {
  switch (rel.type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    checkIntUInt(ctx, loc, val, 16, rel);
    write16(ctx, loc, val);
    break;
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    checkIntUInt(ctx, loc, val, 32, rel);
    write32(ctx, loc, val);
    break;
  case R_AARCH64_PLT32:
  case R_AARCH64_GOTPCREL32:
    checkInt(ctx, loc, val, 32, rel);
    write32(ctx, loc, val);
    break;
  case R_AARCH64_ABS64:
    // AArch64 relocations to tagged symbols have extended semantics, as
    // described here:
    // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative.
    // tl;dr: encode the symbol's special addend in the place, which is an
    // offset to the point where the logical tag is derived from. Quick hack,
    // if the addend is within the symbol's bounds, no need to encode the tag
    // derivation offset.
    if (rel.sym && rel.sym->isTagged() &&
        (rel.addend < 0 ||
         rel.addend >= static_cast<int64_t>(rel.sym->getSize())))
      write64(ctx, loc, -rel.addend);
    else
      write64(ctx, loc, val);
    break;
  case R_AARCH64_PREL64:
    write64(ctx, loc, val);
    break;
  case R_AARCH64_AUTH_ABS64:
    // If val is wider than 32 bits, the relocation must have been moved from
    // .relr.auth.dyn to .rela.dyn, and the addend write is not needed.
    //
    // If val fits in 32 bits, we have two potential scenarios:
    // * True RELR: Write the 32-bit `val`.
    // * RELA: Even if the value now fits in 32 bits, it might have been
    //   converted from RELR during an iteration in
    //   finalizeAddressDependentContent(). Writing the value is harmless
    //   because dynamic linking ignores it.
    if (isInt<32>(val))
      write32(ctx, loc, val);
    break;
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
    write32Imm12(loc, val);
    break;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_AUTH_ADR_GOT_PAGE:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    checkInt(ctx, loc, val, 33, rel);
    [[fallthrough]];
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    write32AArch64Addr(loc, val >> 12);
    break;
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
    checkInt(ctx, loc, val, 21, rel);
    write32AArch64Addr(loc, val);
    break;
  case R_AARCH64_JUMP26:
    // Normally we would just write the bits of the immediate field, however
    // when patching instructions for the cpu errata fix -fix-cortex-a53-843419
    // we want to replace a non-branch instruction with a branch immediate
    // instruction. By writing all the bits of the instruction including the
    // opcode and the immediate (0 001 | 01 imm26) we can do this
    // transformation by placing a R_AARCH64_JUMP26 relocation at the offset of
    // the instruction we want to patch.
    write32le(loc, 0x14000000);
    [[fallthrough]];
  case R_AARCH64_CALL26:
    checkInt(ctx, loc, val, 28, rel);
    writeMaskedBits32le(loc, (val & 0x0FFFFFFC) >> 2, 0x0FFFFFFC >> 2);
    break;
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_GOT_LD_PREL19:
  case R_AARCH64_AUTH_GOT_LD_PREL19:
    checkAlignment(ctx, loc, val, 4, rel);
    checkInt(ctx, loc, val, 21, rel);
    writeMaskedBits32le(loc, (val & 0x1FFFFC) << 3, 0x1FFFFC << 3);
    break;
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
    write32Imm12(loc, getBits(val, 0, 11));
    break;
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
    checkAlignment(ctx, loc, val, 2, rel);
    write32Imm12(loc, getBits(val, 1, 11));
    break;
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
    checkAlignment(ctx, loc, val, 4, rel);
    write32Imm12(loc, getBits(val, 2, 11));
    break;
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_LD64_LO12:
    checkAlignment(ctx, loc, val, 8, rel);
    write32Imm12(loc, getBits(val, 3, 11));
    break;
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
    checkAlignment(ctx, loc, val, 16, rel);
    write32Imm12(loc, getBits(val, 4, 11));
    break;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    checkAlignment(ctx, loc, val, 8, rel);
    write32Imm12(loc, getBits(val, 3, 14));
    break;
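  // The MOVZ/MOVK immediate field occupies bits [20:5], so each Gn chunk of
  // the value is shifted right by 16*n and then left by 5, i.e. right by
  // (16*n - 5) overall, as in the masks below.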
  case R_AARCH64_MOVW_UABS_G0:
    checkUInt(ctx, loc, val, 16, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G0_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF) << 5, 0xFFFF << 5);
    break;
  case R_AARCH64_MOVW_UABS_G1:
    checkUInt(ctx, loc, val, 32, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G1_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF0000) >> 11, 0xFFFF0000 >> 11);
    break;
  case R_AARCH64_MOVW_UABS_G2:
    checkUInt(ctx, loc, val, 48, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G2_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF00000000) >> 27,
                        0xFFFF00000000 >> 27);
    break;
  case R_AARCH64_MOVW_UABS_G3:
    writeMaskedBits32le(loc, (val & 0xFFFF000000000000) >> 43,
                        0xFFFF000000000000 >> 43);
    break;
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
    checkInt(ctx, loc, val, 17, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
    writeSMovWImm(loc, val);
    break;
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
    checkInt(ctx, loc, val, 33, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
    writeSMovWImm(loc, val >> 16);
    break;
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    checkInt(ctx, loc, val, 49, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G2_NC:
    writeSMovWImm(loc, val >> 32);
    break;
  case R_AARCH64_MOVW_PREL_G3:
    writeSMovWImm(loc, val >> 48);
    break;
  case R_AARCH64_TSTBR14:
    checkInt(ctx, loc, val, 16, rel);
    writeMaskedBits32le(loc, (val & 0xFFFC) << 3, 0xFFFC << 3);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    checkUInt(ctx, loc, val, 24, rel);
    write32Imm12(loc, val >> 12);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
    write32Imm12(loc, val);
    break;
  case R_AARCH64_TLSDESC:
    // For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
    write64(ctx, loc + 8, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}

void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are of the form:
  //   adrp x0, :tlsdesc:v              [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr x1, [x0, #:tlsdesc_lo12:v]   [R_AARCH64_TLSDESC_LD64_LO12]
  //   add x0, x0, :tlsdesc_lo12:v      [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                     [R_AARCH64_TLSDESC_CALL]
  //   blr x1
  // And it can be optimized to:
  //   movz x0, #0x0, lsl #16
  //   movk x0, #0x10
  //   nop
  //   nop
  checkUInt(ctx, loc, val, 32, rel);

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    return;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0xd2a00000 | (((val >> 16) & 0xffff) << 5)); // movz
    return;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf2800000 | ((val & 0xffff) << 5)); // movk
    return;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
  }
}

void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are of the form:
  //   adrp x0, :tlsdesc:v              [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr x1, [x0, #:tlsdesc_lo12:v]   [R_AARCH64_TLSDESC_LD64_LO12]
  //   add x0, x0, :tlsdesc_lo12:v      [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                     [R_AARCH64_TLSDESC_CALL]
  //   blr x1
  // And it can be optimized to:
  //   adrp x0, :gottprel:v
  //   ldr x0, [x0, :gottprel_lo12:v]
  //   nop
  //   nop

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    break;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0x90000000); // adrp
    relocateNoSym(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
    break;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf9400000); // ldr
    relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
  }
}
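
// Initial Exec code sequences load the TP-relative offset of a variable from
// the GOT:
//   adrp xN, :gottprel:v             [R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21]
//   ldr xN, [xN, :gottprel_lo12:v]   [R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC]
// When the offset is known at link time, the pair below is rewritten to
// materialize it directly, preserving the destination register:
//   movz xN, #:tprel_g1:v, lsl #16
//   movk xN, #:tprel_g0_nc:v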
void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  checkUInt(ctx, loc, val, 32, rel);

  if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
    // Generate MOVZ.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5));
    return;
  }
  if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
    // Generate MOVK.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xf2800000 | regNo) | ((val & 0xffff) << 5));
    return;
  }
  llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
}

AArch64Relaxer::AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs)
    : ctx(ctx) {
  if (!ctx.arg.relax)
    return;
  // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
  // always appear in pairs.
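  // If they do not, the ADRP result may also feed instructions that we do not
  // rewrite, so GOT-indirection relaxation is only considered safe when every
  // R_AARCH64_ADR_GOT_PAGE is immediately followed by its matching
  // R_AARCH64_LD64_GOT_LO12_NC.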
  size_t i = 0;
  const size_t size = relocs.size();
  for (; i != size; ++i) {
    if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
      if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
        ++i;
        continue;
      }
      break;
    } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
      break;
    }
  }
  safeToRelaxAdrpLdr = i == size;
}

bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
                                     const Relocation &addRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  // When the address of sym is within the range of ADR then
  // we may relax
  // ADRP xn, sym
  // ADD xn, xn, :lo12: sym
  // to
  // NOP
  // ADR xn, sym
  if (!ctx.arg.relax || adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 ||
      addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != addRel.offset)
    return false;
  if (adrpRel.sym != addRel.sym)
    return false;
  if (adrpRel.addend != 0 || addRel.addend != 0)
    return false;

  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  uint32_t addInstr = read32le(buf + addRel.offset);
  // Check if the first instruction is ADRP and the second instruction is ADD.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (addInstr & 0xffc00000) != 0x91000000)
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t addDestReg = addInstr & 0x1f;
  uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
  if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // Check if the address difference is within 1MiB range.
  int64_t val = sym.getVA(ctx) - (secAddr + addRel.offset);
  if (val < -1024 * 1024 || val >= 1024 * 1024)
    return false;

  Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset,
                       /*addend=*/0, &sym};
  // nop
  write32le(buf + adrpRel.offset, 0xd503201f);
  // adr x_<dest_reg>
  write32le(buf + adrRel.offset, 0x10000000 | adrpDestReg);
  ctx.target->relocate(buf + adrRel.offset, adrRel, val);
  return true;
}

bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
                                     const Relocation &ldrRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  if (!safeToRelaxAdrpLdr)
    return false;

  // When the definition of sym is not preemptible then we may
  // be able to relax
  // ADRP xn, :got: sym
  // LDR xn, [xn, :got_lo12: sym]
  // to
  // ADRP xn, sym
  // ADD xn, xn, :lo12: sym

  if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
      ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != ldrRel.offset)
    return false;
  // Check if the relocations reference the same symbol and
  // skip undefined, preemptible and STT_GNU_IFUNC symbols.
  if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
      adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
    return false;
  // Check if the addends of both relocations are zero.
  if (adrpRel.addend != 0 || ldrRel.addend != 0)
    return false;
  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  uint32_t ldrInstr = read32le(buf + ldrRel.offset);
  // Check if the first instruction is ADRP and the second instruction is LDR.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (ldrInstr & 0x3b000000) != 0x39000000)
    return false;
  // Check the value of the sf bit.
  if (!(ldrInstr >> 31))
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t ldrDestReg = ldrInstr & 0x1f;
  uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
  // Check if ADRP and LDR use the same register.
  if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // GOT references to absolute symbols can't be relaxed to use ADRP/ADD in
  // position-independent code because these instructions produce a relative
  // address.
  if (ctx.arg.isPic && !cast<Defined>(sym).section)
    return false;
  // Check if the address difference is within 4GB range.
  int64_t val =
      getAArch64Page(sym.getVA(ctx)) - getAArch64Page(secAddr + adrpRel.offset);
  if (val != llvm::SignExtend64(val, 33))
    return false;

  Relocation adrpSymRel = {RE_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
                           adrpRel.offset, /*addend=*/0, &sym};
  Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
                       /*addend=*/0, &sym};

  // adrp x_<dest_reg>
  write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
  // add x_<dest reg>, x_<dest reg>
  write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));

  ctx.target->relocate(
      buf + adrpSymRel.offset, adrpSymRel,
      SignExtend64(getAArch64Page(sym.getVA(ctx)) -
                       getAArch64Page(secAddr + adrpSymRel.offset),
                   64));
  ctx.target->relocate(buf + addRel.offset, addRel,
                       SignExtend64(sym.getVA(ctx), 64));
  tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf);
  return true;
}

// Tagged symbols have upper address bits that are added by the dynamic loader,
// and thus need the full 64-bit GOT entry. Do not relax such symbols.
static bool needsGotForMemtag(const Relocation &rel) {
  return rel.sym->isTagged() && needsGot(rel.expr);
}

void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
  uint64_t secAddr = sec.getOutputSection()->addr;
  if (auto *s = dyn_cast<InputSection>(&sec))
    secAddr += s->outSecOff;
  else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
    secAddr += ehIn->getParent()->outSecOff;
  AArch64Relaxer relaxer(ctx, sec.relocs());
  for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) {
    const Relocation &rel = sec.relocs()[i];
    uint8_t *loc = buf + rel.offset;
    const uint64_t val = sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset);

    if (needsGotForMemtag(rel)) {
      relocate(loc, rel, val);
      continue;
    }

    switch (rel.expr) {
    case RE_AARCH64_GOT_PAGE_PC:
      if (i + 1 < size &&
          relaxer.tryRelaxAdrpLdr(rel, sec.relocs()[i + 1], secAddr, buf)) {
        ++i;
        continue;
      }
      break;
    case RE_AARCH64_PAGE_PC:
      if (i + 1 < size &&
          relaxer.tryRelaxAdrpAdd(rel, sec.relocs()[i + 1], secAddr, buf)) {
        ++i;
        continue;
      }
      break;
    case RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC:
    case R_RELAX_TLS_GD_TO_IE_ABS:
      relaxTlsGdToIe(loc, rel, val);
      continue;
    case R_RELAX_TLS_GD_TO_LE:
      relaxTlsGdToLe(loc, rel, val);
      continue;
    case R_RELAX_TLS_IE_TO_LE:
      relaxTlsIeToLe(loc, rel, val);
      continue;
    default:
      break;
    }
    relocate(loc, rel, val);
  }
}

// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in armv8.3-a, and Branch Target
// Indicator (BTI), introduced in armv8.5-a. The additional instructions used
// in the variant PLT sequences are encoded in the HINT space so they can be
// deployed on older architectures, which treat the instructions as nops.
// PAC and BTI can be combined, leading to the following combinations:
// writePltHeader
// writePltHeaderBti (no PAC Header needed)
// writePlt
// writePltBti (BTI only)
// writePltPac (PAC only)
// writePltBtiPac (BTI and PAC)
//
// When PAC is enabled the dynamic loader encrypts the address that it places
// in the .got.plt using the pacia1716 instruction which encrypts the value in
// x17 using the modifier in x16. The static linker places autia1716 before the
// indirect branch to x17 to authenticate the address in x17 with the modifier
// in x16. This makes it more difficult for an attacker to modify the value in
// the .got.plt.
//
// When BTI is enabled all indirect branches must land on a bti instruction.
// The static linker must place a bti instruction at the start of any PLT entry
// that may be the target of an indirect branch. As the PLT entries call the
// lazy resolver indirectly, this must have a bti instruction at its start. In
// general a bti instruction is not needed for a PLT entry as indirect calls
// are resolved to the function address and not the PLT entry for the function.
// There are a small number of cases where the PLT address can escape, such as
// taking the address of a function or ifunc via a non got-generating
// relocation, and a shared library refers to that symbol.
//
// We use the bti c variant of the instruction which permits indirect branches
// (br) via x16/x17 and indirect function calls (blr) via any register. The ABI
// guarantees that all indirect branches from code requiring BTI protection
// will go via x16/x17.

namespace {
class AArch64BtiPac final : public AArch64 {
public:
  AArch64BtiPac(Ctx &);
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;

private:
  bool btiHeader; // bti instruction needed in PLT Header and Entry
  enum {
    PEK_NoAuth,
    PEK_AuthHint, // use autia1716 instr for authenticated branch in PLT entry
    PEK_Auth,     // use braa instr for authenticated branch in PLT entry
  } pacEntryKind;
};
} // namespace

AArch64BtiPac::AArch64BtiPac(Ctx &ctx) : AArch64(ctx) {
  btiHeader = (ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
  // A BTI (Branch Target Indicator) Plt Entry is only required if the
  // address of the PLT entry can be taken by the program, which permits an
  // indirect jump to the PLT entry. This can happen when the address
  // of the PLT entry for a function is canonicalised due to the address of
  // the function in an executable being taken by a shared library, or when a
  // non-preemptible ifunc is referenced by non-GOT-generating,
  // non-PLT-generating relocations.
  // The PAC PLT entries require dynamic loader support and this isn't known
  // from properties in the objects, so we use the command line flag.
  // By default we only use hint-space instructions, but if we detect the
  // PAuthABI, which requires v8.3-A, we can use the non-hint space
  // instructions.

  if (ctx.arg.zPacPlt) {
    if (llvm::any_of(ctx.aarch64PauthAbiCoreInfo,
                     [](uint8_t c) { return c != 0; }))
      pacEntryKind = PEK_Auth;
    else
      pacEntryKind = PEK_AuthHint;
  } else {
    pacEntryKind = PEK_NoAuth;
  }
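
  // The BTI and PAC variants of the PLT entry are six instructions (24 bytes)
  // instead of four: a leading BTI c and/or an authenticate-and-branch pair,
  // padded with a trailing NOP where needed (see writePlt below).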
  if (btiHeader || (pacEntryKind != PEK_NoAuth)) {
    pltEntrySize = 24;
    ipltEntrySize = 24;
  }
}

void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
  const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop

  uint64_t got = ctx.in.gotPlt->getVA();
  uint64_t plt = ctx.in.plt->getVA();

  if (btiHeader) {
    // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
    // instruction.
    memcpy(buf, btiData, sizeof(btiData));
    buf += sizeof(btiData);
    plt += sizeof(btiData);
  }
  memcpy(buf, pltData, sizeof(pltData));

  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 4));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
  if (!btiHeader)
    // We didn't add the BTI c instruction so round out size with NOP.
    memcpy(buf + sizeof(pltData), nopData, sizeof(nopData));
}

void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
                             uint64_t pltEntryAddr) const {
  // The PLT entry is of the form:
  // [btiData] addrInst (pacBr | stdBr) [nopData]
  const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
  const uint8_t addrInst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91  // add x16, x16, Offset(&(.got.plt[n]))
  };
  const uint8_t pacHintBr[] = {
      0x9f, 0x21, 0x03, 0xd5, // autia1716
      0x20, 0x02, 0x1f, 0xd6  // br x17
  };
  const uint8_t pacBr[] = {
      0x30, 0x0a, 0x1f, 0xd7, // braa x17, x16
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t stdBr[] = {
      0x20, 0x02, 0x1f, 0xd6, // br x17
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop

  // NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may
  // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
  // address may escape if referenced by a direct relocation. If relative
  // vtables are used then if the vtable is in a shared object the offsets will
  // be to the PLT entry. The condition is conservative.
  bool hasBti = btiHeader &&
                (sym.hasFlag(NEEDS_COPY) || sym.isInIplt || sym.thunkAccessed);
  if (hasBti) {
    memcpy(buf, btiData, sizeof(btiData));
    buf += sizeof(btiData);
    pltEntryAddr += sizeof(btiData);
  }

  uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
  memcpy(buf, addrInst, sizeof(addrInst));
  relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr));
  relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr);
  relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr);

  if (pacEntryKind != PEK_NoAuth)
    memcpy(buf + sizeof(addrInst),
           pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr,
           sizeof(pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr));
  else
    memcpy(buf + sizeof(addrInst), stdBr, sizeof(stdBr));
  if (!hasBti)
    // We didn't add the BTI c instruction so round out size with NOP.
    memcpy(buf + sizeof(addrInst) + sizeof(stdBr), nopData, sizeof(nopData));
}

template <class ELFT>
static void
addTaggedSymbolReferences(Ctx &ctx, InputSectionBase &sec,
                          DenseMap<Symbol *, unsigned> &referenceCount) {
  assert(sec.type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC);

  const RelsOrRelas<ELFT> rels = sec.relsOrRelas<ELFT>();
  if (rels.areRelocsRel())
    ErrAlways(ctx)
        << "non-RELA relocations are not allowed with memtag globals";

  for (const typename ELFT::Rela &rel : rels.relas) {
    Symbol &sym = sec.file->getRelocTargetSym(rel);
    // Linker-synthesized symbols such as __executable_start may be referenced
    // as tagged in input objfiles, and we don't want them to be tagged. A
    // cheap way to exclude them is the type check, but their type is
    // STT_NOTYPE. In addition, this saves us from checking untaggable symbols,
    // like functions or TLS symbols.
    if (sym.type != STT_OBJECT)
      continue;
    // STB_LOCAL symbols can't be referenced from outside the object file, and
    // thus don't need to be checked for references from other object files.
    if (sym.binding == STB_LOCAL) {
      sym.setIsTagged(true);
      continue;
    }
    ++referenceCount[&sym];
  }
  sec.markDead();
}

// A tagged symbol must be denoted as being tagged by all references and the
// chosen definition. For simplicity, here, it must also be denoted as tagged
// for all definitions. Otherwise:
//
//  1. A tagged definition can be used by an untagged declaration, in which
//     case the untagged access may be PC-relative, causing a tag mismatch at
//     runtime.
//  2. An untagged definition can be used by a tagged declaration, where the
//     compiler has taken advantage of the increased alignment of the tagged
//     declaration, but the alignment at runtime is wrong, causing a fault.
//
// Ideally, this isn't a problem, as any TU that imports or exports tagged
// symbols should also be built with tagging. But, to handle these cases, we
// demote the symbol to be untagged.
void elf::createTaggedSymbols(Ctx &ctx) {
  assert(hasMemtag(ctx));

  // First, collect all symbols that are marked as tagged, and count how many
  // times they're marked as tagged.
  DenseMap<Symbol *, unsigned> taggedSymbolReferenceCount;
  for (InputFile *file : ctx.objectFiles) {
    if (file->kind() != InputFile::ObjKind)
      continue;
    for (InputSectionBase *section : file->getSections()) {
      if (!section || section->type != SHT_AARCH64_MEMTAG_GLOBALS_STATIC ||
          section == &InputSection::discarded)
        continue;
      invokeELFT(addTaggedSymbolReferences, ctx, *section,
                 taggedSymbolReferenceCount);
    }
  }

  // Now, go through all the symbols. If the number of declarations +
  // definitions to a symbol exceeds the amount of times they're marked as
  // tagged, it means we have an objfile that uses the untagged variant of the
  // symbol.
  for (InputFile *file : ctx.objectFiles) {
    if (file->kind() != InputFile::BinaryKind &&
        file->kind() != InputFile::ObjKind)
      continue;

    for (Symbol *symbol : file->getSymbols()) {
      // See `addTaggedSymbolReferences` for more details.
      if (symbol->type != STT_OBJECT || symbol->binding == STB_LOCAL)
        continue;
      auto it = taggedSymbolReferenceCount.find(symbol);
      if (it == taggedSymbolReferenceCount.end())
        continue;
      unsigned &remainingAllowedTaggedRefs = it->second;
      if (remainingAllowedTaggedRefs == 0) {
        taggedSymbolReferenceCount.erase(it);
        continue;
      }
      --remainingAllowedTaggedRefs;
    }
  }

  // `addTaggedSymbolReferences` has already checked that we have RELA
  // relocations; the only other way to get written addends is with
  // --apply-dynamic-relocs.
  if (!taggedSymbolReferenceCount.empty() && ctx.arg.writeAddends)
    ErrAlways(ctx) << "--apply-dynamic-relocs cannot be used with MTE globals";

  // Now, `taggedSymbolReferenceCount` should only contain symbols that are
  // defined as tagged exactly the same amount as they're referenced, meaning
  // all uses are tagged.
  for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) {
    assert(remainingTaggedRefs == 0 &&
           "Symbol is defined as tagged more times than it's used");
    symbol->setIsTagged(true);
  }
}

void elf::setAArch64TargetInfo(Ctx &ctx) {
  if ((ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ||
      ctx.arg.zPacPlt)
    ctx.target.reset(new AArch64BtiPac(ctx));
  else
    ctx.target.reset(new AArch64(ctx));
}