1 //===- ARM.cpp ------------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "InputFiles.h"
10 #include "OutputSections.h"
11 #include "SymbolTable.h"
13 #include "SyntheticSections.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Filesystem.h"
17 #include "llvm/BinaryFormat/ELF.h"
18 #include "llvm/Support/Endian.h"
21 using namespace llvm::support::endian
;
22 using namespace llvm::support
;
23 using namespace llvm::ELF
;
25 using namespace lld::elf
;
26 using namespace llvm::object
;
// ARM (AArch32) target backend for lld's ELF linker. Implements the
// TargetInfo hooks for relocation classification/application, PLT
// generation, and range-extension thunk decisions.
// NOTE(review): this extraction appears to be missing lines (access
// specifier, constructor declaration, closing "};") — restore from the
// surrounding file before compiling.
class ARM final : public TargetInfo {
  // Derive ELF header e_flags (EABI version, float ABI, BE8) for the output.
  uint32_t calcEFlags() const override;
  // Map an ELF relocation type to lld's internal RelExpr kind.
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  // Read the addend encoded in the instruction/data at buf for REL-format
  // relocations.
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  // Emit "$a"/"$t"/"$d" mapping symbols so disassemblers can tell
  // Arm/Thumb code from literal data inside synthesized PLT sections.
  void addPltSymbols(InputSection &isec, uint64_t off) const override;
  void addPltHeaderSymbols(InputSection &isd) const override;
  // Decide whether a branch needs an interworking or range-extension thunk.
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  // Per-section list of mapping symbols, used when sorting/querying
  // Arm/Thumb/data state transitions (see sortArmMappingSymbols).
  DenseMap<InputSection *, SmallVector<const Defined *, 0>> sectionMap;
  // Shared encoder for the R_ARM_ALU_PC_Gn group relocations; `check`
  // selects whether an unencodeable immediate is an error.
  void encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val,
                      int group, bool check) const;
// Instruction-set state of a byte range, as described by mapping symbols.
// Values are spaced so they can double as byte offsets/flags when scanning
// state changes ($d = data, $t = Thumb, $a = Arm).
enum class CodeState { Data = 0, Thumb = 2, Arm = 4 };
// Configure the ARM-specific dynamic relocation types and layout defaults
// used by the generic linker machinery.
// NOTE(review): this extraction appears to be missing some member
// assignments and the closing brace — restore from the surrounding file.
ARM::ARM(Ctx &ctx) : TargetInfo(ctx) {
  relativeRel = R_ARM_RELATIVE;
  iRelativeRel = R_ARM_IRELATIVE;
  gotRel = R_ARM_GLOB_DAT;
  pltRel = R_ARM_JUMP_SLOT;
  symbolicRel = R_ARM_ABS32;
  tlsGotRel = R_ARM_TLS_TPOFF32;
  tlsModuleIndexRel = R_ARM_TLS_DTPMOD32;
  tlsOffsetRel = R_ARM_TLS_DTPOFF32;
  // 0xd4d4d4d4 is a permanently-undefined instruction pattern used to fill
  // padding so stray control flow traps.
  trapInstr = {0xd4, 0xd4, 0xd4, 0xd4};
  defaultMaxPageSize = 65536;
// Compute the output ELF header e_flags: EABI v5 plus the float-ABI tag
// (soft for Base/Default VFP-args, hard for VFP) and, when producing a
// big-endian BE8 image, the EF_ARM_BE8 flag.
// NOTE(review): this extraction appears to be missing the declaration of
// `armBE8` and the body of the BE8 `if` — restore from the surrounding
// file before compiling.
uint32_t ARM::calcEFlags() const {
  // The ABIFloatType is used by loaders to detect the floating point calling
  uint32_t abiFloatType = 0;
  // Set the EF_ARM_BE8 flag in the ELF header, if ELF file is big-endian
  if (ctx.arg.armVFPArgs == ARMVFPArgKind::Base ||
      ctx.arg.armVFPArgs == ARMVFPArgKind::Default)
    abiFloatType = EF_ARM_ABI_FLOAT_SOFT;
  else if (ctx.arg.armVFPArgs == ARMVFPArgKind::VFP)
    abiFloatType = EF_ARM_ABI_FLOAT_HARD;
  if (!ctx.arg.isLE && ctx.arg.armBe8)
  // We don't currently use any features incompatible with EF_ARM_EABI_VER5,
  // but we don't have any firm guarantees of conformance. Linux AArch64
  // kernels (as of 2016) require an EABI version to be set.
  return EF_ARM_EABI_VER5 | abiFloatType | armBE8;
// Classify an ARM relocation type into lld's internal RelExpr kind
// (absolute, PC-relative, GOT-relative, TLS, ...). TARGET1/TARGET2
// semantics are configurable via --target1-rel / --target2=.
// NOTE(review): this extraction appears to be missing the switch
// statement frame and most per-case return values — restore from the
// surrounding file before compiling.
RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
                        const uint8_t *loc) const {
  // Absolute relocations (instruction fields carry S + A).
  case R_ARM_MOVW_ABS_NC:
  case R_ARM_THM_MOVW_ABS_NC:
  case R_ARM_THM_MOVT_ABS:
  case R_ARM_THM_ALU_ABS_G0_NC:
  case R_ARM_THM_ALU_ABS_G1_NC:
  case R_ARM_THM_ALU_ABS_G2_NC:
  case R_ARM_THM_ALU_ABS_G3:
  // Short Thumb branches.
  case R_ARM_THM_JUMP8:
  case R_ARM_THM_JUMP11:
  case R_ARM_THM_JUMP19:
  case R_ARM_THM_JUMP24:
  // GOT(S) + A - GOT_ORG
  // TARGET1 behaves as either PC-relative or absolute per --target1-rel.
    return ctx.arg.target1Rel ? R_PC : R_ABS;
  if (ctx.arg.target2 == Target2Policy::Rel)
  if (ctx.arg.target2 == Target2Policy::Abs)
  case R_ARM_TLS_LDM32:
  case R_ARM_TLS_LDO32:
  case R_ARM_BASE_PREL:
  // FIXME: currently B(S) assumed to be .got, this may not hold for all
  // PC-relative relocations.
  case R_ARM_MOVW_PREL_NC:
  case R_ARM_MOVT_PREL:
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVT_PREL:
  case R_ARM_ALU_PC_G0:
  case R_ARM_ALU_PC_G0_NC:
  case R_ARM_ALU_PC_G1:
  case R_ARM_ALU_PC_G1_NC:
  case R_ARM_ALU_PC_G2:
  case R_ARM_LDR_PC_G0:
  case R_ARM_LDR_PC_G1:
  case R_ARM_LDR_PC_G2:
  case R_ARM_LDRS_PC_G0:
  case R_ARM_LDRS_PC_G1:
  case R_ARM_LDRS_PC_G2:
  case R_ARM_THM_ALU_PREL_11_0:
  // Static-base-relative (BREL) relocations, used for position-independent
  // data addressing.
  case R_ARM_MOVW_BREL_NC:
  case R_ARM_MOVW_BREL:
  case R_ARM_MOVT_BREL:
  case R_ARM_THM_MOVW_BREL_NC:
  case R_ARM_THM_MOVW_BREL:
  case R_ARM_THM_MOVT_BREL:
  // V4BX is just a marker to indicate there's a "bx rN" instruction at the
  // given address. It can be used to implement a special linker mode which
  // rewrites ARMv4T inputs to ARMv4. Since we support only ARMv4 input and
  // not ARMv4 output, we can just ignore it.
  Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
           << ") against symbol " << &s;
// Select the dynamic relocation to emit for a word-sized absolute
// relocation; R_ARM_TARGET1 counts as ABS32 unless --target1-rel.
// NOTE(review): this extraction appears to be missing the return
// statements and closing brace — restore from the surrounding file.
RelType ARM::getDynRel(RelType type) const {
  if ((type == R_ARM_ABS32) || (type == R_ARM_TARGET1 && !ctx.arg.target1Rel))
210 void ARM::writeGotPlt(uint8_t *buf
, const Symbol
&) const {
211 write32(ctx
, buf
, ctx
.in
.plt
->getVA());
214 void ARM::writeIgotPlt(uint8_t *buf
, const Symbol
&s
) const {
215 // An ARM entry is the address of the ifunc resolver function.
216 write32(ctx
, buf
, s
.getVA(ctx
));
219 // Long form PLT Header that does not have any restrictions on the displacement
220 // of the .plt from the .got.plt.
221 static void writePltHeaderLong(Ctx
&ctx
, uint8_t *buf
) {
222 write32(ctx
, buf
+ 0, 0xe52de004); // str lr, [sp,#-4]!
223 write32(ctx
, buf
+ 4, 0xe59fe004); // ldr lr, L2
224 write32(ctx
, buf
+ 8, 0xe08fe00e); // L1: add lr, pc, lr
225 write32(ctx
, buf
+ 12, 0xe5bef008); // ldr pc, [lr, #8]
226 write32(ctx
, buf
+ 16, 0x00000000); // L2: .word &(.got.plt) - L1 - 8
227 write32(ctx
, buf
+ 20, 0xd4d4d4d4); // Pad to 32-byte boundary
228 write32(ctx
, buf
+ 24, 0xd4d4d4d4); // Pad to 32-byte boundary
229 write32(ctx
, buf
+ 28, 0xd4d4d4d4);
230 uint64_t gotPlt
= ctx
.in
.gotPlt
->getVA();
231 uint64_t l1
= ctx
.in
.plt
->getVA() + 8;
232 write32(ctx
, buf
+ 16, gotPlt
- l1
- 8);
235 // True if we should use Thumb PLTs, which currently require Thumb2, and are
236 // only used if the target does not have the ARM ISA.
237 static bool useThumbPLTs(Ctx
&ctx
) {
238 return ctx
.arg
.armHasThumb2ISA
&& !ctx
.arg
.armHasArmISA
;
// The default PLT header requires the .got.plt to be within 128 Mb of the
// .plt in the positive direction.
// Writes either a Thumb-2 header (when useThumbPLTs) or the short Arm
// header, falling back to writePltHeaderLong() when the displacement does
// not fit the short Arm encoding.
// NOTE(review): this extraction appears to be missing the "} else {"
// separating the two branches, the pltData array terminator, an early
// return after the long-form fallback, and closing braces — restore from
// the surrounding file before compiling.
void ARM::writePltHeader(uint8_t *buf) const {
  if (useThumbPLTs(ctx)) {
    // The instruction sequence for thumb:
    // 2: f8df e008  ldr.w  lr, [pc, #0x8] @ 0xe <func+0xe>
    // 6: 44fe       add    lr, pc
    // 8: f85e ff08  ldr    pc, [lr, #8]!
    // e: .word .got.plt - .plt - 16
    // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from
    // `pc` in the add instruction and 8 bytes for the `lr` adjustment.
    uint64_t offset = ctx.in.gotPlt->getVA() - ctx.in.plt->getVA() - 16;
    assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
    write16(ctx, buf + 0, 0xb500);
    // Split into two halves to support endianness correctly.
    write16(ctx, buf + 2, 0xf8df);
    write16(ctx, buf + 4, 0xe008);
    write16(ctx, buf + 6, 0x44fe);
    // Split into two halves to support endianness correctly.
    write16(ctx, buf + 8, 0xf85e);
    write16(ctx, buf + 10, 0xff08);
    write32(ctx, buf + 12, offset);
    memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
    memcpy(buf + 20, trapInstr.data(), 4);
    memcpy(buf + 24, trapInstr.data(), 4);
    memcpy(buf + 28, trapInstr.data(), 4);
    // Use a similar sequence to that in writePlt(), the difference is the
    // calling conventions mean we use lr instead of ip. The PLT entry is
    // responsible for saving lr on the stack, the dynamic loader is responsible
    const uint32_t pltData[] = {
        0xe52de004, // L1: str lr, [sp,#-4]!
        0xe28fe600, //     add lr, pc,  #0x0NN00000 &(.got.plt - L1 - 4)
        0xe28eea00, //     add lr, lr,  #0x000NN000 &(.got.plt - L1 - 4)
        0xe5bef000, //     ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
    uint64_t offset = ctx.in.gotPlt->getVA() - ctx.in.plt->getVA() - 4;
    if (!llvm::isUInt<27>(offset)) {
      // We cannot encode the Offset, use the long form.
      writePltHeaderLong(ctx, buf);
    // Splice the 27-bit displacement into the three immediate fields.
    write32(ctx, buf + 0, pltData[0]);
    write32(ctx, buf + 4, pltData[1] | ((offset >> 20) & 0xff));
    write32(ctx, buf + 8, pltData[2] | ((offset >> 12) & 0xff));
    write32(ctx, buf + 12, pltData[3] | (offset & 0xfff));
    memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
    memcpy(buf + 20, trapInstr.data(), 4);
    memcpy(buf + 24, trapInstr.data(), 4);
    memcpy(buf + 28, trapInstr.data(), 4);
// Add mapping symbols for the PLT header: "$t"/"$a" marks the code start
// (Thumb vs Arm state) and "$d" marks where the literal data begins.
// NOTE(review): this extraction appears to be missing the "} else {"
// between the Thumb and Arm branches and the closing braces — restore
// from the surrounding file before compiling.
void ARM::addPltHeaderSymbols(InputSection &isec) const {
  if (useThumbPLTs(ctx)) {
    addSyntheticLocal(ctx, "$t", STT_NOTYPE, 0, 0, isec);
    addSyntheticLocal(ctx, "$d", STT_NOTYPE, 12, 0, isec);
    addSyntheticLocal(ctx, "$a", STT_NOTYPE, 0, 0, isec);
    addSyntheticLocal(ctx, "$d", STT_NOTYPE, 16, 0, isec);
311 // Long form PLT entries that do not have any restrictions on the displacement
312 // of the .plt from the .got.plt.
313 static void writePltLong(Ctx
&ctx
, uint8_t *buf
, uint64_t gotPltEntryAddr
,
314 uint64_t pltEntryAddr
) {
315 write32(ctx
, buf
+ 0, 0xe59fc004); // ldr ip, L2
316 write32(ctx
, buf
+ 4, 0xe08cc00f); // L1: add ip, ip, pc
317 write32(ctx
, buf
+ 8, 0xe59cf000); // ldr pc, [ip]
318 write32(ctx
, buf
+ 12, 0x00000000); // L2: .word Offset(&(.got.plt) - L1 - 8
319 uint64_t l1
= pltEntryAddr
+ 4;
320 write32(ctx
, buf
+ 12, gotPltEntryAddr
- l1
- 8);
// The default PLT entries require the .got.plt to be within 128 Mb of the
// .plt in the positive direction.
// Writes one PLT entry: Arm form (short, with writePltLong fallback) or
// the Thumb-2 movw/movt form.
// NOTE(review): this extraction appears to be missing the pltData array
// terminator, an early return after the long-form fallback, the "} else {"
// between the Arm and Thumb branches, and closing braces — restore from
// the surrounding file before compiling.
void ARM::writePlt(uint8_t *buf, const Symbol &sym,
                   uint64_t pltEntryAddr) const {
  if (!useThumbPLTs(ctx)) {
    uint64_t offset = sym.getGotPltVA(ctx) - pltEntryAddr - 8;
    // The PLT entry is similar to the example given in Appendix A of ELF for
    // the Arm Architecture. Instead of using the Group Relocations to find the
    // optimal rotation for the 8-bit immediate used in the add instructions we
    // hard code the most compact rotations for simplicity. This saves a load
    // instruction over the long plt sequences.
    const uint32_t pltData[] = {
        0xe28fc600, // L1: add ip, pc,  #0x0NN00000  Offset(&(.got.plt) - L1 - 8
        0xe28cca00, //     add ip, ip,  #0x000NN000  Offset(&(.got.plt) - L1 - 8
        0xe5bcf000, //     ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
    if (!llvm::isUInt<27>(offset)) {
      // We cannot encode the Offset, use the long form.
      writePltLong(ctx, buf, sym.getGotPltVA(ctx), pltEntryAddr);
    // Splice the 27-bit displacement into the three immediate fields.
    write32(ctx, buf + 0, pltData[0] | ((offset >> 20) & 0xff));
    write32(ctx, buf + 4, pltData[1] | ((offset >> 12) & 0xff));
    write32(ctx, buf + 8, pltData[2] | (offset & 0xfff));
    memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
    uint64_t offset = sym.getGotPltVA(ctx) - pltEntryAddr - 12;
    assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
    // A PLT entry will be:
    //     movw ip, #<lower 16 bits>
    //     movt ip, #<upper 16 bits>
    // L1: ldr.w pc, [ip]
    // where ip = r12 = 0xc
    // movw ip, #<lower 16 bits>
    write16(ctx, buf + 2, 0x0c00); // use `ip`
    relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset);
    // movt ip, #<upper 16 bits>
    write16(ctx, buf + 6, 0x0c00); // use `ip`
    relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset);
    write16(ctx, buf + 8, 0x44fc);  // add ip, pc
    write16(ctx, buf + 10, 0xf8dc); // ldr.w pc, [ip] (bottom half)
    write16(ctx, buf + 12, 0xf000); // ldr.w pc, [ip] (upper half)
    write16(ctx, buf + 14, 0xe7fc); // Branch to previous instruction
// Add mapping symbols for one PLT entry at `off`: "$t" or "$a" marks the
// entry's code state; for Arm entries "$d" marks the trailing literal.
// NOTE(review): this extraction appears to be missing the "} else {"
// between the Thumb and Arm branches and the closing braces — restore
// from the surrounding file before compiling.
void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
  if (useThumbPLTs(ctx)) {
    addSyntheticLocal(ctx, "$t", STT_NOTYPE, off, 0, isec);
    addSyntheticLocal(ctx, "$a", STT_NOTYPE, off, 0, isec);
    addSyntheticLocal(ctx, "$d", STT_NOTYPE, off + 12, 0, isec);
// Decide whether a branch relocation needs a thunk, either because the
// destination is out of branch range or because an Arm<->Thumb state
// change is required and the instruction/CPU cannot interwork directly.
// NOTE(review): this extraction appears to be missing the final
// "int64_t a) const {" of the signature, the switch frame with the
// Arm-branch case labels, several return statements, and closing braces —
// restore from the surrounding file before compiling.
bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
                     uint64_t branchAddr, const Symbol &s,
  // If s is an undefined weak symbol and does not have a PLT entry then it will
  // be resolved as a branch to the next instruction. If it is hidden, its
  // binding has been converted to local, so we just check isUndefined() here. A
  // undefined non-weak symbol will have been errored.
  if (s.isUndefined() && !s.isInPlt(ctx))
  // A state change from ARM to Thumb and vice versa must go through an
  // interworking thunk if the relocation type is not R_ARM_CALL or
    // Source is ARM, all PLT entries are ARM so no interworking required.
    // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).
    assert(!useThumbPLTs(ctx) &&
           "If the source is ARM, we should not need Thumb PLTs");
    if (s.isFunc() && expr == R_PC && (s.getVA(ctx) & 1))
    // R_ARM_CALL: BL/BLX can interwork, so only range matters unless the
    // target is Thumb and BLX is unavailable.
    uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA(ctx) : s.getVA(ctx);
    return !inBranchRange(type, branchAddr, dst + a) ||
           (!ctx.arg.armHasBlx && (s.getVA(ctx) & 1));
  case R_ARM_THM_JUMP19:
  case R_ARM_THM_JUMP24:
    // Source is Thumb, when all PLT entries are ARM interworking is required.
    // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
    if ((expr == R_PLT_PC && !useThumbPLTs(ctx)) ||
        (s.isFunc() && (s.getVA(ctx) & 1) == 0))
  case R_ARM_THM_CALL: {
    // Thumb BL/BLX: only range matters unless the target is Arm and BLX is
    // unavailable.
    uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA(ctx) : s.getVA(ctx);
    return !inBranchRange(type, branchAddr, dst + a) ||
           (!ctx.arg.armHasBlx && (s.getVA(ctx) & 1) == 0);
// NOTE(review): this extraction appears to be missing the second operand
// of the ternary (the spacing for pre-ARMv6T2 targets) and the closing
// brace — restore from the surrounding file before compiling.
uint32_t ARM::getThunkSectionSpacing() const {
  // The placing of pre-created ThunkSections is controlled by the value
  // thunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to
  // place the ThunkSection such that all branches from the InputSections
  // prior to the ThunkSection can reach a Thunk placed at the end of the
  // ThunkSection. Graphically:
  // | up to thunkSectionSpacing .text input sections |
  // | up to thunkSectionSpacing .text input sections |
  // Pre-created ThunkSections are spaced roughly 16MiB apart on ARMv7. This
  // is to match the most common expected case of a Thumb 2 encoded BL, BLX or
  // ARM B, BL, BLX range +/- 32MiB
  // Thumb B.W, BL, BLX range +/- 16MiB
  // Thumb B<cc>.W range +/- 1MiB
  // If a branch cannot reach a pre-created ThunkSection a new one will be
  // created so we can handle the rare cases of a Thumb 2 conditional branch.
  // We intentionally use a lower size for thunkSectionSpacing than the maximum
  // branch range so the end of the ThunkSection is more likely to be within
  // range of the branch instruction that is furthest away. The value we shorten
  // thunkSectionSpacing by is set conservatively to allow us to create 16,384
  // 12 byte Thunks at any offset in a ThunkSection without risk of a branch to
  // one of the Thunks going out of range.
  // On Arm the thunkSectionSpacing depends on the range of the Thumb Branch
  // range. On earlier Architectures such as ARMv4, ARMv5 and ARMv6 (except
  // ARMv6T2) the range is +/- 4MiB.
  return (ctx.arg.armJ1J2BranchEncoding) ? 0x1000000 - 0x30000
// Return true if a direct branch of the given relocation type at `src`
// can reach `dst` (bit 0 of dst encodes Thumb state and is excluded from
// the range computation).
// NOTE(review): this extraction appears to be missing the src/dst
// alignment adjustments, the switch frame with the Arm-branch case
// labels, and closing braces — restore from the surrounding file before
// compiling.
bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
  if ((dst & 0x1) == 0)
    // Destination is ARM, if ARM caller then Src is already 4-byte aligned.
    // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure
    // destination will be 4 byte aligned.
  // Bit 0 == 1 denotes Thumb state, it is not part of the range.
  int64_t offset = dst - src;
    return llvm::isInt<26>(offset);
  case R_ARM_THM_JUMP19:
    return llvm::isInt<21>(offset);
  case R_ARM_THM_JUMP24:
    // J1/J2 branch encoding (ARMv6T2+) gives Thumb BL/B.W a wider range.
    return ctx.arg.armJ1J2BranchEncoding ? llvm::isInt<25>(offset)
                                         : llvm::isInt<23>(offset);
// Helper to produce message text when LLD detects that a CALL relocation to
// a non STT_FUNC symbol that may result in incorrect interworking between ARM
// or Thumb (a BL/BLX written against a symbol whose state cannot be known).
// NOTE(review): this extraction appears to be missing the trailing
// parameter(s) of the signature, the `hint` declaration, the if/else frame
// around the two warnings, and closing braces — restore from the
// surrounding file before compiling.
static void stateChangeWarning(Ctx &ctx, uint8_t *loc, RelType relt,
  const ErrorPlace place = getErrorPlace(ctx, loc);
  if (!place.srcLoc.empty())
    hint = "; " + place.srcLoc;
  // Section symbols must be defined and in a section. Users cannot change
  // the type. Use the section name as getName() returns an empty string.
  Warn(ctx) << place.loc << "branch and link relocation: " << relt
            << " to STT_SECTION symbol " << cast<Defined>(s).section->name
            << " ; interworking not performed" << hint;
  // Warn with hint on how to alter the symbol type.
      << getErrorLoc(ctx, loc) << "branch and link relocation: " << relt
      << " to non STT_FUNC symbol: " << s.getName()
      << " interworking not performed; consider using directive '.type "
      << ", %function' to give symbol type STT_FUNC if interworking between "
         "ARM and Thumb is required"
// Rotate a 32-bit unsigned value right by a specified amt of bits.
static uint32_t rotr32(uint32_t val, uint32_t amt) {
  assert(amt < 32 && "Invalid rotate amount");
  // Combine the two shifted halves; masking the left-shift count with 31
  // keeps amt == 0 well-defined (a 32-bit shift would be UB).
  const uint32_t low = val >> amt;
  const uint32_t high = val << ((32 - amt) & 31);
  return high | low;
}
// For the group relocations (R_ARM_*_Gn), peel `group` leading chunks off
// `val` and report the remaining value plus the (even) leading-zero count
// used to derive the rotation for the next chunk.
// NOTE(review): this extraction appears to be missing the second
// parameter, local declarations, the loop frame, the final return, and
// closing braces — restore from the surrounding file before compiling.
static std::pair<uint32_t, uint32_t> getRemAndLZForGroup(unsigned group,
    // Rotation amounts are even, so round the leading-zero count down.
    lz = llvm::countl_zero(val) & ~1;
    if (lz == 32) // implies rem == 0
    // Drop the 8-bit chunk just consumed and keep the remainder.
    val &= 0xffffff >> lz;
// Encode an R_ARM_ALU_PC_Gn group relocation into an ADD/SUB (immediate)
// instruction. When `check` is set, an immediate that cannot be expressed
// as a rotated 8-bit value is a hard error.
// NOTE(review): this extraction appears to be missing the sub/negative
// handling, the imm/lz/rot declarations, the final write32 call head, and
// closing braces — restore from the surrounding file before compiling.
void ARM::encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val,
                         int group, bool check) const {
  // ADD/SUB (immediate) add = bit23, sub = bit22
  // immediate field carries is a 12-bit modified immediate, made up of a 4-bit
  // even rotate right and an 8-bit immediate.
  uint32_t opcode = 0x00800000;
  std::tie(imm, lz) = getRemAndLZForGroup(group, val);
  // Convert the leading-zero count into the even right-rotation that puts
  // the 8-bit chunk in the low byte.
  imm = rotr32(imm, 24 - lz);
  if (check && imm > 0xff)
    Err(ctx) << getErrorLoc(ctx, loc) << "unencodeable immediate " << val
             << " for relocation " << rel.type;
      (read32(ctx, loc) & 0xff3ff000) | opcode | rot | (imm & 0xff));
// Encode an R_ARM_LDR_PC_Gn group relocation into an LDR (literal)
// instruction: a 12-bit unsigned offset with the U (add/sub) bit.
// NOTE(review): this extraction appears to be missing the bottom-bit
// clear, the negative-value handling that flips the U bit, and the
// closing brace — restore from the surrounding file before compiling.
static void encodeLdrGroup(Ctx &ctx, uint8_t *loc, const Relocation &rel,
                           uint64_t val, int group) {
  // R_ARM_LDR_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a
  // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
  // bottom bit to recover S + A - P.
  if (rel.sym->isFunc())
  // LDR (literal) u = bit23
  uint32_t opcode = 0x00800000;
  uint32_t imm = getRemAndLZForGroup(group, val).first;
  checkUInt(ctx, loc, imm, 12, rel);
  write32(ctx, loc, (read32(ctx, loc) & 0xff7ff000) | opcode | imm);
// Encode an R_ARM_LDRS_PC_Gn group relocation into an LDRD/LDRH/LDRSB/LDRSH
// (literal) instruction: an 8-bit offset split across two 4-bit fields.
// NOTE(review): this extraction appears to be missing the bottom-bit
// clear, the negative-value handling, the low-nibble term of the final
// write32, and the closing brace — restore from the surrounding file
// before compiling.
static void encodeLdrsGroup(Ctx &ctx, uint8_t *loc, const Relocation &rel,
                            uint64_t val, int group) {
  // R_ARM_LDRS_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a
  // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
  // bottom bit to recover S + A - P.
  if (rel.sym->isFunc())
  // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23
  uint32_t opcode = 0x00800000;
  uint32_t imm = getRemAndLZForGroup(group, val).first;
  checkUInt(ctx, loc, imm, 8, rel);
      (read32(ctx, loc) & 0xff7ff0f0) | opcode | ((imm & 0xf0) << 4) |
// Apply a resolved relocation value `val` to the instruction/data at
// `loc`, per relocation type: each case masks the instruction's fixed
// bits, range-checks val, and splices the immediate field(s) back in.
// NOTE(review): this extraction appears to be missing the switch frame,
// many case labels, `break` statements, several write16/write32 call
// heads, local declarations (imm, sub, imm12, u), and closing braces —
// restore from the surrounding file before compiling.
void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  // Plain 32-bit data words.
  case R_ARM_BASE_PREL:
  case R_ARM_TLS_LDM32:
  case R_ARM_TLS_LDO32:
  case R_ARM_TLS_TPOFF32:
  case R_ARM_TLS_DTPOFF32:
    write32(ctx, loc, val);
    // 31-bit value with the top bit of the existing word preserved.
    checkInt(ctx, loc, val, 31, rel);
    write32(ctx, loc, (read32(ctx, loc) & 0x80000000) | (val & ~0x80000000));
    // R_ARM_CALL is used for BL and BLX instructions, for symbols of type
    // STT_FUNC we choose whether to write a BL or BLX depending on the
    // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is
    // not of type STT_FUNC then we must preserve the original instruction.
    assert(rel.sym); // R_ARM_CALL is always reached via relocate().
    bool bit0Thumb = val & 1;
    bool isBlx = (read32(ctx, loc) & 0xfe000000) == 0xfa000000;
    // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
    // even when type not STT_FUNC.
    if (!rel.sym->isFunc() && isBlx != bit0Thumb)
      stateChangeWarning(ctx, loc, rel.type, *rel.sym);
    if (rel.sym->isFunc() ? bit0Thumb : isBlx) {
      // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1'
      checkInt(ctx, loc, val, 26, rel);
                0xfa000000 |                    // opcode
                    ((val & 2) << 23) |         // H
                    ((val >> 2) & 0x00ffffff)); // imm24
    // BLX (always unconditional) instruction to an ARM Target, select an
    write32(ctx, loc, 0xeb000000 | (read32(ctx, loc) & 0x00ffffff));
    // fall through as BL encoding is shared with B
    checkInt(ctx, loc, val, 26, rel);
              (read32(ctx, loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff));
  case R_ARM_THM_JUMP8:
    // We do a 9 bit check because val is right-shifted by 1 bit.
    checkInt(ctx, loc, val, 9, rel);
    write16(ctx, loc, (read32(ctx, loc) & 0xff00) | ((val >> 1) & 0x00ff));
  case R_ARM_THM_JUMP11:
    // We do a 12 bit check because val is right-shifted by 1 bit.
    checkInt(ctx, loc, val, 12, rel);
    write16(ctx, loc, (read32(ctx, loc) & 0xf800) | ((val >> 1) & 0x07ff));
  case R_ARM_THM_JUMP19:
    // Encoding T3: Val = S:J2:J1:imm6:imm11:0
    checkInt(ctx, loc, val, 21, rel);
            (read16(ctx, loc) & 0xfbc0) | // opcode cond
                ((val >> 10) & 0x0400) |  // S
                ((val >> 12) & 0x003f));  // imm6
    write16(ctx, loc + 2,
                ((val >> 8) & 0x0800) | // J2
                ((val >> 5) & 0x2000) | // J1
                ((val >> 1) & 0x07ff)); // imm11
  case R_ARM_THM_CALL: {
    // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type
    // STT_FUNC we choose whether to write a BL or BLX depending on the
    // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is
    // not of type STT_FUNC then we must preserve the original instruction.
    // PLT entries are always ARM state so we know we need to interwork.
    assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
    bool bit0Thumb = val & 1;
    bool useThumb = bit0Thumb || useThumbPLTs(ctx);
    bool isBlx = (read16(ctx, loc + 2) & 0x1000) == 0;
    // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
    // even when type not STT_FUNC.
    if (!rel.sym->isFunc() && !rel.sym->isInPlt(ctx) && isBlx == useThumb)
      stateChangeWarning(ctx, loc, rel.type, *rel.sym);
    if ((rel.sym->isFunc() || rel.sym->isInPlt(ctx)) ? !useThumb : isBlx) {
      // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
      // the BLX instruction may only be two byte aligned. This must be done
      // before overflow check.
      val = alignTo(val, 4);
      write16(ctx, loc + 2, read16(ctx, loc + 2) & ~0x1000);
      write16(ctx, loc + 2, (read16(ctx, loc + 2) & ~0x1000) | 1 << 12);
    if (!ctx.arg.armJ1J2BranchEncoding) {
      // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
      // different encoding rules and range due to J1 and J2 always being 1.
      checkInt(ctx, loc, val, 23, rel);
                  ((val >> 12) & 0x07ff)); // imm11
      write16(ctx, loc + 2,
              (read16(ctx, loc + 2) & 0xd000) | // opcode
                  0x2800 |                      // J1 == J2 == 1
                  ((val >> 1) & 0x07ff));       // imm11
    // Fall through as rest of encoding is the same as B.W
  case R_ARM_THM_JUMP24:
    // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0
    checkInt(ctx, loc, val, 25, rel);
                ((val >> 14) & 0x0400) | // S
                ((val >> 12) & 0x03ff)); // imm10
    write16(ctx, loc + 2,
            (read16(ctx, loc + 2) & 0xd000) |               // opcode
                (((~(val >> 10)) ^ (val >> 11)) & 0x2000) | // J1
                (((~(val >> 11)) ^ (val >> 13)) & 0x0800) | // J2
                ((val >> 1) & 0x07ff));                     // imm11
  case R_ARM_MOVW_ABS_NC:
  case R_ARM_MOVW_PREL_NC:
  case R_ARM_MOVW_BREL_NC:
            (read32(ctx, loc) & ~0x000f0fff) | ((val & 0xf000) << 4) |
  case R_ARM_MOVT_PREL:
  case R_ARM_MOVT_BREL:
            (read32(ctx, loc) & ~0x000f0fff) | (((val >> 16) & 0xf000) << 4) |
                ((val >> 16) & 0xfff));
  case R_ARM_THM_MOVT_ABS:
  case R_ARM_THM_MOVT_PREL:
  case R_ARM_THM_MOVT_BREL:
    // Encoding T1: A = imm4:i:imm3:imm8
                ((val >> 17) & 0x0400) | // i
                ((val >> 28) & 0x000f)); // imm4
    write16(ctx, loc + 2,
            (read16(ctx, loc + 2) & 0x8f00) | // opcode
                ((val >> 12) & 0x7000) |      // imm3
                ((val >> 16) & 0x00ff));      // imm8
  case R_ARM_THM_MOVW_ABS_NC:
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVW_BREL_NC:
    // Encoding T3: A = imm4:i:imm3:imm8
                ((val >> 1) & 0x0400) |  // i
                ((val >> 12) & 0x000f)); // imm4
    write16(ctx, loc + 2,
            (read16(ctx, loc + 2) & 0x8f00) | // opcode
                ((val << 4) & 0x7000) |       // imm3
                (val & 0x00ff));              // imm8
  // Thumb ALU_ABS_Gn: each case writes one byte of val (bits 24+, 16-23,
  // 8-15, 0-7 respectively) into the instruction's low byte.
  case R_ARM_THM_ALU_ABS_G3:
    write16(ctx, loc, (read16(ctx, loc) & ~0x00ff) | ((val >> 24) & 0x00ff));
  case R_ARM_THM_ALU_ABS_G2_NC:
    write16(ctx, loc, (read16(ctx, loc) & ~0x00ff) | ((val >> 16) & 0x00ff));
  case R_ARM_THM_ALU_ABS_G1_NC:
    write16(ctx, loc, (read16(ctx, loc) & ~0x00ff) | ((val >> 8) & 0x00ff));
  case R_ARM_THM_ALU_ABS_G0_NC:
    write16(ctx, loc, (read16(ctx, loc) & ~0x00ff) | (val & 0x00ff));
  // Group relocations delegate to the shared encoders.
  case R_ARM_ALU_PC_G0:
    encodeAluGroup(loc, rel, val, 0, true);
  case R_ARM_ALU_PC_G0_NC:
    encodeAluGroup(loc, rel, val, 0, false);
  case R_ARM_ALU_PC_G1:
    encodeAluGroup(loc, rel, val, 1, true);
  case R_ARM_ALU_PC_G1_NC:
    encodeAluGroup(loc, rel, val, 1, false);
  case R_ARM_ALU_PC_G2:
    encodeAluGroup(loc, rel, val, 2, true);
  case R_ARM_LDR_PC_G0:
    encodeLdrGroup(ctx, loc, rel, val, 0);
  case R_ARM_LDR_PC_G1:
    encodeLdrGroup(ctx, loc, rel, val, 1);
  case R_ARM_LDR_PC_G2:
    encodeLdrGroup(ctx, loc, rel, val, 2);
  case R_ARM_LDRS_PC_G0:
    encodeLdrsGroup(ctx, loc, rel, val, 0);
  case R_ARM_LDRS_PC_G1:
    encodeLdrsGroup(ctx, loc, rel, val, 1);
  case R_ARM_LDRS_PC_G2:
    encodeLdrsGroup(ctx, loc, rel, val, 2);
  case R_ARM_THM_ALU_PREL_11_0: {
    // ADR encoding T2 (sub), T3 (add) i:imm3:imm8
    checkUInt(ctx, loc, imm, 12, rel);
    write16(ctx, loc, (read16(ctx, loc) & 0xfb0f) | sub | (imm & 0x800) >> 1);
    write16(ctx, loc + 2,
            (read16(ctx, loc + 2) & 0x8f00) | (imm & 0x700) << 4 |
    // ADR and LDR literal encoding T1 positive offset only imm8:00
    // R_ARM_THM_PC8 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a
    // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
    // bottom bit to recover S + A - Pa.
    if (rel.sym->isFunc())
    checkUInt(ctx, loc, val, 10, rel);
    checkAlignment(ctx, loc, val, 4, rel);
    write16(ctx, loc, (read16(ctx, loc) & 0xff00) | (val & 0x3fc) >> 2);
  case R_ARM_THM_PC12: {
    // LDR (literal) encoding T2, add = (U == '1') imm12
    // R_ARM_THM_PC12 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a
    // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
    // bottom bit to recover S + A - Pa.
    if (rel.sym->isFunc())
    checkUInt(ctx, loc, imm12, 12, rel);
    write16(ctx, loc, read16(ctx, loc) | u);
    write16(ctx, loc + 2, (read16(ctx, loc + 2) & 0xf000) | imm12);
    llvm_unreachable("unknown relocation");
// Decode the addend stored in the instruction/data at `buf` for a
// REL-format relocation: the inverse of the field-splicing performed in
// relocate().
// NOTE(review): this extraction appears to be missing the switch frame,
// many case labels, several `return 0;` lines, and closing braces —
// restore from the surrounding file before compiling.
int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
    InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
  // Full 32-bit data words.
  case R_ARM_BASE_PREL:
  case R_ARM_IRELATIVE:
  case R_ARM_TLS_DTPMOD32:
  case R_ARM_TLS_DTPOFF32:
  case R_ARM_TLS_LDM32:
  case R_ARM_TLS_LDO32:
  case R_ARM_TLS_TPOFF32:
    return SignExtend64<32>(read32(ctx, buf));
    return SignExtend64<31>(read32(ctx, buf));
    // Arm branch: imm24 field, scaled by 4.
    return SignExtend64<26>(read32(ctx, buf) << 2);
  case R_ARM_THM_JUMP8:
    return SignExtend64<9>(read16(ctx, buf) << 1);
  case R_ARM_THM_JUMP11:
    return SignExtend64<12>(read16(ctx, buf) << 1);
  case R_ARM_THM_JUMP19: {
    // Encoding T3: A = S:J2:J1:imm10:imm6:0
    uint16_t hi = read16(ctx, buf);
    uint16_t lo = read16(ctx, buf + 2);
    return SignExtend64<20>(((hi & 0x0400) << 10) | // S
                            ((lo & 0x0800) << 8) |  // J2
                            ((lo & 0x2000) << 5) |  // J1
                            ((hi & 0x003f) << 12) | // imm6
                            ((lo & 0x07ff) << 1));  // imm11:0
    if (!ctx.arg.armJ1J2BranchEncoding) {
      // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
      // different encoding rules and range due to J1 and J2 always being 1.
      uint16_t hi = read16(ctx, buf);
      uint16_t lo = read16(ctx, buf + 2);
      return SignExtend64<22>(((hi & 0x7ff) << 12) | // imm11
                              ((lo & 0x7ff) << 1));  // imm11:0
  case R_ARM_THM_JUMP24: {
    // Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0
    // I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S)
    uint16_t hi = read16(ctx, buf);
    uint16_t lo = read16(ctx, buf + 2);
    return SignExtend64<24>(((hi & 0x0400) << 14) |                    // S
                            (~((lo ^ (hi << 3)) << 10) & 0x00800000) | // I1
                            (~((lo ^ (hi << 1)) << 11) & 0x00400000) | // I2
                            ((hi & 0x003ff) << 12) |                   // imm0
                            ((lo & 0x007ff) << 1));                    // imm11:0
  // ELF for the ARM Architecture 4.6.1.1 the implicit addend for MOVW and
  // MOVT is in the range -32768 <= A < 32768
  case R_ARM_MOVW_ABS_NC:
  case R_ARM_MOVW_PREL_NC:
  case R_ARM_MOVT_PREL:
  case R_ARM_MOVW_BREL_NC:
  case R_ARM_MOVT_BREL: {
    uint64_t val = read32(ctx, buf) & 0x000f0fff;
    return SignExtend64<16>(((val & 0x000f0000) >> 4) | (val & 0x00fff));
  case R_ARM_THM_MOVW_ABS_NC:
  case R_ARM_THM_MOVT_ABS:
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVT_PREL:
  case R_ARM_THM_MOVW_BREL_NC:
  case R_ARM_THM_MOVT_BREL: {
    // Encoding T3: A = imm4:i:imm3:imm8
    uint16_t hi = read16(ctx, buf);
    uint16_t lo = read16(ctx, buf + 2);
    return SignExtend64<16>(((hi & 0x000f) << 12) | // imm4
                            ((hi & 0x0400) << 1) |  // i
                            ((lo & 0x7000) >> 4) |  // imm3
                            (lo & 0x00ff));         // imm8
  case R_ARM_THM_ALU_ABS_G0_NC:
  case R_ARM_THM_ALU_ABS_G1_NC:
  case R_ARM_THM_ALU_ABS_G2_NC:
  case R_ARM_THM_ALU_ABS_G3:
    return read16(ctx, buf) & 0xff;
  case R_ARM_ALU_PC_G0:
  case R_ARM_ALU_PC_G0_NC:
  case R_ARM_ALU_PC_G1:
  case R_ARM_ALU_PC_G1_NC:
  case R_ARM_ALU_PC_G2: {
    // 12-bit immediate is a modified immediate made up of a 4-bit even
    // right rotation and 8-bit constant. After the rotation the value
    // is zero-extended. When bit 23 is set the instruction is an add, when
    // bit 22 is set it is a sub.
    uint32_t instr = read32(ctx, buf);
    uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2);
    return (instr & 0x00400000) ? -val : val;
  case R_ARM_LDR_PC_G0:
  case R_ARM_LDR_PC_G1:
  case R_ARM_LDR_PC_G2: {
    // ADR (literal) add = bit23, sub = bit22
    // LDR (literal) u = bit23 unsigned imm12
    bool u = read32(ctx, buf) & 0x00800000;
    uint32_t imm12 = read32(ctx, buf) & 0xfff;
    return u ? imm12 : -imm12;
  case R_ARM_LDRS_PC_G0:
  case R_ARM_LDRS_PC_G1:
  case R_ARM_LDRS_PC_G2: {
    // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 unsigned imm8
    uint32_t opcode = read32(ctx, buf);
    bool u = opcode & 0x00800000;
    uint32_t imm4l = opcode & 0xf;
    uint32_t imm4h = (opcode & 0xf00) >> 4;
    return u ? (imm4h | imm4l) : -(imm4h | imm4l);
  case R_ARM_THM_ALU_PREL_11_0: {
    // Thumb2 ADR, which is an alias for a sub or add instruction with an
    // unsigned immediate.
    // ADR encoding T2 (sub), T3 (add) i:imm3:imm8
    uint16_t hi = read16(ctx, buf);
    uint16_t lo = read16(ctx, buf + 2);
    uint64_t imm = (hi & 0x0400) << 1 | // i
                   (lo & 0x7000) >> 4 | // imm3
                   (lo & 0x00ff);       // imm8
    // For sub, addend is negative, add is positive.
    return (hi & 0x00f0) ? -imm : imm;
    // ADR and LDR (literal) encoding T1
    // From ELF for the ARM Architecture the initial signed addend is formed
    // from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) - 4)
    // this trick permits the PC bias of -4 to be encoded using imm8 = 0xff
    return ((((read16(ctx, buf) & 0xff) << 2) + 4) & 0x3ff) - 4;
  case R_ARM_THM_PC12: {
    // LDR (literal) encoding T2, add = (U == '1') imm12
    bool u = read16(ctx, buf) & 0x0080;
    uint64_t imm12 = read16(ctx, buf + 2) & 0x0fff;
    return u ? imm12 : -imm12;
  case R_ARM_JUMP_SLOT:
    // These relocations are defined as not having an implicit addend.
1038 static bool isArmMapSymbol(const Symbol
*b
) {
1039 return b
->getName() == "$a" || b
->getName().starts_with("$a.");
1042 static bool isThumbMapSymbol(const Symbol
*s
) {
1043 return s
->getName() == "$t" || s
->getName().starts_with("$t.");
1046 static bool isDataMapSymbol(const Symbol
*b
) {
1047 return b
->getName() == "$d" || b
->getName().starts_with("$d.");
void elf::sortArmMappingSymbols(Ctx &ctx) {
  // For each input section make sure the mapping symbols are sorted in
  // ascending order of their offset (Defined::value) within the section, so
  // later passes can treat consecutive symbols as half-open intervals.
  // stable_sort keeps the original order of symbols at equal offsets.
  for (auto &kv : static_cast<ARM &>(*ctx.target).sectionMap) {
    SmallVector<const Defined *, 0> &mapSyms = kv.second;
    llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {
      return a->value < b->value;
    });
  }
}
void elf::addArmInputSectionMappingSymbols(Ctx &ctx) {
  // Collect mapping symbols for every executable input section.
  // The linker-generated mapping symbols for all the synthetic
  // sections are added into the sectionMap through the function
  // addArmSyntheticSectionMappingSymbol.
  auto &sectionMap = static_cast<ARM &>(*ctx.target).sectionMap;
  for (ELFFileBase *file : ctx.objectFiles) {
    for (Symbol *sym : file->getLocalSymbols()) {
      // Mapping symbols are always local Defined symbols.
      auto *def = dyn_cast<Defined>(sym);
      if (!def)
        continue;
      if (!isArmMapSymbol(def) && !isDataMapSymbol(def) &&
          !isThumbMapSymbol(def))
        continue;
      // Only record symbols in executable sections; data-only sections never
      // need endianness conversion or code-state tracking.
      if (auto *sec = cast_if_present<InputSection>(def->section))
        if (sec->flags & SHF_EXECINSTR)
          sectionMap[sec].push_back(def);
    }
  }
}
// Synthetic sections are not backed by an ELF file where we can access the
// symbol table, instead mapping symbols added to synthetic sections are stored
// in the synthetic symbol table. Due to the presence of strip (--strip-all),
// we can not rely on the synthetic symbol table retaining the mapping symbols.
// Instead we record the mapping symbols locally.
void elf::addArmSyntheticSectionMappingSymbol(Defined *sym) {
  // Ignore anything that is not a mapping symbol ($a/$t/$d convention).
  if (!isArmMapSymbol(sym) && !isDataMapSymbol(sym) && !isThumbMapSymbol(sym))
    return;
  // Record only symbols placed in executable sections, mirroring
  // addArmInputSectionMappingSymbols above.
  if (auto *sec = cast_if_present<InputSection>(sym->section))
    if (sec->flags & SHF_EXECINSTR)
      static_cast<ARM &>(*sec->file->ctx.target).sectionMap[sec].push_back(sym);
}
1095 static void toLittleEndianInstructions(uint8_t *buf
, uint64_t start
,
1096 uint64_t end
, uint64_t width
) {
1097 CodeState curState
= static_cast<CodeState
>(width
);
1098 if (curState
== CodeState::Arm
)
1099 for (uint64_t i
= start
; i
< end
; i
+= width
)
1100 write32le(buf
+ i
, read32be(buf
+ i
));
1102 if (curState
== CodeState::Thumb
)
1103 for (uint64_t i
= start
; i
< end
; i
+= width
)
1104 write16le(buf
+ i
, read16be(buf
+ i
));
// Arm BE8 big endian format requires instructions to be little endian, with
// the initial contents big-endian. Convert the big-endian instructions to
// little endian leaving literal data untouched. We use mapping symbols to
// identify half open intervals of Arm code [$a, non $a) and Thumb code
// [$t, non $t) and convert these to little endian a word or half word at a
// time respectively.
void elf::convertArmInstructionstoBE8(Ctx &ctx, InputSection *sec,
                                      uint8_t *buf) {
  // Only sections with recorded mapping symbols can be converted; sections
  // without entries in the map are left untouched.
  auto &sectionMap = static_cast<ARM &>(*ctx.target).sectionMap;
  auto it = sectionMap.find(sec);
  if (it == sectionMap.end())
    return;

  SmallVector<const Defined *, 0> &mapSyms = it->second;

  if (mapSyms.empty())
    return;

  // Walk the (sorted) mapping symbols as a state machine. On each state
  // change, convert the interval covered by the previous state. `width`
  // is the instruction unit size implied by the state (4 = Arm, 2 = Thumb).
  CodeState curState = CodeState::Data;
  uint64_t start = 0, width = 0, size = sec->getSize();
  for (auto &msym : mapSyms) {
    CodeState newState = CodeState::Data;
    if (isThumbMapSymbol(msym))
      newState = CodeState::Thumb;
    else if (isArmMapSymbol(msym))
      newState = CodeState::Arm;

    // Consecutive symbols of the same state extend the current interval.
    if (newState == curState)
      continue;

    // State changed: byte-swap the just-closed code interval. Data intervals
    // (literal pools) are intentionally left in their original byte order.
    if (curState != CodeState::Data) {
      width = static_cast<uint64_t>(curState);
      toLittleEndianInstructions(buf, start, msym->value, width);
    }
    start = msym->value;
    curState = newState;
  }

  // Passed last mapping symbol, may need to reverse
  // up to end of section.
  if (curState != CodeState::Data) {
    width = static_cast<uint64_t>(curState);
    toLittleEndianInstructions(buf, start, size, width);
  }
}
1153 // The Arm Cortex-M Security Extensions (CMSE) splits a system into two parts;
1154 // the non-secure and secure states with the secure state inaccessible from the
1155 // non-secure state, apart from an area of memory in secure state called the
1156 // secure gateway which is accessible from non-secure state. The secure gateway
1157 // contains one or more entry points which must start with a landing pad
1158 // instruction SG. Arm recommends that the secure gateway consists only of
1159 // secure gateway veneers, which are made up of a SG instruction followed by a
1160 // branch to the destination in secure state. Full details can be found in Arm
1161 // v8-M Security Extensions Requirements on Development Tools.
1163 // The CMSE model of software development requires the non-secure and secure
1164 // states to be developed as two separate programs. The non-secure developer is
1165 // provided with an import library defining symbols describing the entry points
1166 // in the secure gateway. No additional linker support is required for the
1167 // non-secure state.
1169 // Development of the secure state requires linker support to manage the secure
1170 // gateway veneers. The management consists of:
1171 // - Creation of new secure gateway veneers based on symbol conventions.
1172 // - Checking the address of existing secure gateway veneers.
// - Warning when existing secure gateway veneers are removed.
1175 // The secure gateway veneers are created in an import library, which is just an
1176 // ELF object with a symbol table. The import library is controlled by two
1177 // command line options:
1178 // --in-implib (specify an input import library from a previous revision of the
1180 // --out-implib (specify an output import library to be created by the linker).
1182 // The input import library is used to manage consistency of the secure entry
1183 // points. The output import library is for new and updated secure entry points.
1185 // The symbol convention that identifies secure entry functions is the prefix
1186 // __acle_se_ for a symbol called name the linker is expected to create a secure
1187 // gateway veneer if symbols __acle_se_name and name have the same address.
1188 // After creating a secure gateway veneer the symbol name labels the secure
1189 // gateway veneer and the __acle_se_name labels the function definition.
1191 // The LLD implementation:
1192 // - Reads an existing import library with importCmseSymbols().
1193 // - Determines which new secure gateway veneers to create and redirects calls
1194 // within the secure state to the __acle_se_ prefixed symbol with
1195 // processArmCmseSymbols().
1196 // - Models the SG veneers as a synthetic section.
// Initialize symbols. symbols is a parallel array to the corresponding ELF
// symbol table.
template <class ELFT> void ObjFile<ELFT>::importCmseSymbols() {
  ArrayRef<Elf_Sym> eSyms = getELFSyms<ELFT>();
  // Error for local symbols. The symbol at index 0 is LOCAL. So skip it.
  // A CMSE import library must only contain global symbols.
  for (size_t i = 1, end = firstGlobal; i != end; ++i) {
    Err(ctx) << "CMSE symbol '" << CHECK2(eSyms[i].getName(stringTable), this)
             << "' in import library '" << this << "' is not global";
  }

  for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
    const Elf_Sym &eSym = eSyms[i];
    Defined *sym = reinterpret_cast<Defined *>(make<SymbolUnion>());

    // Initialize symbol fields.
    memset(static_cast<void *>(sym), 0, sizeof(Symbol));
    sym->setName(CHECK2(eSyms[i].getName(stringTable), this));
    sym->value = eSym.st_value;
    sym->size = eSym.st_size;
    sym->type = eSym.getType();
    sym->binding = eSym.getBinding();
    sym->stOther = eSym.st_other;

    // Symbols in the import library record fixed addresses of secure gateway
    // veneers from a previous link, so they must be absolute.
    if (eSym.st_shndx != SHN_ABS) {
      Err(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '"
               << this << "' is not absolute";
      continue;
    }

    // Secure gateway veneers are Thumb code; bit 0 of the address must be set.
    if (!(eSym.st_value & 1) || (eSym.getType() != STT_FUNC)) {
      Err(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '"
               << this << "' is not a Thumb function definition";
      continue;
    }

    if (ctx.symtab->cmseImportLib.count(sym->getName())) {
      Err(ctx) << "CMSE symbol '" << sym->getName()
               << "' is multiply defined in import library '" << this << "'";
      continue;
    }

    // A well-formed veneer is exactly ACLESESYM_SIZE bytes (SG + B.W).
    // Only warn: a mismatch is suspicious but not necessarily fatal.
    if (eSym.st_size != ACLESESYM_SIZE) {
      Warn(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '"
                << this << "' does not have correct size of " << ACLESESYM_SIZE
                << " bytes";
    }

    ctx.symtab->cmseImportLib[sym->getName()] = sym;
  }
}
// Check symbol attributes of the acleSeSym, sym pair.
// Both symbols should be global/weak Thumb code symbol definitions.
// Returns an empty string on success, otherwise a diagnostic message.
static std::string checkCmseSymAttributes(Ctx &ctx, Symbol *acleSeSym,
                                          Symbol *sym) {
  // Shared validation for one symbol; `type` is "special" or "entry" and is
  // interpolated into the diagnostic text.
  auto check = [&](Symbol *s, StringRef type) -> std::optional<std::string> {
    auto d = dyn_cast_or_null<Defined>(s);
    // Must be a defined function with bit 0 set (Thumb code address).
    if (!(d && d->isFunc() && (d->value & 1)))
      return (Twine(toStr(ctx, s->file)) + ": cmse " + type + " symbol '" +
              s->getName() + "' is not a Thumb function definition")
          .str();
    // Must be section-relative; absolute symbols cannot label a definition.
    if (!d->section)
      return (Twine(toStr(ctx, s->file)) + ": cmse " + type + " symbol '" +
              s->getName() + "' cannot be an absolute symbol")
          .str();
    return std::nullopt;
  };
  for (auto [sym, type] :
       {std::make_pair(acleSeSym, "special"), std::make_pair(sym, "entry")})
    if (auto err = check(sym, type))
      return std::move(*err);
  return "";
}
// Look for [__acle_se_<sym>, <sym>] pairs, as specified in the Cortex-M
// Security Extensions specification.
// 1) <sym> : A standard function name.
// 2) __acle_se_<sym> : A special symbol that prefixes the standard function
// name with __acle_se_.
// Both these symbols are Thumb function symbols with external linkage.
// <sym> may be redefined in .gnu.sgstubs.
void elf::processArmCmseSymbols(Ctx &ctx) {
  if (!ctx.arg.cmseImplib)
    return;
  // Only symbols with external linkage end up in ctx.symtab, so no need to do
  // linkage checks. Only check symbol type.
  for (Symbol *acleSeSym : ctx.symtab->getSymbols()) {
    if (!acleSeSym->getName().starts_with(ACLESESYM_PREFIX))
      continue;
    // If input object build attributes do not support CMSE, error and disable
    // further scanning for <sym>, __acle_se_<sym> pairs.
    if (!ctx.arg.armCMSESupport) {
      Err(ctx) << "CMSE is only supported by ARMv8-M architecture or later";
      ctx.arg.cmseImplib = false;
      break;
    }

    // Try to find the associated symbol definition.
    // Symbol must have external linkage.
    StringRef name = acleSeSym->getName().substr(std::strlen(ACLESESYM_PREFIX));
    Symbol *sym = ctx.symtab->find(name);
    if (!sym) {
      Err(ctx) << acleSeSym->file << ": cmse special symbol '"
               << acleSeSym->getName()
               << "' detected, but no associated entry function definition '"
               << name << "' with external linkage found";
      continue;
    }

    std::string errMsg = checkCmseSymAttributes(ctx, acleSeSym, sym);
    if (!errMsg.empty()) {
      Err(ctx) << errMsg;
      continue;
    }

    // <sym> may be redefined later in the link in .gnu.sgstubs
    ctx.symtab->cmseSymMap[name] = {acleSeSym, sym};
  }

  // If this is an Arm CMSE secure app, replace references to entry symbol <sym>
  // with its corresponding special symbol __acle_se_<sym>, so that calls
  // within the secure state go directly to the function body rather than
  // through the secure gateway veneer.
  parallelForEach(ctx.objectFiles, [&](InputFile *file) {
    MutableArrayRef<Symbol *> syms = file->getMutableSymbols();
    for (size_t i = 0, e = syms.size(); i != e; ++i) {
      StringRef symName = syms[i]->getName();
      if (ctx.symtab->cmseSymMap.count(symName))
        syms[i] = ctx.symtab->cmseSymMap[symName].acleSeSym;
    }
  });
}
// Build the .gnu.sgstubs synthetic section containing secure gateway veneers.
// Scans the import library (if any) to fix the address range of pre-existing
// veneers, then creates a veneer for every <sym>, __acle_se_<sym> pair.
ArmCmseSGSection::ArmCmseSGSection(Ctx &ctx)
    : SyntheticSection(ctx, ".gnu.sgstubs", SHT_PROGBITS,
                       SHF_ALLOC | SHF_EXECINSTR,
                       // NOTE(review): alignment operand was lost in this
                       // view; upstream uses /*addralign=*/32 — confirm.
                       /*addralign=*/32) {
  entsize = ACLESESYM_SIZE;
  // The range of addresses used in the CMSE import library should be fixed.
  // Track one-past-the-end of the highest veneer so new veneers are placed
  // after all imported ones.
  for (auto &[_, sym] : ctx.symtab->cmseImportLib) {
    if (impLibMaxAddr <= sym->value)
      impLibMaxAddr = sym->value + sym->size;
  }
  if (ctx.symtab->cmseSymMap.empty())
    return;
  addMappingSymbol();
  for (auto &[_, entryFunc] : ctx.symtab->cmseSymMap)
    addSGVeneer(cast<Defined>(entryFunc.acleSeSym),
                cast<Defined>(entryFunc.sym));
  // Warn about veneers present in a previous link's import library but no
  // longer provided by this application.
  for (auto &[_, sym] : ctx.symtab->cmseImportLib) {
    if (!ctx.symtab->inCMSEOutImpLib.count(sym->getName()))
      Warn(ctx)
          << "entry function '" << sym->getName()
          << "' from CMSE import library is not present in secure application";
  }

  // Warn about newly introduced entry functions that will not be recorded
  // anywhere because no output import library was requested.
  if (!ctx.symtab->cmseImportLib.empty() && ctx.arg.cmseOutputLib.empty()) {
    for (auto &[_, entryFunc] : ctx.symtab->cmseSymMap) {
      Symbol *sym = entryFunc.sym;
      if (!ctx.symtab->inCMSEOutImpLib.count(sym->getName()))
        Warn(ctx) << "new entry function '" << sym->getName()
                  << "' introduced but no output import library specified";
    }
  }
}
// Record the (acleSeSym, sym) entry-function pair and, when both label the
// same address, create a secure gateway veneer for it. Veneers present in the
// input import library keep their previous (fixed) address.
void ArmCmseSGSection::addSGVeneer(Symbol *acleSeSym, Symbol *sym) {
  entries.emplace_back(acleSeSym, sym);
  if (ctx.symtab->cmseImportLib.count(sym->getName()))
    ctx.symtab->inCMSEOutImpLib[sym->getName()] = true;
  // Symbol addresses different, nothing to do.
  if (acleSeSym->file != sym->file ||
      cast<Defined>(*acleSeSym).value != cast<Defined>(*sym).value)
    return;
  // Only secure symbols with values equal to that of it's non-secure
  // counterpart needs to be in the .gnu.sgstubs section.
  std::unique_ptr<ArmCmseSGVeneer> ss;
  if (ctx.symtab->cmseImportLib.count(sym->getName())) {
    // Existing veneer: pin it to the address recorded in the import library.
    Defined *impSym = ctx.symtab->cmseImportLib[sym->getName()];
    ss = std::make_unique<ArmCmseSGVeneer>(sym, acleSeSym, impSym->value);
  } else {
    // New veneer: address assigned later, in finalizeContents().
    ss = std::make_unique<ArmCmseSGVeneer>(sym, acleSeSym);
    ++newEntries;
  }
  sgVeneers.emplace_back(std::move(ss));
}
// Emit the veneer code: an SG instruction (encoded as the two half-words
// 0xe97f 0xe97f) followed by a Thumb wide branch (B.W, placeholder encoding
// 0xf000 0xb000) whose offset to the __acle_se_ function is patched in via
// an R_ARM_THM_JUMP24 relocation.
void ArmCmseSGSection::writeTo(uint8_t *buf) {
  for (std::unique_ptr<ArmCmseSGVeneer> &s : sgVeneers) {
    uint8_t *p = buf + s->offset;
    write16(ctx, p + 0, 0xe97f); // SG
    write16(ctx, p + 2, 0xe97f);
    write16(ctx, p + 4, 0xf000); // B.W S
    write16(ctx, p + 6, 0xb000);
    // Branch target is PC-relative from the end of the veneer.
    ctx.target->relocateNoSym(p + 4, R_ARM_THM_JUMP24,
                              s->acleSeSym->getVA(ctx) -
                                  (getVA() + s->offset + s->size));
  }
}
// The veneers are Thumb code, so mark the start of the section with a $t
// mapping symbol.
void ArmCmseSGSection::addMappingSymbol() {
  addSyntheticLocal(ctx, "$t", STT_NOTYPE, /*off=*/0, /*size=*/0, *this);
}
1400 size_t ArmCmseSGSection::getSize() const {
1401 if (sgVeneers
.empty())
1402 return (impLibMaxAddr
? impLibMaxAddr
- getVA() : 0) + newEntries
* entsize
;
1404 return entries
.size() * entsize
;
// Lay out the veneers. Veneers with fixed addresses (from the import library)
// are placed first, in ascending address order; new veneers follow. Each
// veneer's entry symbol is then rewritten to label the veneer itself.
void ArmCmseSGSection::finalizeContents() {
  if (sgVeneers.empty())
    return;

  // Partition: fixed-address veneers first, keeping relative order of the
  // rest; then sort the fixed-address prefix by address.
  auto it =
      std::stable_partition(sgVeneers.begin(), sgVeneers.end(),
                            [](auto &i) { return i->getAddr().has_value(); });
  std::sort(sgVeneers.begin(), it, [](auto &a, auto &b) {
    return a->getAddr().value() < b->getAddr().value();
  });
  // This is the partition of the veneers with fixed addresses.
  uint64_t addr = (*sgVeneers.begin())->getAddr().has_value()
                      ? (*sgVeneers.begin())->getAddr().value()
                      : getVA();
  // Check if the start address of '.gnu.sgstubs' correspond to the
  // linker-synthesized veneer with the lowest address. Mask off bit 0 on
  // both sides since Thumb addresses carry it.
  if ((getVA() & ~1) != (addr & ~1)) {
    Err(ctx)
        << "start address of '.gnu.sgstubs' is different from previous link";
    return;
  }

  // Assign offsets sequentially and redirect each entry symbol <sym> to its
  // veneer (offset | 1 keeps the Thumb bit set).
  for (auto [i, s] : enumerate(sgVeneers)) {
    s->offset = i * s->size;
    Defined(ctx, file, StringRef(), s->sym->binding, s->sym->stOther,
            s->sym->type, s->offset | 1, s->size, this)
        .overwrite(*s->sym);
  }
}
// Write the CMSE import library to disk.
// The CMSE import library is a relocatable object with only a symbol table.
// The symbols are copies of the (absolute) symbols of the secure gateways
// in the executable output by this link.
// See Arm(R) v8-M Security Extensions: Requirements on Development Tools
// https://developer.arm.com/documentation/ecm0359818/latest
template <typename ELFT> void elf::writeARMCmseImportLib(Ctx &ctx) {
  // The import library needs only three sections: .strtab, .symtab, .shstrtab.
  auto shstrtab =
      std::make_unique<StringTableSection>(ctx, ".shstrtab", /*dynamic=*/false);
  auto strtab =
      std::make_unique<StringTableSection>(ctx, ".strtab", /*dynamic=*/false);
  auto impSymTab = std::make_unique<SymbolTableSection<ELFT>>(ctx, *strtab);

  // Pair each synthetic section with an output section of the same name.
  SmallVector<std::pair<std::unique_ptr<OutputSection>, SyntheticSection *>, 0>
      osIsPairs;
  osIsPairs.emplace_back(
      std::make_unique<OutputSection>(ctx, strtab->name, 0, 0), strtab.get());
  osIsPairs.emplace_back(
      std::make_unique<OutputSection>(ctx, impSymTab->name, 0, 0),
      impSymTab.get());
  osIsPairs.emplace_back(
      std::make_unique<OutputSection>(ctx, shstrtab->name, 0, 0),
      shstrtab.get());

  // Emit symbols in ascending address order for reproducible output.
  llvm::sort(ctx.symtab->cmseSymMap, [&](const auto &a, const auto &b) {
    return a.second.sym->getVA(ctx) < b.second.sym->getVA(ctx);
  });
  // Copy the secure gateway entry symbols to the import library symbol table.
  for (auto &p : ctx.symtab->cmseSymMap) {
    Defined *d = cast<Defined>(p.second.sym);
    impSymTab->addSymbol(makeDefined(
        ctx, ctx.internalFile, d->getName(), d->computeBinding(ctx),
        /*stOther=*/0, STT_FUNC, d->getVA(ctx), d->getSize(), nullptr));
  }

  // Finalize each section and compute its file offset; sections are laid out
  // back-to-back after the ELF header, aligned to their own alignment.
  size_t idx = 0;
  uint64_t off = sizeof(typename ELFT::Ehdr);
  for (auto &[osec, isec] : osIsPairs) {
    osec->sectionIndex = ++idx;
    osec->recordSection(isec);
    osec->finalizeInputSections();
    osec->shName = shstrtab->addString(osec->name);
    osec->size = isec->getSize();
    isec->finalizeContents();
    osec->offset = alignToPowerOf2(off, osec->addralign);
    off = osec->offset + osec->size;
  }

  // +1 in shnum for the mandatory null section header at index 0.
  const uint64_t sectionHeaderOff = alignToPowerOf2(off, ctx.arg.wordsize);
  const auto shnum = osIsPairs.size() + 1;
  const uint64_t fileSize =
      sectionHeaderOff + shnum * sizeof(typename ELFT::Shdr);
  const unsigned flags =
      ctx.arg.mmapOutputFile ? 0 : (unsigned)FileOutputBuffer::F_no_mmap;
  unlinkAsync(ctx.arg.cmseOutputLib);
  Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr =
      FileOutputBuffer::create(ctx.arg.cmseOutputLib, fileSize, flags);
  if (!bufferOrErr) {
    Err(ctx) << "failed to open " << ctx.arg.cmseOutputLib << ": "
             << bufferOrErr.takeError();
    return;
  }

  // Write the ELF Header. The buffer contents are not guaranteed to be
  // zeroed, so every header field is set explicitly.
  std::unique_ptr<FileOutputBuffer> &buffer = *bufferOrErr;
  uint8_t *const buf = buffer->getBufferStart();
  memcpy(buf, "\177ELF", 4);
  auto *eHdr = reinterpret_cast<typename ELFT::Ehdr *>(buf);
  eHdr->e_type = ET_REL;
  eHdr->e_entry = 0;
  eHdr->e_shoff = sectionHeaderOff;
  eHdr->e_ident[EI_CLASS] = ELFCLASS32;
  eHdr->e_ident[EI_DATA] = ctx.arg.isLE ? ELFDATA2LSB : ELFDATA2MSB;
  eHdr->e_ident[EI_VERSION] = EV_CURRENT;
  eHdr->e_ident[EI_OSABI] = ctx.arg.osabi;
  eHdr->e_ident[EI_ABIVERSION] = 0;
  eHdr->e_machine = EM_ARM;
  eHdr->e_version = EV_CURRENT;
  eHdr->e_flags = ctx.arg.eflags;
  eHdr->e_ehsize = sizeof(typename ELFT::Ehdr);
  // No program headers in a relocatable object.
  eHdr->e_phoff = 0;
  eHdr->e_phentsize = 0;
  eHdr->e_phnum = 0;
  eHdr->e_shentsize = sizeof(typename ELFT::Shdr);
  eHdr->e_shnum = shnum;
  eHdr->e_shstrndx = shstrtab->getParent()->sectionIndex;

  // Write the section header table. Pre-increment skips the null header at
  // index 0 (the buffer region for it stays as created).
  auto *sHdrs = reinterpret_cast<typename ELFT::Shdr *>(buf + eHdr->e_shoff);
  for (auto &[osec, _] : osIsPairs)
    osec->template writeHeaderTo<ELFT>(++sHdrs);

  // Write section contents to a mmap'ed file.
  {
    parallel::TaskGroup tg;
    for (auto &[osec, _] : osIsPairs)
      osec->template writeTo<ELFT>(ctx, buf + osec->offset, tg);
  }

  if (auto e = buffer->commit())
    Fatal(ctx) << "failed to write output '" << buffer->getPath()
               << "': " << std::move(e);
}
// Install the ARM TargetInfo implementation into the link context.
void elf::setARMTargetInfo(Ctx &ctx) { ctx.target.reset(new ARM(ctx)); }
// Explicit template instantiations for every ELF flavor; the definitions
// above live in this .cpp file, so each used specialization must be
// instantiated here.
template void elf::writeARMCmseImportLib<ELF32LE>(Ctx &);
template void elf::writeARMCmseImportLib<ELF32BE>(Ctx &);
template void elf::writeARMCmseImportLib<ELF64LE>(Ctx &);
template void elf::writeARMCmseImportLib<ELF64BE>(Ctx &);

template void ObjFile<ELF32LE>::importCmseSymbols();
template void ObjFile<ELF32BE>::importCmseSymbols();
template void ObjFile<ELF64LE>::importCmseSymbols();
template void ObjFile<ELF64BE>::importCmseSymbols();