1 //===- ARMErrataFix.cpp ---------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // This file implements Section Patching for the purpose of working around the
9 // Cortex-a8 erratum 657417 "A 32bit branch instruction that spans 2 4K regions
10 // can result in an incorrect instruction fetch or processor deadlock." The
11 // erratum affects all but r1p7, r2p5, r2p6, r3p1 and r3p2 revisions of the
12 // Cortex-A8. A high level description of the patching technique is given in
13 // the opening comment of AArch64ErrataFix.cpp.
14 //===----------------------------------------------------------------------===//
16 #include "ARMErrataFix.h"
17 #include "InputFiles.h"
18 #include "LinkerScript.h"
19 #include "OutputSections.h"
20 #include "Relocations.h"
22 #include "SyntheticSections.h"
24 #include "lld/Common/CommonLinkerContext.h"
25 #include "lld/Common/Strings.h"
26 #include "llvm/Support/Endian.h"
30 using namespace llvm::ELF
;
31 using namespace llvm::object
;
32 using namespace llvm::support
;
33 using namespace llvm::support::endian
;
35 using namespace lld::elf
;
37 // The documented title for Erratum 657417 is:
38 // "A 32bit branch instruction that spans two 4K regions can result in an
39 // incorrect instruction fetch or processor deadlock". Graphically using a
40 // 32-bit B.w instruction encoded as a pair of halfwords 0xf7fe 0xbfff
41 // xxxxxx000 // Memory region 1 start
44 // xxxxxxffe f7fe // First halfword of branch to target:
45 // xxxxxx000 // Memory region 2 start
46 // xxxxxx002 bfff // Second halfword of branch to target:
48 // The specific trigger conditions that can be detected at link time are:
49 // - There is a 32-bit Thumb-2 branch instruction with an address of the form
50 // xxxxxxFFE. The first 2 bytes of the instruction are in 4KiB region 1, the
51 // second 2 bytes are in region 2.
52 // - The branch instruction is one of BLX, BL, B.w or Bcc.w.
53 // - The instruction preceding the branch is a 32-bit non-branch instruction.
54 // - The target of the branch is in region 1.
56 // The linker mitigation for the fix is to redirect any branch that meets the
57 // erratum conditions to a patch section containing a branch to the target.
59 // As adding patch sections may move branches onto region boundaries the patch
60 // must iterate until no more patches are added.
63 // 00000FFA func: NOP.w // 32-bit Thumb function
64 // 00000FFE B.W func // 32-bit branch spanning 2 regions, dest in 1st.
66 // 00000FFA func: NOP.w // 32-bit Thumb function
67 // 00000FFE B.w __CortexA8657417_00000FFE
68 // 00001002 2 - bytes padding
69 // 00001004 __CortexA8657417_00000FFE: B.w func
71 class elf::Patch657417Section final
: public SyntheticSection
{
73 Patch657417Section(Ctx
&, InputSection
*p
, uint64_t off
, uint32_t instr
,
76 void writeTo(uint8_t *buf
) override
;
78 size_t getSize() const override
{ return 4; }
80 // Get the virtual address of the branch instruction at patcheeOffset.
81 uint64_t getBranchAddr() const;
83 static bool classof(const SectionBase
*d
) {
84 return d
->kind() == InputSectionBase::Synthetic
&& d
->name
==".text.patch";
87 // The Section we are patching.
88 const InputSection
*patchee
;
89 // The offset of the instruction in the Patchee section we are patching.
90 uint64_t patcheeOffset
;
91 // A label for the start of the Patch that we can use as a relocation target.
93 // A decoding of the branch instruction at patcheeOffset.
95 // True if the patch is to be written in ARM state, otherwise the patch will
96 // be written in Thumb state.
100 // Return true if the half-word, when taken as the first of a pair of halfwords
101 // is the first half of a 32-bit instruction.
102 // Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition
103 // section A6.3: 32-bit Thumb instruction encoding
105 // | 1 1 1 | op1 (2) | op2 (7) | x (4) |op| x (15) |
106 // With op1 == 0b00, a 16-bit instruction is encoded.
108 // We test only the first halfword, looking for op1 != 0b00.
// Return true if hw is the first halfword of a 32-bit Thumb instruction.
// That requires the top three bits (mask 0xe000) to be 0b111 and the two-bit
// op1 field below them (mask 0x1800) to be non-zero; with op1 == 0b00 the
// halfword is a complete 16-bit instruction.
static bool is32bitInstruction(uint16_t hw) {
  const bool topThreeBitsSet = (hw & 0xe000) == 0xe000;
  const bool op1NonZero = (hw & 0x1800) != 0x0000;
  return topThreeBitsSet && op1NonZero;
}
113 // Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition
114 // section A6.3.4 Branches and miscellaneous control.
116 // | 1 1 1 | 1 0 | op (7) | x (4) | 1 | op1 (3) | op2 (4) | imm8 (8) |
117 // op1 == 0x0 op != x111xxx | Conditional branch (Bcc.W)
118 // op1 == 0x1 | Branch (B.W)
119 // op1 == 1x0 | Branch with Link and Exchange (BLX.w)
120 // op1 == 1x1 | Branch with Link (BL.W)
// Return true if instr (first halfword in the upper 16 bits) is a conditional
// branch, Bcc.W: the fixed bits must match op1 == 0x0 and op must not be of
// the form x111xxx.
static bool isBcc(uint32_t instr) {
  if ((instr & 0xf800d000) != 0xf0008000)
    return false;
  // With the three op bits selected by 0x03800000 all set (op == x111xxx) the
  // encoding is not a conditional branch.
  return (instr & 0x03800000) != 0x03800000;
}
// Return true if instr is an unconditional 32-bit branch, B.W (op1 == 0x1).
static bool isB(uint32_t instr) {
  constexpr uint32_t fixedBits = 0xf800d000;
  constexpr uint32_t encodingB = 0xf0009000;
  return (instr & fixedBits) == encodingB;
}
// Return true if instr is a Branch with Link and Exchange, BLX (op1 == 1x0).
static bool isBLX(uint32_t instr) {
  constexpr uint32_t fixedBits = 0xf800d000;
  constexpr uint32_t encodingBLX = 0xf000c000;
  return (instr & fixedBits) == encodingBLX;
}
// Return true if instr is a Branch with Link, BL (op1 == 1x1).
static bool isBL(uint32_t instr) {
  constexpr uint32_t fixedBits = 0xf800d000;
  constexpr uint32_t encodingBL = 0xf000d000;
  return (instr & fixedBits) == encodingBL;
}
133 static bool is32bitBranch(uint32_t instr
) {
134 return isBcc(instr
) || isB(instr
) || isBL(instr
) || isBLX(instr
);
137 Patch657417Section::Patch657417Section(Ctx
&ctx
, InputSection
*p
, uint64_t off
,
138 uint32_t instr
, bool isARM
)
139 : SyntheticSection(ctx
, SHF_ALLOC
| SHF_EXECINSTR
, SHT_PROGBITS
, 4,
141 patchee(p
), patcheeOffset(off
), instr(instr
), isARM(isARM
) {
142 parent
= p
->getParent();
143 patchSym
= addSyntheticLocal(
144 ctx
, ctx
.saver
.save("__CortexA8657417_" + utohexstr(getBranchAddr())),
145 STT_FUNC
, isARM
? 0 : 1, getSize(), *this);
146 addSyntheticLocal(ctx
, ctx
.saver
.save(isARM
? "$a" : "$t"), STT_NOTYPE
, 0, 0,
150 uint64_t Patch657417Section::getBranchAddr() const {
151 return patchee
->getVA(patcheeOffset
);
154 // Given a branch instruction instr at sourceAddr work out its destination
155 // address. This is only used when the branch instruction has no relocation.
156 static uint64_t getThumbDestAddr(Ctx
&ctx
, uint64_t sourceAddr
,
159 write16le(buf
, instr
>> 16);
160 write16le(buf
+ 2, instr
& 0x0000ffff);
163 offset
= ctx
.target
->getImplicitAddend(buf
, R_ARM_THM_JUMP19
);
165 offset
= ctx
.target
->getImplicitAddend(buf
, R_ARM_THM_JUMP24
);
167 offset
= ctx
.target
->getImplicitAddend(buf
, R_ARM_THM_CALL
);
168 // A BLX instruction from Thumb to Arm may have an address that is
169 // not 4-byte aligned. As Arm instructions are always 4-byte aligned
170 // the instruction is calculated (from Arm ARM):
171 // targetAddress = Align(PC, 4) + imm32
173 // Align(x, y) = y * (x Div y)
174 // which corresponds to alignDown.
176 sourceAddr
= alignDown(sourceAddr
, 4);
177 return sourceAddr
+ offset
+ 4;
180 void Patch657417Section::writeTo(uint8_t *buf
) {
181 // The base instruction of the patch is always a 32-bit unconditional branch.
183 write32le(buf
, 0xea000000);
185 write32le(buf
, 0x9000f000);
186 // If we have a relocation then apply it.
187 if (!relocs().empty()) {
188 ctx
.target
->relocateAlloc(*this, buf
);
192 // If we don't have a relocation then we must calculate and write the offset
194 // Get the destination offset from the addend in the branch instruction.
195 // We cannot use the instruction in the patchee section as this will have
196 // been altered to point to us!
197 uint64_t s
= getThumbDestAddr(ctx
, getBranchAddr(), instr
);
198 // A BLX changes the state of the branch in the patch to Arm state, which
199 // has a PC Bias of 8, whereas in all other cases the branch is in Thumb
200 // state with a PC Bias of 4.
201 uint64_t pcBias
= isBLX(instr
) ? 8 : 4;
202 uint64_t p
= getVA(pcBias
);
203 ctx
.target
->relocateNoSym(buf
, isARM
? R_ARM_JUMP24
: R_ARM_THM_JUMP24
,
207 // Given a branch instruction spanning two 4KiB regions, at offset off from the
208 // start of isec, return true if the destination of the branch is within the
209 // first of the two 4KiB regions.
210 static bool branchDestInFirstRegion(Ctx
&ctx
, const InputSection
*isec
,
211 uint64_t off
, uint32_t instr
,
212 const Relocation
*r
) {
213 uint64_t sourceAddr
= isec
->getVA(0) + off
;
214 assert((sourceAddr
& 0xfff) == 0xffe);
216 // If there is a branch relocation at the same offset we must use this to
217 // find the destination address as the branch could be indirected via a thunk
221 r
->expr
== R_PLT_PC
? r
->sym
->getPltVA(ctx
) : r
->sym
->getVA(ctx
);
222 // Account for Thumb PC bias, usually cancelled to 0 by addend of -4.
223 destAddr
= dst
+ r
->addend
+ 4;
225 // If there is no relocation, we must have an intra-section branch
226 // We must extract the offset from the addend manually.
227 destAddr
= getThumbDestAddr(ctx
, sourceAddr
, instr
);
230 return (destAddr
& 0xfffff000) == (sourceAddr
& 0xfffff000);
233 // Return true if a branch can reach a patch section placed after isec.
234 // The Bcc.w instruction has a range of 1 MiB, all others have 16 MiB.
235 static bool patchInRange(Ctx
&ctx
, const InputSection
*isec
, uint64_t off
,
238 // We need the branch at source to reach a patch section placed immediately
239 // after isec. As there can be more than one patch in the patch section we
240 // add 0x100 as contingency to account for worst case of 1 branch every 4KiB
241 // for a 1 MiB range.
242 return ctx
.target
->inBranchRange(
243 isBcc(instr
) ? R_ARM_THM_JUMP19
: R_ARM_THM_JUMP24
, isec
->getVA(off
),
244 isec
->getVA() + isec
->getSize() + 0x100);
248 // Offset of branch within its InputSection.
250 // Cached decoding of the branch instruction.
252 // Branch relocation at off. Will be nullptr if no relocation exists.
256 // Detect the erratum sequence, returning the offset of the branch instruction
257 // and a decoding of the branch. If the erratum sequence is not found then
258 // return an offset of 0 for the branch. 0 is a safe value to use for no patch
259 // as there must be at least one 32-bit non-branch instruction before the
260 // branch so the minimum offset for a patch is 4.
261 static ScanResult
scanCortexA8Errata657417(InputSection
*isec
, uint64_t &off
,
263 Ctx
&ctx
= isec
->getCtx();
264 uint64_t isecAddr
= isec
->getVA(0);
265 // Advance off so that (isecAddr + off) modulo 0x1000 is at least 0xffa. We
266 // need to check for a 32-bit instruction immediately before a 32-bit branch
267 // at 0xffe modulo 0x1000.
268 off
= alignTo(isecAddr
+ off
, 0x1000, 0xffa) - isecAddr
;
269 if (off
>= limit
|| limit
- off
< 8) {
270 // Need at least 2 4-byte sized instructions to trigger erratum.
272 return {0, 0, nullptr};
275 ScanResult scanRes
= {0, 0, nullptr};
276 const uint8_t *buf
= isec
->content().begin();
277 // ARMv7-A Thumb 32-bit instructions are encoded as 2 consecutive
278 // little-endian halfwords.
279 const ulittle16_t
*instBuf
= reinterpret_cast<const ulittle16_t
*>(buf
+ off
);
280 uint16_t hw11
= *instBuf
++;
281 uint16_t hw12
= *instBuf
++;
282 uint16_t hw21
= *instBuf
++;
283 uint16_t hw22
= *instBuf
++;
284 if (is32bitInstruction(hw11
) && is32bitInstruction(hw21
)) {
285 uint32_t instr1
= (hw11
<< 16) | hw12
;
286 uint32_t instr2
= (hw21
<< 16) | hw22
;
287 if (!is32bitBranch(instr1
) && is32bitBranch(instr2
)) {
288 // Find a relocation for the branch if it exists. This will be used
289 // to determine the target.
290 uint64_t branchOff
= off
+ 4;
291 auto relIt
= llvm::find_if(isec
->relocs(), [=](const Relocation
&r
) {
292 return r
.offset
== branchOff
&&
293 (r
.type
== R_ARM_THM_JUMP19
|| r
.type
== R_ARM_THM_JUMP24
||
294 r
.type
== R_ARM_THM_CALL
);
296 if (relIt
!= isec
->relocs().end())
297 scanRes
.rel
= &(*relIt
);
298 if (branchDestInFirstRegion(ctx
, isec
, branchOff
, instr2
, scanRes
.rel
)) {
299 if (patchInRange(ctx
, isec
, branchOff
, instr2
)) {
300 scanRes
.off
= branchOff
;
301 scanRes
.instr
= instr2
;
303 Warn(ctx
) << isec
->file
304 << ": skipping cortex-a8 657417 erratum sequence, section "
305 << isec
->name
<< " is too large to patch";
314 void ARMErr657417Patcher::init() {
315 // The Arm ABI permits a mix of ARM, Thumb and Data in the same
316 // InputSection. We must only scan Thumb instructions to avoid false
317 // matches. We use the mapping symbols in the InputObjects to identify this
318 // data, caching the results in sectionMap so we don't have to recalculate
321 // ABI section 4.5.5, Mapping symbols, defines local symbols that describe
322 // half open intervals [Symbol Value, Next Symbol Value) of code and data
323 // within sections. If there is no next symbol then the half open interval is
324 // [Symbol Value, End of section). The type, code or data, is determined by
325 // the mapping symbol name, $a for Arm code, $t for Thumb code, $d for data.
326 auto isArmMapSymbol
= [](const Symbol
*s
) {
327 return s
->getName() == "$a" || s
->getName().starts_with("$a.");
329 auto isThumbMapSymbol
= [](const Symbol
*s
) {
330 return s
->getName() == "$t" || s
->getName().starts_with("$t.");
332 auto isDataMapSymbol
= [](const Symbol
*s
) {
333 return s
->getName() == "$d" || s
->getName().starts_with("$d.");
336 // Collect mapping symbols for every executable InputSection.
337 for (ELFFileBase
*file
: ctx
.objectFiles
) {
338 for (Symbol
*s
: file
->getLocalSymbols()) {
339 auto *def
= dyn_cast
<Defined
>(s
);
342 if (!isArmMapSymbol(def
) && !isThumbMapSymbol(def
) &&
343 !isDataMapSymbol(def
))
345 if (auto *sec
= dyn_cast_or_null
<InputSection
>(def
->section
))
346 if (sec
->flags
& SHF_EXECINSTR
)
347 sectionMap
[sec
].push_back(def
);
350 // For each InputSection make sure the mapping symbols are sorted in
351 // ascending order and are in alternating Thumb, non-Thumb order.
352 for (auto &kv
: sectionMap
) {
353 std::vector
<const Defined
*> &mapSyms
= kv
.second
;
354 llvm::stable_sort(mapSyms
, [](const Defined
*a
, const Defined
*b
) {
355 return a
->value
< b
->value
;
357 mapSyms
.erase(std::unique(mapSyms
.begin(), mapSyms
.end(),
358 [=](const Defined
*a
, const Defined
*b
) {
359 return (isThumbMapSymbol(a
) ==
360 isThumbMapSymbol(b
));
363 // Always start with a Thumb Mapping Symbol
364 if (!mapSyms
.empty() && !isThumbMapSymbol(mapSyms
.front()))
365 mapSyms
.erase(mapSyms
.begin());
370 void ARMErr657417Patcher::insertPatches(
371 InputSectionDescription
&isd
, std::vector
<Patch657417Section
*> &patches
) {
372 uint64_t spacing
= 0x100000 - 0x7500;
374 uint64_t prevIsecLimit
= isd
.sections
.front()->outSecOff
;
375 uint64_t patchUpperBound
= prevIsecLimit
+ spacing
;
376 uint64_t outSecAddr
= isd
.sections
.front()->getParent()->addr
;
378 // Set the outSecOff of patches to the place where we want to insert them.
379 // We use a similar strategy to initial thunk placement, using 1 MiB as the
380 // range of the Thumb-2 conditional branch with a contingency accounting for
382 auto patchIt
= patches
.begin();
383 auto patchEnd
= patches
.end();
384 for (const InputSection
*isec
: isd
.sections
) {
385 isecLimit
= isec
->outSecOff
+ isec
->getSize();
386 if (isecLimit
> patchUpperBound
) {
387 for (; patchIt
!= patchEnd
; ++patchIt
) {
388 if ((*patchIt
)->getBranchAddr() - outSecAddr
>= prevIsecLimit
)
390 (*patchIt
)->outSecOff
= prevIsecLimit
;
392 patchUpperBound
= prevIsecLimit
+ spacing
;
394 prevIsecLimit
= isecLimit
;
396 for (; patchIt
!= patchEnd
; ++patchIt
)
397 (*patchIt
)->outSecOff
= isecLimit
;
399 // Merge all patch sections. We use the outSecOff assigned above to
400 // determine the insertion point. This is ok as we only merge into an
401 // InputSectionDescription once per pass, and at the end of the pass
402 // assignAddresses() will recalculate all the outSecOff values.
403 SmallVector
<InputSection
*, 0> tmp
;
404 tmp
.reserve(isd
.sections
.size() + patches
.size());
405 auto mergeCmp
= [](const InputSection
*a
, const InputSection
*b
) {
406 if (a
->outSecOff
!= b
->outSecOff
)
407 return a
->outSecOff
< b
->outSecOff
;
408 return isa
<Patch657417Section
>(a
) && !isa
<Patch657417Section
>(b
);
410 std::merge(isd
.sections
.begin(), isd
.sections
.end(), patches
.begin(),
411 patches
.end(), std::back_inserter(tmp
), mergeCmp
);
412 isd
.sections
= std::move(tmp
);
415 // Given a branch instruction described by ScanRes redirect it to a patch
416 // section containing an unconditional branch instruction to the target.
417 // Ensure that this patch section is 4-byte aligned so that the branch cannot
418 // span two 4 KiB regions. Place the patch section so that it is always after
419 // isec so the branch we are patching always goes forwards.
420 static void implementPatch(ScanResult sr
, InputSection
*isec
,
421 std::vector
<Patch657417Section
*> &patches
) {
422 Ctx
&ctx
= isec
->getCtx();
423 Log(ctx
) << "detected cortex-a8-657419 erratum sequence starting at " <<
424 utohexstr(isec
->getVA(sr
.off
)) << " in unpatched output";
425 Patch657417Section
*psec
;
426 // We have two cases to deal with.
427 // Case 1. There is a relocation at patcheeOffset to a symbol. The
428 // unconditional branch in the patch must have a relocation so that any
429 // further redirection via the PLT or a Thunk happens as normal. At
430 // patcheeOffset we redirect the existing relocation to a Symbol defined at
431 // the start of the patch section.
433 // Case 2. There is no relocation at patcheeOffset. We are unlikely to have
434 // a symbol that we can use as a target for a relocation in the patch section.
435 // Luckily we know that the destination cannot be indirected via the PLT or
436 // a Thunk so we can just write the destination directly.
438 // Case 1. We have an existing relocation to redirect to patch and a
441 // Create a branch relocation for the unconditional branch in the patch.
442 // This can be redirected via the PLT or Thunks.
443 RelType patchRelType
= R_ARM_THM_JUMP24
;
444 int64_t patchRelAddend
= sr
.rel
->addend
;
445 bool destIsARM
= false;
446 if (isBL(sr
.instr
) || isBLX(sr
.instr
)) {
447 // The final target of the branch may be ARM or Thumb, if the target
448 // is ARM then we write the patch in ARM state to avoid a state change
449 // Thunk from the patch to the target.
450 uint64_t dstSymAddr
= (sr
.rel
->expr
== R_PLT_PC
)
451 ? sr
.rel
->sym
->getPltVA(ctx
)
452 : sr
.rel
->sym
->getVA(ctx
);
453 destIsARM
= (dstSymAddr
& 1) == 0;
455 psec
= make
<Patch657417Section
>(ctx
, isec
, sr
.off
, sr
.instr
, destIsARM
);
457 // The patch will be in ARM state. Use an ARM relocation and account for
458 // the larger ARM PC-bias of 8 rather than Thumb's 4.
459 patchRelType
= R_ARM_JUMP24
;
463 Relocation
{sr
.rel
->expr
, patchRelType
, 0, patchRelAddend
, sr
.rel
->sym
});
464 // Redirect the existing branch relocation to the patch.
467 sr
.rel
->sym
= psec
->patchSym
;
469 // Case 2. We do not have a relocation to the patch. Add a relocation of the
470 // appropriate type to the patch at patcheeOffset.
472 // The destination is ARM if we have a BLX.
474 make
<Patch657417Section
>(ctx
, isec
, sr
.off
, sr
.instr
, isBLX(sr
.instr
));
477 type
= R_ARM_THM_JUMP19
;
478 else if (isB(sr
.instr
))
479 type
= R_ARM_THM_JUMP24
;
481 type
= R_ARM_THM_CALL
;
482 isec
->addReloc(Relocation
{R_PC
, type
, sr
.off
, -4, psec
->patchSym
});
484 patches
.push_back(psec
);
487 // Scan all the instructions in InputSectionDescription, for each instance of
488 // the erratum sequence create a Patch657417Section. We return the list of
489 // Patch657417Sections that need to be applied to the InputSectionDescription.
490 std::vector
<Patch657417Section
*>
491 ARMErr657417Patcher::patchInputSectionDescription(
492 InputSectionDescription
&isd
) {
493 std::vector
<Patch657417Section
*> patches
;
494 for (InputSection
*isec
: isd
.sections
) {
495 // LLD doesn't use the erratum sequence in SyntheticSections.
496 if (isa
<SyntheticSection
>(isec
))
498 // Use sectionMap to make sure we only scan Thumb code and not Arm or inline
499 // data. We have already sorted mapSyms in ascending order and removed
500 // consecutive mapping symbols of the same type. Our range of executable
501 // instructions to scan is therefore [thumbSym->value, nonThumbSym->value)
502 // or [thumbSym->value, section size).
503 std::vector
<const Defined
*> &mapSyms
= sectionMap
[isec
];
505 auto thumbSym
= mapSyms
.begin();
506 while (thumbSym
!= mapSyms
.end()) {
507 auto nonThumbSym
= std::next(thumbSym
);
508 uint64_t off
= (*thumbSym
)->value
;
509 uint64_t limit
= nonThumbSym
== mapSyms
.end() ? isec
->content().size()
510 : (*nonThumbSym
)->value
;
512 while (off
< limit
) {
513 ScanResult sr
= scanCortexA8Errata657417(isec
, off
, limit
);
515 implementPatch(sr
, isec
, patches
);
517 if (nonThumbSym
== mapSyms
.end())
519 thumbSym
= std::next(nonThumbSym
);
525 bool ARMErr657417Patcher::createFixes() {
529 bool addressesChanged
= false;
530 for (OutputSection
*os
: ctx
.outputSections
) {
531 if (!(os
->flags
& SHF_ALLOC
) || !(os
->flags
& SHF_EXECINSTR
))
533 for (SectionCommand
*cmd
: os
->commands
)
534 if (auto *isd
= dyn_cast
<InputSectionDescription
>(cmd
)) {
535 std::vector
<Patch657417Section
*> patches
=
536 patchInputSectionDescription(*isd
);
537 if (!patches
.empty()) {
538 insertPatches(*isd
, patches
);
539 addressesChanged
= true;
543 return addressesChanged
;