Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / lld / ELF / ARMErrataFix.cpp
blobcb9ff9ca22aae2c81d9f6b8576b03acc088020b8
1 //===- ARMErrataFix.cpp ---------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file implements Section Patching for the purpose of working around the
9 // Cortex-a8 erratum 657417 "A 32bit branch instruction that spans 2 4K regions
10 // can result in an incorrect instruction fetch or processor deadlock." The
11 // erratum affects all but r1p7, r2p5, r2p6, r3p1 and r3p2 revisions of the
12 // Cortex-A8. A high level description of the patching technique is given in
13 // the opening comment of AArch64ErrataFix.cpp.
14 //===----------------------------------------------------------------------===//
16 #include "ARMErrataFix.h"
17 #include "InputFiles.h"
18 #include "LinkerScript.h"
19 #include "OutputSections.h"
20 #include "Relocations.h"
21 #include "Symbols.h"
22 #include "SyntheticSections.h"
23 #include "Target.h"
24 #include "lld/Common/CommonLinkerContext.h"
25 #include "lld/Common/Strings.h"
26 #include "llvm/Support/Endian.h"
27 #include <algorithm>
29 using namespace llvm;
30 using namespace llvm::ELF;
31 using namespace llvm::object;
32 using namespace llvm::support;
33 using namespace llvm::support::endian;
34 using namespace lld;
35 using namespace lld::elf;
37 // The documented title for Erratum 657417 is:
38 // "A 32bit branch instruction that spans two 4K regions can result in an
39 // incorrect instruction fetch or processor deadlock". Graphically using a
40 // 32-bit B.w instruction encoded as a pair of halfwords 0xf7fe 0xbfff
41 // xxxxxx000 // Memory region 1 start
42 // target:
43 // ...
44 // xxxxxxffe f7fe // First halfword of branch to target:
45 // xxxxxx000 // Memory region 2 start
46 // xxxxxx002 bfff // Second halfword of branch to target:
48 // The specific trigger conditions that can be detected at link time are:
49 // - There is a 32-bit Thumb-2 branch instruction with an address of the form
50 // xxxxxxFFE. The first 2 bytes of the instruction are in 4KiB region 1, the
51 // second 2 bytes are in region 2.
52 // - The branch instruction is one of BLX, BL, B.w BCC.w
53 // - The instruction preceding the branch is a 32-bit non-branch instruction.
54 // - The target of the branch is in region 1.
56 // The linker mitigation for the fix is to redirect any branch that meets the
57 // erratum conditions to a patch section containing a branch to the target.
59 // As adding patch sections may move branches onto region boundaries the patch
60 // must iterate until no more patches are added.
62 // Example, before:
63 // 00000FFA func: NOP.w // 32-bit Thumb function
64 // 00000FFE B.W func // 32-bit branch spanning 2 regions, dest in 1st.
65 // Example, after:
66 // 00000FFA func: NOP.w // 32-bit Thumb function
67 // 00000FFE B.w __CortexA8657417_00000FFE
68 // 00001002 2 - bytes padding
69 // 00001004 __CortexA8657417_00000FFE: B.w func
71 class elf::Patch657417Section final : public SyntheticSection {
72 public:
73 Patch657417Section(InputSection *p, uint64_t off, uint32_t instr, bool isARM);
75 void writeTo(uint8_t *buf) override;
77 size_t getSize() const override { return 4; }
79 // Get the virtual address of the branch instruction at patcheeOffset.
80 uint64_t getBranchAddr() const;
82 static bool classof(const SectionBase *d) {
83 return d->kind() == InputSectionBase::Synthetic && d->name ==".text.patch";
86 // The Section we are patching.
87 const InputSection *patchee;
88 // The offset of the instruction in the Patchee section we are patching.
89 uint64_t patcheeOffset;
90 // A label for the start of the Patch that we can use as a relocation target.
91 Symbol *patchSym;
92 // A decoding of the branch instruction at patcheeOffset.
93 uint32_t instr;
94 // True If the patch is to be written in ARM state, otherwise the patch will
95 // be written in Thumb state.
96 bool isARM;
// Decide whether hw, read as the first halfword of a Thumb instruction, begins
// a 32-bit encoding.
// Reference: ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition,
// section A6.3: 32-bit Thumb instruction encoding
// |             HW1                   |               HW2                |
// | 1 1 1 | op1 (2) | op2 (7) | x (4) |op|           x (15)              |
// With op1 == 0b00, a 16-bit instruction is encoded.
//
// Only the first halfword is examined: the top three bits must all be set
// and op1 must be non-zero.
static bool is32bitInstruction(uint16_t hw) {
  const bool topThreeBitsSet = (hw >> 13) == 0x7;
  const unsigned op1 = (hw >> 11) & 0x3;
  return topThreeBitsSet && op1 != 0;
}
// Thumb-2 branch decoding. Reference: ARM Architecture Reference Manual
// ARMv7-A and ARMv7-R edition, section A6.3.4 Branches and miscellaneous
// control.
// |             HW1              |               HW2                      |
// | 1 1 1 | 1 0 | op (7) | x (4) | 1 | op1 (3) | op2 (4) | imm8 (8)       |
// op1 == 0x0 op != x111xxx | Conditional branch (Bcc.W)
// op1 == 0x1               | Branch (B.W)
// op1 == 1x0               | Branch with Link and Exchange (BLX.w)
// op1 == 1x1               | Branch with Link (BL.W)

// Return true if instr (HW1 in the upper 16 bits, HW2 in the lower 16 bits)
// is a 32-bit conditional branch, Bcc.W.
static bool isBcc(uint32_t instr) {
  constexpr uint32_t encodingMask = 0xf800d000;
  constexpr uint32_t bccPattern = 0xf0008000;
  // When these three bits of op are all set the encoding is not a
  // conditional branch.
  constexpr uint32_t excludedOpBits = 0x03800000;

  if ((instr & encodingMask) != bccPattern)
    return false;
  return (instr & excludedOpBits) != excludedOpBits;
}
// Return true if instr (HW1 in the upper 16 bits, HW2 in the lower 16 bits)
// is a 32-bit unconditional branch, B.W.
static bool isB(uint32_t instr) {
  constexpr uint32_t encodingMask = 0xf800d000;
  constexpr uint32_t branchPattern = 0xf0009000;
  return (instr & encodingMask) == branchPattern;
}
// Return true if instr (HW1 in the upper 16 bits, HW2 in the lower 16 bits)
// is a 32-bit Branch with Link and Exchange, BLX.
static bool isBLX(uint32_t instr) {
  constexpr uint32_t encodingMask = 0xf800d000;
  constexpr uint32_t blxPattern = 0xf000c000;
  return (instr & encodingMask) == blxPattern;
}
// Return true if instr (HW1 in the upper 16 bits, HW2 in the lower 16 bits)
// is a 32-bit Branch with Link, BL.
static bool isBL(uint32_t instr) {
  constexpr uint32_t encodingMask = 0xf800d000;
  constexpr uint32_t blPattern = 0xf000d000;
  return (instr & encodingMask) == blPattern;
}
132 static bool is32bitBranch(uint32_t instr) {
133 return isBcc(instr) || isB(instr) || isBL(instr) || isBLX(instr);
136 Patch657417Section::Patch657417Section(InputSection *p, uint64_t off,
137 uint32_t instr, bool isARM)
138 : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4,
139 ".text.patch"),
140 patchee(p), patcheeOffset(off), instr(instr), isARM(isARM) {
141 parent = p->getParent();
142 patchSym = addSyntheticLocal(
143 saver().save("__CortexA8657417_" + utohexstr(getBranchAddr())), STT_FUNC,
144 isARM ? 0 : 1, getSize(), *this);
145 addSyntheticLocal(saver().save(isARM ? "$a" : "$t"), STT_NOTYPE, 0, 0, *this);
148 uint64_t Patch657417Section::getBranchAddr() const {
149 return patchee->getVA(patcheeOffset);
152 // Given a branch instruction instr at sourceAddr work out its destination
153 // address. This is only used when the branch instruction has no relocation.
154 static uint64_t getThumbDestAddr(uint64_t sourceAddr, uint32_t instr) {
155 uint8_t buf[4];
156 write16le(buf, instr >> 16);
157 write16le(buf + 2, instr & 0x0000ffff);
158 int64_t offset;
159 if (isBcc(instr))
160 offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP19);
161 else if (isB(instr))
162 offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP24);
163 else
164 offset = target->getImplicitAddend(buf, R_ARM_THM_CALL);
165 // A BLX instruction from Thumb to Arm may have an address that is
166 // not 4-byte aligned. As Arm instructions are always 4-byte aligned
167 // the instruction is calculated (from Arm ARM):
168 // targetAddress = Align(PC, 4) + imm32
169 // where
170 // Align(x, y) = y * (x Div y)
171 // which corresponds to alignDown.
172 if (isBLX(instr))
173 sourceAddr = alignDown(sourceAddr, 4);
174 return sourceAddr + offset + 4;
177 void Patch657417Section::writeTo(uint8_t *buf) {
178 // The base instruction of the patch is always a 32-bit unconditional branch.
179 if (isARM)
180 write32le(buf, 0xea000000);
181 else
182 write32le(buf, 0x9000f000);
183 // If we have a relocation then apply it.
184 if (!relocs().empty()) {
185 target->relocateAlloc(*this, buf);
186 return;
189 // If we don't have a relocation then we must calculate and write the offset
190 // ourselves.
191 // Get the destination offset from the addend in the branch instruction.
192 // We cannot use the instruction in the patchee section as this will have
193 // been altered to point to us!
194 uint64_t s = getThumbDestAddr(getBranchAddr(), instr);
195 // A BLX changes the state of the branch in the patch to Arm state, which
196 // has a PC Bias of 8, whereas in all other cases the branch is in Thumb
197 // state with a PC Bias of 4.
198 uint64_t pcBias = isBLX(instr) ? 8 : 4;
199 uint64_t p = getVA(pcBias);
200 target->relocateNoSym(buf, isARM ? R_ARM_JUMP24 : R_ARM_THM_JUMP24, s - p);
203 // Given a branch instruction spanning two 4KiB regions, at offset off from the
204 // start of isec, return true if the destination of the branch is within the
205 // first of the two 4Kib regions.
206 static bool branchDestInFirstRegion(const InputSection *isec, uint64_t off,
207 uint32_t instr, const Relocation *r) {
208 uint64_t sourceAddr = isec->getVA(0) + off;
209 assert((sourceAddr & 0xfff) == 0xffe);
210 uint64_t destAddr;
211 // If there is a branch relocation at the same offset we must use this to
212 // find the destination address as the branch could be indirected via a thunk
213 // or the PLT.
214 if (r) {
215 uint64_t dst = (r->expr == R_PLT_PC) ? r->sym->getPltVA() : r->sym->getVA();
216 // Account for Thumb PC bias, usually cancelled to 0 by addend of -4.
217 destAddr = dst + r->addend + 4;
218 } else {
219 // If there is no relocation, we must have an intra-section branch
220 // We must extract the offset from the addend manually.
221 destAddr = getThumbDestAddr(sourceAddr, instr);
224 return (destAddr & 0xfffff000) == (sourceAddr & 0xfffff000);
227 // Return true if a branch can reach a patch section placed after isec.
228 // The Bcc.w instruction has a range of 1 MiB, all others have 16 MiB.
229 static bool patchInRange(const InputSection *isec, uint64_t off,
230 uint32_t instr) {
232 // We need the branch at source to reach a patch section placed immediately
233 // after isec. As there can be more than one patch in the patch section we
234 // add 0x100 as contingency to account for worst case of 1 branch every 4KiB
235 // for a 1 MiB range.
236 return target->inBranchRange(
237 isBcc(instr) ? R_ARM_THM_JUMP19 : R_ARM_THM_JUMP24, isec->getVA(off),
238 isec->getVA() + isec->getSize() + 0x100);
241 struct ScanResult {
242 // Offset of branch within its InputSection.
243 uint64_t off;
244 // Cached decoding of the branch instruction.
245 uint32_t instr;
246 // Branch relocation at off. Will be nullptr if no relocation exists.
247 Relocation *rel;
250 // Detect the erratum sequence, returning the offset of the branch instruction
251 // and a decoding of the branch. If the erratum sequence is not found then
252 // return an offset of 0 for the branch. 0 is a safe value to use for no patch
253 // as there must be at least one 32-bit non-branch instruction before the
254 // branch so the minimum offset for a patch is 4.
255 static ScanResult scanCortexA8Errata657417(InputSection *isec, uint64_t &off,
256 uint64_t limit) {
257 uint64_t isecAddr = isec->getVA(0);
258 // Advance Off so that (isecAddr + off) modulo 0x1000 is at least 0xffa. We
259 // need to check for a 32-bit instruction immediately before a 32-bit branch
260 // at 0xffe modulo 0x1000.
261 off = alignTo(isecAddr + off, 0x1000, 0xffa) - isecAddr;
262 if (off >= limit || limit - off < 8) {
263 // Need at least 2 4-byte sized instructions to trigger erratum.
264 off = limit;
265 return {0, 0, nullptr};
268 ScanResult scanRes = {0, 0, nullptr};
269 const uint8_t *buf = isec->content().begin();
270 // ARMv7-A Thumb 32-bit instructions are encoded 2 consecutive
271 // little-endian halfwords.
272 const ulittle16_t *instBuf = reinterpret_cast<const ulittle16_t *>(buf + off);
273 uint16_t hw11 = *instBuf++;
274 uint16_t hw12 = *instBuf++;
275 uint16_t hw21 = *instBuf++;
276 uint16_t hw22 = *instBuf++;
277 if (is32bitInstruction(hw11) && is32bitInstruction(hw21)) {
278 uint32_t instr1 = (hw11 << 16) | hw12;
279 uint32_t instr2 = (hw21 << 16) | hw22;
280 if (!is32bitBranch(instr1) && is32bitBranch(instr2)) {
281 // Find a relocation for the branch if it exists. This will be used
282 // to determine the target.
283 uint64_t branchOff = off + 4;
284 auto relIt = llvm::find_if(isec->relocs(), [=](const Relocation &r) {
285 return r.offset == branchOff &&
286 (r.type == R_ARM_THM_JUMP19 || r.type == R_ARM_THM_JUMP24 ||
287 r.type == R_ARM_THM_CALL);
289 if (relIt != isec->relocs().end())
290 scanRes.rel = &(*relIt);
291 if (branchDestInFirstRegion(isec, branchOff, instr2, scanRes.rel)) {
292 if (patchInRange(isec, branchOff, instr2)) {
293 scanRes.off = branchOff;
294 scanRes.instr = instr2;
295 } else {
296 warn(toString(isec->file) +
297 ": skipping cortex-a8 657417 erratum sequence, section " +
298 isec->name + " is too large to patch");
303 off += 0x1000;
304 return scanRes;
307 void ARMErr657417Patcher::init() {
308 // The Arm ABI permits a mix of ARM, Thumb and Data in the same
309 // InputSection. We must only scan Thumb instructions to avoid false
310 // matches. We use the mapping symbols in the InputObjects to identify this
311 // data, caching the results in sectionMap so we don't have to recalculate
312 // it each pass.
314 // The ABI Section 4.5.5 Mapping symbols; defines local symbols that describe
315 // half open intervals [Symbol Value, Next Symbol Value) of code and data
316 // within sections. If there is no next symbol then the half open interval is
317 // [Symbol Value, End of section). The type, code or data, is determined by
318 // the mapping symbol name, $a for Arm code, $t for Thumb code, $d for data.
319 auto isArmMapSymbol = [](const Symbol *s) {
320 return s->getName() == "$a" || s->getName().starts_with("$a.");
322 auto isThumbMapSymbol = [](const Symbol *s) {
323 return s->getName() == "$t" || s->getName().starts_with("$t.");
325 auto isDataMapSymbol = [](const Symbol *s) {
326 return s->getName() == "$d" || s->getName().starts_with("$d.");
329 // Collect mapping symbols for every executable InputSection.
330 for (ELFFileBase *file : ctx.objectFiles) {
331 for (Symbol *s : file->getLocalSymbols()) {
332 auto *def = dyn_cast<Defined>(s);
333 if (!def)
334 continue;
335 if (!isArmMapSymbol(def) && !isThumbMapSymbol(def) &&
336 !isDataMapSymbol(def))
337 continue;
338 if (auto *sec = dyn_cast_or_null<InputSection>(def->section))
339 if (sec->flags & SHF_EXECINSTR)
340 sectionMap[sec].push_back(def);
343 // For each InputSection make sure the mapping symbols are in sorted in
344 // ascending order and are in alternating Thumb, non-Thumb order.
345 for (auto &kv : sectionMap) {
346 std::vector<const Defined *> &mapSyms = kv.second;
347 llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {
348 return a->value < b->value;
350 mapSyms.erase(std::unique(mapSyms.begin(), mapSyms.end(),
351 [=](const Defined *a, const Defined *b) {
352 return (isThumbMapSymbol(a) ==
353 isThumbMapSymbol(b));
355 mapSyms.end());
356 // Always start with a Thumb Mapping Symbol
357 if (!mapSyms.empty() && !isThumbMapSymbol(mapSyms.front()))
358 mapSyms.erase(mapSyms.begin());
360 initialized = true;
363 void ARMErr657417Patcher::insertPatches(
364 InputSectionDescription &isd, std::vector<Patch657417Section *> &patches) {
365 uint64_t spacing = 0x100000 - 0x7500;
366 uint64_t isecLimit;
367 uint64_t prevIsecLimit = isd.sections.front()->outSecOff;
368 uint64_t patchUpperBound = prevIsecLimit + spacing;
369 uint64_t outSecAddr = isd.sections.front()->getParent()->addr;
371 // Set the outSecOff of patches to the place where we want to insert them.
372 // We use a similar strategy to initial thunk placement, using 1 MiB as the
373 // range of the Thumb-2 conditional branch with a contingency accounting for
374 // thunk generation.
375 auto patchIt = patches.begin();
376 auto patchEnd = patches.end();
377 for (const InputSection *isec : isd.sections) {
378 isecLimit = isec->outSecOff + isec->getSize();
379 if (isecLimit > patchUpperBound) {
380 for (; patchIt != patchEnd; ++patchIt) {
381 if ((*patchIt)->getBranchAddr() - outSecAddr >= prevIsecLimit)
382 break;
383 (*patchIt)->outSecOff = prevIsecLimit;
385 patchUpperBound = prevIsecLimit + spacing;
387 prevIsecLimit = isecLimit;
389 for (; patchIt != patchEnd; ++patchIt)
390 (*patchIt)->outSecOff = isecLimit;
392 // Merge all patch sections. We use the outSecOff assigned above to
393 // determine the insertion point. This is ok as we only merge into an
394 // InputSectionDescription once per pass, and at the end of the pass
395 // assignAddresses() will recalculate all the outSecOff values.
396 SmallVector<InputSection *, 0> tmp;
397 tmp.reserve(isd.sections.size() + patches.size());
398 auto mergeCmp = [](const InputSection *a, const InputSection *b) {
399 if (a->outSecOff != b->outSecOff)
400 return a->outSecOff < b->outSecOff;
401 return isa<Patch657417Section>(a) && !isa<Patch657417Section>(b);
403 std::merge(isd.sections.begin(), isd.sections.end(), patches.begin(),
404 patches.end(), std::back_inserter(tmp), mergeCmp);
405 isd.sections = std::move(tmp);
408 // Given a branch instruction described by ScanRes redirect it to a patch
409 // section containing an unconditional branch instruction to the target.
410 // Ensure that this patch section is 4-byte aligned so that the branch cannot
411 // span two 4 KiB regions. Place the patch section so that it is always after
412 // isec so the branch we are patching always goes forwards.
413 static void implementPatch(ScanResult sr, InputSection *isec,
414 std::vector<Patch657417Section *> &patches) {
416 log("detected cortex-a8-657419 erratum sequence starting at " +
417 utohexstr(isec->getVA(sr.off)) + " in unpatched output.");
418 Patch657417Section *psec;
419 // We have two cases to deal with.
420 // Case 1. There is a relocation at patcheeOffset to a symbol. The
421 // unconditional branch in the patch must have a relocation so that any
422 // further redirection via the PLT or a Thunk happens as normal. At
423 // patcheeOffset we redirect the existing relocation to a Symbol defined at
424 // the start of the patch section.
426 // Case 2. There is no relocation at patcheeOffset. We are unlikely to have
427 // a symbol that we can use as a target for a relocation in the patch section.
428 // Luckily we know that the destination cannot be indirected via the PLT or
429 // a Thunk so we can just write the destination directly.
430 if (sr.rel) {
431 // Case 1. We have an existing relocation to redirect to patch and a
432 // Symbol target.
434 // Create a branch relocation for the unconditional branch in the patch.
435 // This can be redirected via the PLT or Thunks.
436 RelType patchRelType = R_ARM_THM_JUMP24;
437 int64_t patchRelAddend = sr.rel->addend;
438 bool destIsARM = false;
439 if (isBL(sr.instr) || isBLX(sr.instr)) {
440 // The final target of the branch may be ARM or Thumb, if the target
441 // is ARM then we write the patch in ARM state to avoid a state change
442 // Thunk from the patch to the target.
443 uint64_t dstSymAddr = (sr.rel->expr == R_PLT_PC) ? sr.rel->sym->getPltVA()
444 : sr.rel->sym->getVA();
445 destIsARM = (dstSymAddr & 1) == 0;
447 psec = make<Patch657417Section>(isec, sr.off, sr.instr, destIsARM);
448 if (destIsARM) {
449 // The patch will be in ARM state. Use an ARM relocation and account for
450 // the larger ARM PC-bias of 8 rather than Thumb's 4.
451 patchRelType = R_ARM_JUMP24;
452 patchRelAddend -= 4;
454 psec->addReloc(
455 Relocation{sr.rel->expr, patchRelType, 0, patchRelAddend, sr.rel->sym});
456 // Redirect the existing branch relocation to the patch.
457 sr.rel->expr = R_PC;
458 sr.rel->addend = -4;
459 sr.rel->sym = psec->patchSym;
460 } else {
461 // Case 2. We do not have a relocation to the patch. Add a relocation of the
462 // appropriate type to the patch at patcheeOffset.
464 // The destination is ARM if we have a BLX.
465 psec = make<Patch657417Section>(isec, sr.off, sr.instr, isBLX(sr.instr));
466 RelType type;
467 if (isBcc(sr.instr))
468 type = R_ARM_THM_JUMP19;
469 else if (isB(sr.instr))
470 type = R_ARM_THM_JUMP24;
471 else
472 type = R_ARM_THM_CALL;
473 isec->addReloc(Relocation{R_PC, type, sr.off, -4, psec->patchSym});
475 patches.push_back(psec);
478 // Scan all the instructions in InputSectionDescription, for each instance of
479 // the erratum sequence create a Patch657417Section. We return the list of
480 // Patch657417Sections that need to be applied to the InputSectionDescription.
481 std::vector<Patch657417Section *>
482 ARMErr657417Patcher::patchInputSectionDescription(
483 InputSectionDescription &isd) {
484 std::vector<Patch657417Section *> patches;
485 for (InputSection *isec : isd.sections) {
486 // LLD doesn't use the erratum sequence in SyntheticSections.
487 if (isa<SyntheticSection>(isec))
488 continue;
489 // Use sectionMap to make sure we only scan Thumb code and not Arm or inline
490 // data. We have already sorted mapSyms in ascending order and removed
491 // consecutive mapping symbols of the same type. Our range of executable
492 // instructions to scan is therefore [thumbSym->value, nonThumbSym->value)
493 // or [thumbSym->value, section size).
494 std::vector<const Defined *> &mapSyms = sectionMap[isec];
496 auto thumbSym = mapSyms.begin();
497 while (thumbSym != mapSyms.end()) {
498 auto nonThumbSym = std::next(thumbSym);
499 uint64_t off = (*thumbSym)->value;
500 uint64_t limit = nonThumbSym == mapSyms.end() ? isec->content().size()
501 : (*nonThumbSym)->value;
503 while (off < limit) {
504 ScanResult sr = scanCortexA8Errata657417(isec, off, limit);
505 if (sr.off)
506 implementPatch(sr, isec, patches);
508 if (nonThumbSym == mapSyms.end())
509 break;
510 thumbSym = std::next(nonThumbSym);
513 return patches;
516 bool ARMErr657417Patcher::createFixes() {
517 if (!initialized)
518 init();
520 bool addressesChanged = false;
521 for (OutputSection *os : outputSections) {
522 if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR))
523 continue;
524 for (SectionCommand *cmd : os->commands)
525 if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) {
526 std::vector<Patch657417Section *> patches =
527 patchInputSectionDescription(*isd);
528 if (!patches.empty()) {
529 insertPatches(*isd, patches);
530 addressesChanged = true;
534 return addressesChanged;