[SLP][REVEC] The vectorized result for ShuffleVector may not be ShuffleVectorInst...
[llvm-project.git] / bolt / lib / Rewrite / LinuxKernelRewriter.cpp
blob03b414b71caca77ace98c99c11440241db258a99
1 //===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Support for updating Linux Kernel metadata.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Rewrite/MetadataRewriter.h"
15 #include "bolt/Rewrite/MetadataRewriters.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
20 #include "llvm/Support/BinaryStreamWriter.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Errc.h"
25 #define DEBUG_TYPE "bolt-linux"
27 using namespace llvm;
28 using namespace bolt;
30 namespace opts {
32 static cl::opt<bool>
33 AltInstHasPadLen("alt-inst-has-padlen",
34 cl::desc("specify that .altinstructions has padlen field"),
35 cl::init(false), cl::Hidden, cl::cat(BoltCategory));
37 static cl::opt<uint32_t>
38 AltInstFeatureSize("alt-inst-feature-size",
39 cl::desc("size of feature field in .altinstructions"),
40 cl::init(2), cl::Hidden, cl::cat(BoltCategory));
42 static cl::opt<bool>
43 DumpAltInstructions("dump-alt-instructions",
44 cl::desc("dump Linux alternative instructions info"),
45 cl::init(false), cl::Hidden, cl::cat(BoltCategory));
47 static cl::opt<bool>
48 DumpExceptions("dump-linux-exceptions",
49 cl::desc("dump Linux kernel exception table"),
50 cl::init(false), cl::Hidden, cl::cat(BoltCategory));
52 static cl::opt<bool>
53 DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
54 cl::init(false), cl::Hidden, cl::cat(BoltCategory));
56 static cl::opt<bool> DumpParavirtualPatchSites(
57 "dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"),
58 cl::init(false), cl::Hidden, cl::cat(BoltCategory));
60 static cl::opt<bool>
61 DumpPCIFixups("dump-pci-fixups",
62 cl::desc("dump Linux kernel PCI fixup table"),
63 cl::init(false), cl::Hidden, cl::cat(BoltCategory));
65 static cl::opt<bool> DumpSMPLocks("dump-smp-locks",
66 cl::desc("dump Linux kernel SMP locks"),
67 cl::init(false), cl::Hidden,
68 cl::cat(BoltCategory));
70 static cl::opt<bool> DumpStaticCalls("dump-static-calls",
71 cl::desc("dump Linux kernel static calls"),
72 cl::init(false), cl::Hidden,
73 cl::cat(BoltCategory));
75 static cl::opt<bool>
76 DumpStaticKeys("dump-static-keys",
77 cl::desc("dump Linux kernel static keys jump table"),
78 cl::init(false), cl::Hidden, cl::cat(BoltCategory));
80 static cl::opt<bool> LongJumpLabels(
81 "long-jump-labels",
82 cl::desc("always use long jumps/nops for Linux kernel static keys"),
83 cl::init(false), cl::Hidden, cl::cat(BoltCategory));
85 static cl::opt<bool>
86 PrintORC("print-orc",
87 cl::desc("print ORC unwind information for instructions"),
88 cl::init(true), cl::Hidden, cl::cat(BoltCategory));
90 } // namespace opts
/// Linux Kernel supports stack unwinding using ORC (oops rewind capability).
/// ORC state at every IP can be described by the following data structure.
struct ORCState {
  int16_t SPOffset; // Offset for recovering the stack pointer.
  int16_t BPOffset; // Offset for recovering the base pointer.
  int16_t Info;     // Encoded register and entry-type information.

  /// Two states are equal iff all three fields match.
  bool operator==(const ORCState &Other) const {
    return SPOffset == Other.SPOffset && BPOffset == Other.BPOffset &&
           Info == Other.Info;
  }

  bool operator!=(const ORCState &Other) const { return !(Other == *this); }
};

/// Section terminator ORC entry.
static ORCState NullORC = {0, 0, 0};
110 /// Basic printer for ORC entry. It does not provide the same level of
111 /// information as objtool (for now).
112 inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) {
113 if (!opts::PrintORC)
114 return OS;
115 if (E != NullORC)
116 OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset,
117 E.Info);
118 else
119 OS << "{terminator}";
121 return OS;
124 namespace {
126 class LinuxKernelRewriter final : public MetadataRewriter {
127 /// Information required for updating metadata referencing an instruction.
128 struct InstructionFixup {
129 BinarySection &Section; // Section referencing the instruction.
130 uint64_t Offset; // Offset in the section above.
131 BinaryFunction &BF; // Function containing the instruction.
132 MCSymbol &Label; // Label marking the instruction.
133 bool IsPCRelative; // If the reference type is relative.
135 std::vector<InstructionFixup> Fixups;
137 /// Size of an entry in .smp_locks section.
138 static constexpr size_t SMP_LOCKS_ENTRY_SIZE = 4;
140 /// Linux ORC sections.
141 ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address;
142 ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address;
144 /// Size of entries in ORC sections.
145 static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6;
146 static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4;
148 struct ORCListEntry {
149 uint64_t IP; /// Instruction address.
150 BinaryFunction *BF; /// Binary function corresponding to the entry.
151 ORCState ORC; /// Stack unwind info in ORC format.
153 /// ORC entries are sorted by their IPs. Terminator entries (NullORC)
154 /// should precede other entries with the same address.
155 bool operator<(const ORCListEntry &Other) const {
156 if (IP < Other.IP)
157 return 1;
158 if (IP > Other.IP)
159 return 0;
160 return ORC == NullORC && Other.ORC != NullORC;
164 using ORCListType = std::vector<ORCListEntry>;
165 ORCListType ORCEntries;
167 /// Number of entries in the input file ORC sections.
168 uint64_t NumORCEntries = 0;
170 /// Section containing static keys jump table.
171 ErrorOr<BinarySection &> StaticKeysJumpSection = std::errc::bad_address;
172 uint64_t StaticKeysJumpTableAddress = 0;
173 static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE = 8;
175 struct JumpInfoEntry {
176 bool Likely;
177 bool InitValue;
179 SmallVector<JumpInfoEntry, 16> JumpInfo;
181 /// Static key entries that need nop conversion.
182 DenseSet<uint32_t> NopIDs;
184 /// Section containing static call table.
185 ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address;
186 uint64_t StaticCallTableAddress = 0;
187 static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8;
189 struct StaticCallInfo {
190 uint32_t ID; /// Identifier of the entry in the table.
191 BinaryFunction *Function; /// Function containing associated call.
192 MCSymbol *Label; /// Label attached to the call.
194 using StaticCallListType = std::vector<StaticCallInfo>;
195 StaticCallListType StaticCallEntries;
197 /// Section containing the Linux exception table.
198 ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address;
199 static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12;
201 /// Functions with exception handling code.
202 DenseSet<BinaryFunction *> FunctionsWithExceptions;
204 /// Section with paravirtual patch sites.
205 ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address;
207 /// Alignment of paravirtual patch structures.
208 static constexpr size_t PARA_PATCH_ALIGN = 8;
210 /// .altinstructions section.
211 ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address;
213 /// Section containing Linux bug table.
214 ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address;
216 /// Size of bug_entry struct.
217 static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12;
219 /// List of bug entries per function.
220 using FunctionBugListType =
221 DenseMap<BinaryFunction *, SmallVector<uint32_t, 2>>;
222 FunctionBugListType FunctionBugList;
224 /// .pci_fixup section.
225 ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address;
226 static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16;
228 /// Process linux kernel special sections and their relocations.
229 void processLKSections();
231 /// Process __ksymtab and __ksymtab_gpl.
232 void processLKKSymtab(bool IsGPL = false);
234 // Create relocations in sections requiring fixups.
236 // Make sure functions that will not be emitted are marked as such before this
237 // function is executed.
238 void processInstructionFixups();
240 /// Process .smp_locks section.
241 Error processSMPLocks();
243 /// Read ORC unwind information and annotate instructions.
244 Error readORCTables();
246 /// Update ORC for functions once CFG is constructed.
247 Error processORCPostCFG();
249 /// Update ORC data in the binary.
250 Error rewriteORCTables();
252 /// Validate written ORC tables after binary emission.
253 Error validateORCTables();
255 /// Static call table handling.
256 Error readStaticCalls();
257 Error rewriteStaticCalls();
259 Error readExceptionTable();
260 Error rewriteExceptionTable();
262 /// Paravirtual instruction patch sites.
263 Error readParaInstructions();
264 Error rewriteParaInstructions();
266 /// __bug_table section handling.
267 Error readBugTable();
268 Error rewriteBugTable();
270 /// Do no process functions containing instruction annotated with
271 /// \p Annotation.
272 void skipFunctionsWithAnnotation(StringRef Annotation) const;
274 /// Handle alternative instruction info from .altinstructions.
275 Error readAltInstructions();
276 void processAltInstructionsPostCFG();
277 Error tryReadAltInstructions(uint32_t AltInstFeatureSize,
278 bool AltInstHasPadLen, bool ParseOnly);
280 /// Read .pci_fixup
281 Error readPCIFixupTable();
283 /// Handle static keys jump table.
284 Error readStaticKeysJumpTable();
285 Error rewriteStaticKeysJumpTable();
286 Error updateStaticKeysJumpTablePostEmit();
288 public:
289 LinuxKernelRewriter(BinaryContext &BC)
290 : MetadataRewriter("linux-kernel-rewriter", BC) {}
292 Error preCFGInitializer() override {
293 processLKSections();
295 if (Error E = processSMPLocks())
296 return E;
298 if (Error E = readStaticCalls())
299 return E;
301 if (Error E = readExceptionTable())
302 return E;
304 if (Error E = readParaInstructions())
305 return E;
307 if (Error E = readBugTable())
308 return E;
310 if (Error E = readAltInstructions())
311 return E;
313 // Some ORC entries could be linked to alternative instruction
314 // sequences. Hence, we read ORC after .altinstructions.
315 if (Error E = readORCTables())
316 return E;
318 if (Error E = readPCIFixupTable())
319 return E;
321 if (Error E = readStaticKeysJumpTable())
322 return E;
324 return Error::success();
327 Error postCFGInitializer() override {
328 if (Error E = processORCPostCFG())
329 return E;
331 processAltInstructionsPostCFG();
333 return Error::success();
336 Error preEmitFinalizer() override {
337 // Since rewriteExceptionTable() can mark functions as non-simple, run it
338 // before other rewriters that depend on simple/emit status.
339 if (Error E = rewriteExceptionTable())
340 return E;
342 if (Error E = rewriteParaInstructions())
343 return E;
345 if (Error E = rewriteORCTables())
346 return E;
348 if (Error E = rewriteStaticCalls())
349 return E;
351 if (Error E = rewriteStaticKeysJumpTable())
352 return E;
354 if (Error E = rewriteBugTable())
355 return E;
357 processInstructionFixups();
359 return Error::success();
362 Error postEmitFinalizer() override {
363 if (Error E = updateStaticKeysJumpTablePostEmit())
364 return E;
366 if (Error E = validateORCTables())
367 return E;
369 return Error::success();
373 void LinuxKernelRewriter::processLKSections() {
374 processLKKSymtab();
375 processLKKSymtab(true);
378 /// Process __ksymtab[_gpl] sections of Linux Kernel.
379 /// This section lists all the vmlinux symbols that kernel modules can access.
381 /// All the entries are 4 bytes each and hence we can read them by one by one
382 /// and ignore the ones that are not pointing to the .text section. All pointers
383 /// are PC relative offsets. Always, points to the beginning of the function.
384 void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) {
385 StringRef SectionName = "__ksymtab";
386 if (IsGPL)
387 SectionName = "__ksymtab_gpl";
388 ErrorOr<BinarySection &> SectionOrError =
389 BC.getUniqueSectionByName(SectionName);
390 assert(SectionOrError &&
391 "__ksymtab[_gpl] section not found in Linux Kernel binary");
392 const uint64_t SectionSize = SectionOrError->getSize();
393 const uint64_t SectionAddress = SectionOrError->getAddress();
394 assert((SectionSize % 4) == 0 &&
395 "The size of the __ksymtab[_gpl] section should be a multiple of 4");
397 for (uint64_t I = 0; I < SectionSize; I += 4) {
398 const uint64_t EntryAddress = SectionAddress + I;
399 ErrorOr<int64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4);
400 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry");
401 const int32_t SignedOffset = *Offset;
402 const uint64_t RefAddress = EntryAddress + SignedOffset;
403 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(RefAddress);
404 if (!BF)
405 continue;
407 BC.addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0,
408 *Offset);
412 /// .smp_locks section contains PC-relative references to instructions with LOCK
413 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems.
414 Error LinuxKernelRewriter::processSMPLocks() {
415 ErrorOr<BinarySection &> SMPLocksSection =
416 BC.getUniqueSectionByName(".smp_locks");
417 if (!SMPLocksSection)
418 return Error::success();
420 const uint64_t SectionSize = SMPLocksSection->getSize();
421 const uint64_t SectionAddress = SMPLocksSection->getAddress();
422 if (SectionSize % SMP_LOCKS_ENTRY_SIZE)
423 return createStringError(errc::executable_format_error,
424 "bad size of .smp_locks section");
426 DataExtractor DE = DataExtractor(SMPLocksSection->getContents(),
427 BC.AsmInfo->isLittleEndian(),
428 BC.AsmInfo->getCodePointerSize());
429 DataExtractor::Cursor Cursor(0);
430 while (Cursor && Cursor.tell() < SectionSize) {
431 const uint64_t Offset = Cursor.tell();
432 const uint64_t IP = SectionAddress + Offset + (int32_t)DE.getU32(Cursor);
434 // Consume the status of the cursor.
435 if (!Cursor)
436 return createStringError(errc::executable_format_error,
437 "error while reading .smp_locks: %s",
438 toString(Cursor.takeError()).c_str());
440 if (opts::DumpSMPLocks)
441 BC.outs() << "SMP lock at 0x: " << Twine::utohexstr(IP) << '\n';
443 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(IP);
444 if (!BF || !BC.shouldEmit(*BF))
445 continue;
447 MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress());
448 if (!Inst)
449 return createStringError(errc::executable_format_error,
450 "no instruction matches lock at 0x%" PRIx64, IP);
452 // Check for duplicate entries.
453 if (BC.MIB->hasAnnotation(*Inst, "SMPLock"))
454 return createStringError(errc::executable_format_error,
455 "duplicate SMP lock at 0x%" PRIx64, IP);
457 BC.MIB->addAnnotation(*Inst, "SMPLock", true);
458 MCSymbol *Label =
459 BC.MIB->getOrCreateInstLabel(*Inst, "__SMPLock_", BC.Ctx.get());
461 Fixups.push_back({*SMPLocksSection, Offset, *BF, *Label,
462 /*IsPCRelative*/ true});
465 const uint64_t NumEntries = SectionSize / SMP_LOCKS_ENTRY_SIZE;
466 BC.outs() << "BOLT-INFO: parsed " << NumEntries << " SMP lock entries\n";
468 return Error::success();
471 void LinuxKernelRewriter::processInstructionFixups() {
472 for (InstructionFixup &Fixup : Fixups) {
473 if (!BC.shouldEmit(Fixup.BF))
474 continue;
476 Fixup.Section.addRelocation(Fixup.Offset, &Fixup.Label,
477 Fixup.IsPCRelative ? ELF::R_X86_64_PC32
478 : ELF::R_X86_64_64,
479 /*Addend*/ 0);
483 Error LinuxKernelRewriter::readORCTables() {
484 // NOTE: we should ignore relocations for orc tables as the tables are sorted
485 // post-link time and relocations are not updated.
486 ORCUnwindSection = BC.getUniqueSectionByName(".orc_unwind");
487 ORCUnwindIPSection = BC.getUniqueSectionByName(".orc_unwind_ip");
489 if (!ORCUnwindSection && !ORCUnwindIPSection)
490 return Error::success();
492 if (!ORCUnwindSection || !ORCUnwindIPSection)
493 return createStringError(errc::executable_format_error,
494 "missing ORC section");
496 NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
497 if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE ||
498 ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE)
499 return createStringError(errc::executable_format_error,
500 "ORC entries number mismatch detected");
502 const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress();
503 DataExtractor OrcDE = DataExtractor(ORCUnwindSection->getContents(),
504 BC.AsmInfo->isLittleEndian(),
505 BC.AsmInfo->getCodePointerSize());
506 DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getContents(),
507 BC.AsmInfo->isLittleEndian(),
508 BC.AsmInfo->getCodePointerSize());
509 DataExtractor::Cursor ORCCursor(0);
510 DataExtractor::Cursor IPCursor(0);
511 uint64_t PrevIP = 0;
512 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
513 const uint64_t IP =
514 IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor);
516 // Consume the status of the cursor.
517 if (!IPCursor)
518 return createStringError(errc::executable_format_error,
519 "out of bounds while reading ORC IP table: %s",
520 toString(IPCursor.takeError()).c_str());
522 if (IP < PrevIP && opts::Verbosity)
523 BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP)
524 << " detected while reading ORC\n";
526 PrevIP = IP;
528 // Store all entries, includes those we are not going to update as the
529 // tables need to be sorted globally before being written out.
530 ORCEntries.push_back(ORCListEntry());
531 ORCListEntry &Entry = ORCEntries.back();
533 Entry.IP = IP;
534 Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(ORCCursor);
535 Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor);
536 Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor);
537 Entry.BF = nullptr;
539 // Consume the status of the cursor.
540 if (!ORCCursor)
541 return createStringError(errc::executable_format_error,
542 "out of bounds while reading ORC: %s",
543 toString(ORCCursor.takeError()).c_str());
545 if (Entry.ORC == NullORC)
546 continue;
548 BinaryFunction *&BF = Entry.BF;
549 BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true);
551 // If the entry immediately pointing past the end of the function is not
552 // the terminator entry, then it does not belong to this function.
553 if (BF && BF->getAddress() + BF->getSize() == IP)
554 BF = 0;
556 if (!BF) {
557 if (opts::Verbosity)
558 BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x"
559 << Twine::utohexstr(IP) << ": " << Entry.ORC << '\n';
560 continue;
563 BF->setHasORC(true);
565 if (!BF->hasInstructions())
566 continue;
568 const uint64_t Offset = IP - BF->getAddress();
569 MCInst *Inst = BF->getInstructionAtOffset(Offset);
570 if (!Inst) {
571 // Check if there is an alternative instruction(s) at this IP. Multiple
572 // alternative instructions can take a place of a single original
573 // instruction and each alternative can have a separate ORC entry.
574 // Since ORC table is shared between all alternative sequences, there's
575 // a requirement that only one (out of many) sequences can have an
576 // instruction from the ORC table to avoid ambiguities/conflicts.
578 // For now, we have limited support for alternatives. I.e. we still print
579 // functions with them, but will not change the code in the output binary.
580 // As such, we can ignore alternative ORC entries. They will be preserved
581 // in the binary, but will not get printed in the instruction stream.
582 Inst = BF->getInstructionContainingOffset(Offset);
583 if (Inst || BC.MIB->hasAnnotation(*Inst, "AltInst"))
584 continue;
586 return createStringError(
587 errc::executable_format_error,
588 "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP);
591 // Some addresses will have two entries associated with them. The first
592 // one being a "weak" section terminator. Since we ignore the terminator,
593 // we should only assign one entry per instruction.
594 if (BC.MIB->hasAnnotation(*Inst, "ORC"))
595 return createStringError(
596 errc::executable_format_error,
597 "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", IP);
599 BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC);
602 BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n";
604 if (opts::DumpORC) {
605 BC.outs() << "BOLT-INFO: ORC unwind information:\n";
606 for (const ORCListEntry &E : ORCEntries) {
607 BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
608 if (E.BF)
609 BC.outs() << ": " << *E.BF;
610 BC.outs() << '\n';
614 // Add entries for functions that don't have explicit ORC info at the start.
615 // We'll have the correct info for them even if ORC for the preceding function
616 // changes.
617 ORCListType NewEntries;
618 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
619 auto It = llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
620 return E.IP <= BF.getAddress();
622 if (It != ORCEntries.begin())
623 --It;
625 if (It->BF == &BF)
626 continue;
628 if (It->ORC == NullORC && It->IP == BF.getAddress()) {
629 assert(!It->BF);
630 It->BF = &BF;
631 continue;
634 NewEntries.push_back({BF.getAddress(), &BF, It->ORC});
635 if (It->ORC != NullORC)
636 BF.setHasORC(true);
639 llvm::copy(NewEntries, std::back_inserter(ORCEntries));
640 llvm::sort(ORCEntries);
642 if (opts::DumpORC) {
643 BC.outs() << "BOLT-INFO: amended ORC unwind information:\n";
644 for (const ORCListEntry &E : ORCEntries) {
645 BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
646 if (E.BF)
647 BC.outs() << ": " << *E.BF;
648 BC.outs() << '\n';
652 return Error::success();
655 Error LinuxKernelRewriter::processORCPostCFG() {
656 if (!NumORCEntries)
657 return Error::success();
659 // Propagate ORC to the rest of the function. We can annotate every
660 // instruction in every function, but to minimize the overhead, we annotate
661 // the first instruction in every basic block to reflect the state at the
662 // entry. This way, the ORC state can be calculated based on annotations
663 // regardless of the basic block layout. Note that if we insert/delete
664 // instructions, we must take care to attach ORC info to the new/deleted ones.
665 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
667 std::optional<ORCState> CurrentState;
668 for (BinaryBasicBlock &BB : BF) {
669 for (MCInst &Inst : BB) {
670 ErrorOr<ORCState> State =
671 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
673 if (State) {
674 CurrentState = *State;
675 continue;
678 // Get state for the start of the function.
679 if (!CurrentState) {
680 // A terminator entry (NullORC) can match the function address. If
681 // there's also a non-terminator entry, it will be placed after the
682 // terminator. Hence, we are looking for the last ORC entry that
683 // matches the address.
684 auto It =
685 llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
686 return E.IP <= BF.getAddress();
688 if (It != ORCEntries.begin())
689 --It;
691 assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) &&
692 "ORC info at function entry expected.");
694 if (It->ORC == NullORC && BF.hasORC()) {
695 BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
696 << BF << '\n';
699 It->BF = &BF;
701 CurrentState = It->ORC;
702 if (It->ORC != NullORC)
703 BF.setHasORC(true);
706 // While printing ORC, attach info to every instruction for convenience.
707 if (opts::PrintORC || &Inst == &BB.front())
708 BC.MIB->addAnnotation(Inst, "ORC", *CurrentState);
713 return Error::success();
716 Error LinuxKernelRewriter::rewriteORCTables() {
717 if (!NumORCEntries)
718 return Error::success();
720 // Update ORC sections in-place. As we change the code, the number of ORC
721 // entries may increase for some functions. However, as we remove terminator
722 // redundancy (see below), more space is freed up and we should always be able
723 // to fit new ORC tables in the reserved space.
724 auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter {
725 const size_t Size = Section.getSize();
726 uint8_t *NewContents = new uint8_t[Size];
727 Section.updateContents(NewContents, Size);
728 Section.setOutputFileOffset(Section.getInputFileOffset());
729 return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian()
730 ? endianness::little
731 : endianness::big);
733 BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection);
734 BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection);
736 uint64_t NumEmitted = 0;
737 std::optional<ORCState> LastEmittedORC;
738 auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC,
739 MCSymbol *Label = 0, bool Force = false) -> Error {
740 if (LastEmittedORC && ORC == *LastEmittedORC && !Force)
741 return Error::success();
743 LastEmittedORC = ORC;
745 if (++NumEmitted > NumORCEntries)
746 return createStringError(errc::executable_format_error,
747 "exceeded the number of allocated ORC entries");
749 if (Label)
750 ORCUnwindIPSection->addRelocation(UnwindIPWriter.getOffset(), Label,
751 Relocation::getPC32(), /*Addend*/ 0);
753 const int32_t IPValue =
754 IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset();
755 if (Error E = UnwindIPWriter.writeInteger(IPValue))
756 return E;
758 if (Error E = UnwindWriter.writeInteger(ORC.SPOffset))
759 return E;
760 if (Error E = UnwindWriter.writeInteger(ORC.BPOffset))
761 return E;
762 if (Error E = UnwindWriter.writeInteger(ORC.Info))
763 return E;
765 return Error::success();
768 // Emit new ORC entries for the emitted function.
769 auto emitORC = [&](const FunctionFragment &FF) -> Error {
770 ORCState CurrentState = NullORC;
771 for (BinaryBasicBlock *BB : FF) {
772 for (MCInst &Inst : *BB) {
773 ErrorOr<ORCState> ErrorOrState =
774 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
775 if (!ErrorOrState || *ErrorOrState == CurrentState)
776 continue;
778 // Issue label for the instruction.
779 MCSymbol *Label =
780 BC.MIB->getOrCreateInstLabel(Inst, "__ORC_", BC.Ctx.get());
782 if (Error E = emitORCEntry(0, *ErrorOrState, Label))
783 return E;
785 CurrentState = *ErrorOrState;
789 return Error::success();
792 // Emit ORC entries for cold fragments. We assume that these fragments are
793 // emitted contiguously in memory using reserved space in the kernel. This
794 // assumption is validated in post-emit pass validateORCTables() where we
795 // check that ORC entries are sorted by their addresses.
796 auto emitColdORC = [&]() -> Error {
797 for (BinaryFunction &BF :
798 llvm::make_second_range(BC.getBinaryFunctions())) {
799 if (!BC.shouldEmit(BF))
800 continue;
801 for (FunctionFragment &FF : BF.getLayout().getSplitFragments())
802 if (Error E = emitORC(FF))
803 return E;
806 return Error::success();
809 bool ShouldEmitCold = !BC.BOLTReserved.empty();
810 for (ORCListEntry &Entry : ORCEntries) {
811 if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) {
812 if (Error E = emitColdORC())
813 return E;
815 // Emit terminator entry at the end of the reserved region.
816 if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC))
817 return E;
819 ShouldEmitCold = false;
822 // Emit original entries for functions that we haven't modified.
823 if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) {
824 // Emit terminator only if it marks the start of a function.
825 if (Entry.ORC == NullORC && !Entry.BF)
826 continue;
827 if (Error E = emitORCEntry(Entry.IP, Entry.ORC))
828 return E;
829 continue;
832 // Emit all ORC entries for a function referenced by an entry and skip over
833 // the rest of entries for this function by resetting its ORC attribute.
834 if (Entry.BF->hasORC()) {
835 if (Error E = emitORC(Entry.BF->getLayout().getMainFragment()))
836 return E;
837 Entry.BF->setHasORC(false);
841 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
842 << " ORC entries\n");
844 // Populate ORC tables with a terminator entry with max address to match the
845 // original table sizes.
846 const uint64_t LastIP = std::numeric_limits<uint64_t>::max();
847 while (UnwindWriter.bytesRemaining()) {
848 if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
849 return E;
852 return Error::success();
855 Error LinuxKernelRewriter::validateORCTables() {
856 if (!ORCUnwindIPSection)
857 return Error::success();
859 const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress();
860 DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getOutputContents(),
861 BC.AsmInfo->isLittleEndian(),
862 BC.AsmInfo->getCodePointerSize());
863 DataExtractor::Cursor IPCursor(0);
864 uint64_t PrevIP = 0;
865 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
866 const uint64_t IP =
867 IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor);
868 if (!IPCursor)
869 return createStringError(errc::executable_format_error,
870 "out of bounds while reading ORC IP table: %s",
871 toString(IPCursor.takeError()).c_str());
873 assert(IP >= PrevIP && "Unsorted ORC table detected");
874 (void)PrevIP;
875 PrevIP = IP;
878 return Error::success();
881 /// The static call site table is created by objtool and contains entries in the
882 /// following format:
884 /// struct static_call_site {
885 /// s32 addr;
886 /// s32 key;
887 /// };
889 Error LinuxKernelRewriter::readStaticCalls() {
890 const BinaryData *StaticCallTable =
891 BC.getBinaryDataByName("__start_static_call_sites");
892 if (!StaticCallTable)
893 return Error::success();
895 StaticCallTableAddress = StaticCallTable->getAddress();
897 const BinaryData *Stop = BC.getBinaryDataByName("__stop_static_call_sites");
898 if (!Stop)
899 return createStringError(errc::executable_format_error,
900 "missing __stop_static_call_sites symbol");
902 ErrorOr<BinarySection &> ErrorOrSection =
903 BC.getSectionForAddress(StaticCallTableAddress);
904 if (!ErrorOrSection)
905 return createStringError(errc::executable_format_error,
906 "no section matching __start_static_call_sites");
908 StaticCallSection = *ErrorOrSection;
909 if (!StaticCallSection->containsAddress(Stop->getAddress() - 1))
910 return createStringError(errc::executable_format_error,
911 "__stop_static_call_sites not in the same section "
912 "as __start_static_call_sites");
914 if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE)
915 return createStringError(errc::executable_format_error,
916 "static call table size error");
918 const uint64_t SectionAddress = StaticCallSection->getAddress();
919 DataExtractor DE(StaticCallSection->getContents(),
920 BC.AsmInfo->isLittleEndian(),
921 BC.AsmInfo->getCodePointerSize());
922 DataExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress);
923 uint32_t EntryID = 0;
924 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
925 const uint64_t CallAddress =
926 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
927 const uint64_t KeyAddress =
928 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
930 // Consume the status of the cursor.
931 if (!Cursor)
932 return createStringError(errc::executable_format_error,
933 "out of bounds while reading static calls: %s",
934 toString(Cursor.takeError()).c_str());
936 ++EntryID;
938 if (opts::DumpStaticCalls) {
939 BC.outs() << "Static Call Site: " << EntryID << '\n';
940 BC.outs() << "\tCallAddress: 0x" << Twine::utohexstr(CallAddress)
941 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress)
942 << '\n';
945 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(CallAddress);
946 if (!BF)
947 continue;
949 if (!BC.shouldEmit(*BF))
950 continue;
952 if (!BF->hasInstructions())
953 continue;
955 MCInst *Inst = BF->getInstructionAtOffset(CallAddress - BF->getAddress());
956 if (!Inst)
957 return createStringError(errc::executable_format_error,
958 "no instruction at call site address 0x%" PRIx64,
959 CallAddress);
961 // Check for duplicate entries.
962 if (BC.MIB->hasAnnotation(*Inst, "StaticCall"))
963 return createStringError(errc::executable_format_error,
964 "duplicate static call site at 0x%" PRIx64,
965 CallAddress);
967 BC.MIB->addAnnotation(*Inst, "StaticCall", EntryID);
969 MCSymbol *Label =
970 BC.MIB->getOrCreateInstLabel(*Inst, "__SC_", BC.Ctx.get());
972 StaticCallEntries.push_back({EntryID, BF, Label});
975 BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size()
976 << " static call entries\n";
978 return Error::success();
981 /// The static call table is sorted during boot time in
982 /// static_call_sort_entries(). This makes it possible to update existing
983 /// entries in-place ignoring their relative order.
984 Error LinuxKernelRewriter::rewriteStaticCalls() {
985 if (!StaticCallTableAddress || !StaticCallSection)
986 return Error::success();
988 for (auto &Entry : StaticCallEntries) {
989 if (!Entry.Function)
990 continue;
992 BinaryFunction &BF = *Entry.Function;
993 if (!BC.shouldEmit(BF))
994 continue;
996 // Create a relocation against the label.
997 const uint64_t EntryOffset = StaticCallTableAddress -
998 StaticCallSection->getAddress() +
999 (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE;
1000 StaticCallSection->addRelocation(EntryOffset, Entry.Label,
1001 ELF::R_X86_64_PC32, /*Addend*/ 0);
1004 return Error::success();
1007 /// Instructions that access user-space memory can cause page faults. These
1008 /// faults will be handled by the kernel and execution will resume at the fixup
1009 /// code location if the address was invalid. The kernel uses the exception
1010 /// table to match the faulting instruction to its fixup. The table consists of
1011 /// the following entries:
1013 /// struct exception_table_entry {
1014 /// int insn;
1015 /// int fixup;
1016 /// int data;
1017 /// };
1019 /// More info at:
1020 /// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt
1021 Error LinuxKernelRewriter::readExceptionTable() {
1022 ExceptionsSection = BC.getUniqueSectionByName("__ex_table");
1023 if (!ExceptionsSection)
1024 return Error::success();
1026 if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE)
1027 return createStringError(errc::executable_format_error,
1028 "exception table size error");
1030 const uint64_t SectionAddress = ExceptionsSection->getAddress();
1031 DataExtractor DE(ExceptionsSection->getContents(),
1032 BC.AsmInfo->isLittleEndian(),
1033 BC.AsmInfo->getCodePointerSize());
1034 DataExtractor::Cursor Cursor(0);
1035 uint32_t EntryID = 0;
1036 while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) {
1037 const uint64_t InstAddress =
1038 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1039 const uint64_t FixupAddress =
1040 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1041 const uint64_t Data = DE.getU32(Cursor);
1043 // Consume the status of the cursor.
1044 if (!Cursor)
1045 return createStringError(
1046 errc::executable_format_error,
1047 "out of bounds while reading exception table: %s",
1048 toString(Cursor.takeError()).c_str());
1050 ++EntryID;
1052 if (opts::DumpExceptions) {
1053 BC.outs() << "Exception Entry: " << EntryID << '\n';
1054 BC.outs() << "\tInsn: 0x" << Twine::utohexstr(InstAddress) << '\n'
1055 << "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n'
1056 << "\tData: 0x" << Twine::utohexstr(Data) << '\n';
1059 MCInst *Inst = nullptr;
1060 MCSymbol *FixupLabel = nullptr;
1062 BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(InstAddress);
1063 if (InstBF && BC.shouldEmit(*InstBF)) {
1064 Inst = InstBF->getInstructionAtOffset(InstAddress - InstBF->getAddress());
1065 if (!Inst)
1066 return createStringError(errc::executable_format_error,
1067 "no instruction at address 0x%" PRIx64
1068 " in exception table",
1069 InstAddress);
1070 BC.MIB->addAnnotation(*Inst, "ExceptionEntry", EntryID);
1071 FunctionsWithExceptions.insert(InstBF);
1074 if (!InstBF && opts::Verbosity) {
1075 BC.outs() << "BOLT-INFO: no function matches instruction at 0x"
1076 << Twine::utohexstr(InstAddress)
1077 << " referenced by Linux exception table\n";
1080 BinaryFunction *FixupBF =
1081 BC.getBinaryFunctionContainingAddress(FixupAddress);
1082 if (FixupBF && BC.shouldEmit(*FixupBF)) {
1083 const uint64_t Offset = FixupAddress - FixupBF->getAddress();
1084 if (!FixupBF->getInstructionAtOffset(Offset))
1085 return createStringError(errc::executable_format_error,
1086 "no instruction at fixup address 0x%" PRIx64
1087 " in exception table",
1088 FixupAddress);
1089 FixupLabel = Offset ? FixupBF->addEntryPointAtOffset(Offset)
1090 : FixupBF->getSymbol();
1091 if (Inst)
1092 BC.MIB->addAnnotation(*Inst, "Fixup", FixupLabel->getName());
1093 FunctionsWithExceptions.insert(FixupBF);
1096 if (!FixupBF && opts::Verbosity) {
1097 BC.outs() << "BOLT-INFO: no function matches fixup code at 0x"
1098 << Twine::utohexstr(FixupAddress)
1099 << " referenced by Linux exception table\n";
1103 BC.outs() << "BOLT-INFO: parsed "
1104 << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE
1105 << " exception table entries\n";
1107 return Error::success();
1110 /// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects
1111 /// the exception table to be sorted. Hence we have to sort it after code
1112 /// reordering.
1113 Error LinuxKernelRewriter::rewriteExceptionTable() {
1114 // Disable output of functions with exceptions before rewrite support is
1115 // added.
1116 for (BinaryFunction *BF : FunctionsWithExceptions)
1117 BF->setSimple(false);
1119 return Error::success();
1122 /// .parainsrtuctions section contains information for patching parvirtual call
1123 /// instructions during runtime. The entries in the section are in the form:
1125 /// struct paravirt_patch_site {
1126 /// u8 *instr; /* original instructions */
1127 /// u8 type; /* type of this instruction */
1128 /// u8 len; /* length of original instruction */
1129 /// };
1131 /// Note that the structures are aligned at 8-byte boundary.
1132 Error LinuxKernelRewriter::readParaInstructions() {
1133 ParavirtualPatchSection = BC.getUniqueSectionByName(".parainstructions");
1134 if (!ParavirtualPatchSection)
1135 return Error::success();
1137 DataExtractor DE = DataExtractor(ParavirtualPatchSection->getContents(),
1138 BC.AsmInfo->isLittleEndian(),
1139 BC.AsmInfo->getCodePointerSize());
1140 uint32_t EntryID = 0;
1141 DataExtractor::Cursor Cursor(0);
1142 while (Cursor && !DE.eof(Cursor)) {
1143 const uint64_t NextOffset = alignTo(Cursor.tell(), Align(PARA_PATCH_ALIGN));
1144 if (!DE.isValidOffset(NextOffset))
1145 break;
1147 Cursor.seek(NextOffset);
1149 const uint64_t InstrLocation = DE.getU64(Cursor);
1150 const uint8_t Type = DE.getU8(Cursor);
1151 const uint8_t Len = DE.getU8(Cursor);
1153 if (!Cursor)
1154 return createStringError(
1155 errc::executable_format_error,
1156 "out of bounds while reading .parainstructions: %s",
1157 toString(Cursor.takeError()).c_str());
1159 ++EntryID;
1161 if (opts::DumpParavirtualPatchSites) {
1162 BC.outs() << "Paravirtual patch site: " << EntryID << '\n';
1163 BC.outs() << "\tInstr: 0x" << Twine::utohexstr(InstrLocation)
1164 << "\n\tType: 0x" << Twine::utohexstr(Type) << "\n\tLen: 0x"
1165 << Twine::utohexstr(Len) << '\n';
1168 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstrLocation);
1169 if (!BF && opts::Verbosity) {
1170 BC.outs() << "BOLT-INFO: no function matches address 0x"
1171 << Twine::utohexstr(InstrLocation)
1172 << " referenced by paravirutal patch site\n";
1175 if (BF && BC.shouldEmit(*BF)) {
1176 MCInst *Inst =
1177 BF->getInstructionAtOffset(InstrLocation - BF->getAddress());
1178 if (!Inst)
1179 return createStringError(errc::executable_format_error,
1180 "no instruction at address 0x%" PRIx64
1181 " in paravirtual call site %d",
1182 InstrLocation, EntryID);
1183 BC.MIB->addAnnotation(*Inst, "ParaSite", EntryID);
1187 BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n";
1189 return Error::success();
1192 void LinuxKernelRewriter::skipFunctionsWithAnnotation(
1193 StringRef Annotation) const {
1194 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
1195 if (!BC.shouldEmit(BF))
1196 continue;
1197 for (const BinaryBasicBlock &BB : BF) {
1198 const bool HasAnnotation = llvm::any_of(BB, [&](const MCInst &Inst) {
1199 return BC.MIB->hasAnnotation(Inst, Annotation);
1201 if (HasAnnotation) {
1202 BF.setSimple(false);
1203 break;
1209 Error LinuxKernelRewriter::rewriteParaInstructions() {
1210 // Disable output of functions with paravirtual instructions before the
1211 // rewrite support is complete.
1212 skipFunctionsWithAnnotation("ParaSite");
1214 return Error::success();
1217 /// Process __bug_table section.
1218 /// This section contains information useful for kernel debugging, mostly
1219 /// utilized by WARN()/WARN_ON() macros and deprecated BUG()/BUG_ON().
1221 /// Each entry in the section is a struct bug_entry that contains a pointer to
1222 /// the ud2 instruction corresponding to the bug, corresponding file name (both
1223 /// pointers use PC relative offset addressing), line number, and flags.
1224 /// The definition of the struct bug_entry can be found in
1225 /// `include/asm-generic/bug.h`. The first entry in the struct is an instruction
1226 /// address encoded as a PC-relative offset. In theory, it could be an absolute
1227 /// address if CONFIG_GENERIC_BUG_RELATIVE_POINTERS is not set, but in practice
1228 /// the kernel code relies on it being a relative offset on x86-64.
1229 Error LinuxKernelRewriter::readBugTable() {
1230 BugTableSection = BC.getUniqueSectionByName("__bug_table");
1231 if (!BugTableSection)
1232 return Error::success();
1234 if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE)
1235 return createStringError(errc::executable_format_error,
1236 "bug table size error");
1238 const uint64_t SectionAddress = BugTableSection->getAddress();
1239 DataExtractor DE(BugTableSection->getContents(), BC.AsmInfo->isLittleEndian(),
1240 BC.AsmInfo->getCodePointerSize());
1241 DataExtractor::Cursor Cursor(0);
1242 uint32_t EntryID = 0;
1243 while (Cursor && Cursor.tell() < BugTableSection->getSize()) {
1244 const uint64_t Pos = Cursor.tell();
1245 const uint64_t InstAddress =
1246 SectionAddress + Pos + (int32_t)DE.getU32(Cursor);
1247 Cursor.seek(Pos + BUG_TABLE_ENTRY_SIZE);
1249 if (!Cursor)
1250 return createStringError(errc::executable_format_error,
1251 "out of bounds while reading __bug_table: %s",
1252 toString(Cursor.takeError()).c_str());
1254 ++EntryID;
1256 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstAddress);
1257 if (!BF && opts::Verbosity) {
1258 BC.outs() << "BOLT-INFO: no function matches address 0x"
1259 << Twine::utohexstr(InstAddress)
1260 << " referenced by bug table\n";
1263 if (BF && BC.shouldEmit(*BF)) {
1264 MCInst *Inst = BF->getInstructionAtOffset(InstAddress - BF->getAddress());
1265 if (!Inst)
1266 return createStringError(errc::executable_format_error,
1267 "no instruction at address 0x%" PRIx64
1268 " referenced by bug table entry %d",
1269 InstAddress, EntryID);
1270 BC.MIB->addAnnotation(*Inst, "BugEntry", EntryID);
1272 FunctionBugList[BF].push_back(EntryID);
1276 BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n";
1278 return Error::success();
1281 /// find_bug() uses linear search to match an address to an entry in the bug
1282 /// table. Hence, there is no need to sort entries when rewriting the table.
1283 /// When we need to erase an entry, we set its instruction address to zero.
1284 Error LinuxKernelRewriter::rewriteBugTable() {
1285 if (!BugTableSection)
1286 return Error::success();
1288 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
1289 if (!BC.shouldEmit(BF))
1290 continue;
1292 if (!FunctionBugList.count(&BF))
1293 continue;
1295 // Bugs that will be emitted for this function.
1296 DenseSet<uint32_t> EmittedIDs;
1297 for (BinaryBasicBlock &BB : BF) {
1298 for (MCInst &Inst : BB) {
1299 if (!BC.MIB->hasAnnotation(Inst, "BugEntry"))
1300 continue;
1301 const uint32_t ID = BC.MIB->getAnnotationAs<uint32_t>(Inst, "BugEntry");
1302 EmittedIDs.insert(ID);
1304 // Create a relocation entry for this bug entry.
1305 MCSymbol *Label =
1306 BC.MIB->getOrCreateInstLabel(Inst, "__BUG_", BC.Ctx.get());
1307 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1308 BugTableSection->addRelocation(EntryOffset, Label, ELF::R_X86_64_PC32,
1309 /*Addend*/ 0);
1313 // Clear bug entries that were not emitted for this function, e.g. as a
1314 // result of DCE, but setting their instruction address to zero.
1315 for (const uint32_t ID : FunctionBugList[&BF]) {
1316 if (!EmittedIDs.count(ID)) {
1317 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1318 BugTableSection->addRelocation(EntryOffset, nullptr, ELF::R_X86_64_PC32,
1319 /*Addend*/ 0);
1324 return Error::success();
1327 /// The kernel can replace certain instruction sequences depending on hardware
1328 /// it is running on and features specified during boot time. The information
1329 /// about alternative instruction sequences is stored in .altinstructions
1330 /// section. The format of entries in this section is defined in
1331 /// arch/x86/include/asm/alternative.h:
1333 /// struct alt_instr {
1334 /// s32 instr_offset;
1335 /// s32 repl_offset;
1336 /// uXX feature;
1337 /// u8 instrlen;
1338 /// u8 replacementlen;
1339 /// u8 padlen; // present in older kernels
1340 /// } __packed;
1342 /// Note that the structure is packed.
1344 /// Since the size of the "feature" field could be either u16 or u32, and
1345 /// "padlen" presence is unknown, we attempt to parse .altinstructions section
1346 /// using all possible combinations (four at this time). Since we validate the
1347 /// contents of the section and its size, the detection works quite well.
1348 /// Still, we leave the user the opportunity to specify these features on the
1349 /// command line and skip the guesswork.
1350 Error LinuxKernelRewriter::readAltInstructions() {
1351 AltInstrSection = BC.getUniqueSectionByName(".altinstructions");
1352 if (!AltInstrSection)
1353 return Error::success();
1355 // Presence of "padlen" field.
1356 std::vector<bool> PadLenVariants;
1357 if (opts::AltInstHasPadLen.getNumOccurrences())
1358 PadLenVariants.push_back(opts::AltInstHasPadLen);
1359 else
1360 PadLenVariants = {false, true};
1362 // Size (in bytes) variants of "feature" field.
1363 std::vector<uint32_t> FeatureSizeVariants;
1364 if (opts::AltInstFeatureSize.getNumOccurrences())
1365 FeatureSizeVariants.push_back(opts::AltInstFeatureSize);
1366 else
1367 FeatureSizeVariants = {2, 4};
1369 for (bool AltInstHasPadLen : PadLenVariants) {
1370 for (uint32_t AltInstFeatureSize : FeatureSizeVariants) {
1371 LLVM_DEBUG({
1372 dbgs() << "BOLT-DEBUG: trying AltInstHasPadLen = " << AltInstHasPadLen
1373 << "; AltInstFeatureSize = " << AltInstFeatureSize << ";\n";
1375 if (Error E = tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
1376 /*ParseOnly*/ true)) {
1377 consumeError(std::move(E));
1378 continue;
1381 LLVM_DEBUG(dbgs() << "Matched .altinstructions format\n");
1383 if (!opts::AltInstHasPadLen.getNumOccurrences())
1384 BC.outs() << "BOLT-INFO: setting --" << opts::AltInstHasPadLen.ArgStr
1385 << '=' << AltInstHasPadLen << '\n';
1387 if (!opts::AltInstFeatureSize.getNumOccurrences())
1388 BC.outs() << "BOLT-INFO: setting --" << opts::AltInstFeatureSize.ArgStr
1389 << '=' << AltInstFeatureSize << '\n';
1391 return tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
1392 /*ParseOnly*/ false);
1396 // We couldn't match the format. Read again to properly propagate the error
1397 // to the user.
1398 return tryReadAltInstructions(opts::AltInstFeatureSize,
1399 opts::AltInstHasPadLen, /*ParseOnly*/ false);
1402 Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize,
1403 bool AltInstHasPadLen,
1404 bool ParseOnly) {
1405 const uint64_t Address = AltInstrSection->getAddress();
1406 DataExtractor DE = DataExtractor(AltInstrSection->getContents(),
1407 BC.AsmInfo->isLittleEndian(),
1408 BC.AsmInfo->getCodePointerSize());
1409 uint64_t EntryID = 0;
1410 DataExtractor::Cursor Cursor(0);
1411 while (Cursor && !DE.eof(Cursor)) {
1412 const uint64_t OrgInstAddress =
1413 Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1414 const uint64_t AltInstAddress =
1415 Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1416 const uint64_t Feature = DE.getUnsigned(Cursor, AltInstFeatureSize);
1417 const uint8_t OrgSize = DE.getU8(Cursor);
1418 const uint8_t AltSize = DE.getU8(Cursor);
1420 // Older kernels may have the padlen field.
1421 const uint8_t PadLen = AltInstHasPadLen ? DE.getU8(Cursor) : 0;
1423 if (!Cursor)
1424 return createStringError(
1425 errc::executable_format_error,
1426 "out of bounds while reading .altinstructions: %s",
1427 toString(Cursor.takeError()).c_str());
1429 ++EntryID;
1431 if (opts::DumpAltInstructions) {
1432 BC.outs() << "Alternative instruction entry: " << EntryID
1433 << "\n\tOrg: 0x" << Twine::utohexstr(OrgInstAddress)
1434 << "\n\tAlt: 0x" << Twine::utohexstr(AltInstAddress)
1435 << "\n\tFeature: 0x" << Twine::utohexstr(Feature)
1436 << "\n\tOrgSize: " << (int)OrgSize
1437 << "\n\tAltSize: " << (int)AltSize << '\n';
1438 if (AltInstHasPadLen)
1439 BC.outs() << "\tPadLen: " << (int)PadLen << '\n';
1442 if (AltSize > OrgSize)
1443 return createStringError(errc::executable_format_error,
1444 "error reading .altinstructions");
1446 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress);
1447 if (!BF && opts::Verbosity) {
1448 BC.outs() << "BOLT-INFO: no function matches address 0x"
1449 << Twine::utohexstr(OrgInstAddress)
1450 << " of instruction from .altinstructions\n";
1453 BinaryFunction *AltBF =
1454 BC.getBinaryFunctionContainingAddress(AltInstAddress);
1455 if (!ParseOnly && AltBF && BC.shouldEmit(*AltBF)) {
1456 BC.errs()
1457 << "BOLT-WARNING: alternative instruction sequence found in function "
1458 << *AltBF << '\n';
1459 AltBF->setIgnored();
1462 if (!BF || !BF->hasInstructions())
1463 continue;
1465 if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize())
1466 return createStringError(errc::executable_format_error,
1467 "error reading .altinstructions");
1469 MCInst *Inst =
1470 BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress());
1471 if (!Inst)
1472 return createStringError(errc::executable_format_error,
1473 "no instruction at address 0x%" PRIx64
1474 " referenced by .altinstructions entry %d",
1475 OrgInstAddress, EntryID);
1477 if (ParseOnly)
1478 continue;
1480 // There could be more than one alternative instruction sequences for the
1481 // same original instruction. Annotate each alternative separately.
1482 std::string AnnotationName = "AltInst";
1483 unsigned N = 2;
1484 while (BC.MIB->hasAnnotation(*Inst, AnnotationName))
1485 AnnotationName = "AltInst" + std::to_string(N++);
1487 BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID);
1489 // Annotate all instructions from the original sequence. Note that it's not
1490 // the most efficient way to look for instructions in the address range,
1491 // but since alternative instructions are uncommon, it will do for now.
1492 for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) {
1493 Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset -
1494 BF->getAddress());
1495 if (Inst)
1496 BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID);
1500 if (!ParseOnly)
1501 BC.outs() << "BOLT-INFO: parsed " << EntryID
1502 << " alternative instruction entries\n";
1504 return Error::success();
1507 void LinuxKernelRewriter::processAltInstructionsPostCFG() {
1508 // Disable optimization and output of functions with alt instructions before
1509 // the rewrite support is complete. Alt instructions can modify the control
1510 // flow, hence we may end up deleting seemingly unreachable code.
1511 skipFunctionsWithAnnotation("AltInst");
1514 /// When the Linux kernel needs to handle an error associated with a given PCI
1515 /// device, it uses a table stored in .pci_fixup section to locate a fixup code
1516 /// specific to the vendor and the problematic device. The section contains a
1517 /// list of the following structures defined in include/linux/pci.h:
1519 /// struct pci_fixup {
1520 /// u16 vendor; /* Or PCI_ANY_ID */
1521 /// u16 device; /* Or PCI_ANY_ID */
1522 /// u32 class; /* Or PCI_ANY_ID */
1523 /// unsigned int class_shift; /* should be 0, 8, 16 */
1524 /// int hook_offset;
1525 /// };
1527 /// Normally, the hook will point to a function start and we don't have to
1528 /// update the pointer if we are not relocating functions. Hence, while reading
1529 /// the table we validate this assumption. If a function has a fixup code in the
1530 /// middle of its body, we issue a warning and ignore it.
1531 Error LinuxKernelRewriter::readPCIFixupTable() {
1532 PCIFixupSection = BC.getUniqueSectionByName(".pci_fixup");
1533 if (!PCIFixupSection)
1534 return Error::success();
1536 if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE)
1537 return createStringError(errc::executable_format_error,
1538 "PCI fixup table size error");
1540 const uint64_t Address = PCIFixupSection->getAddress();
1541 DataExtractor DE = DataExtractor(PCIFixupSection->getContents(),
1542 BC.AsmInfo->isLittleEndian(),
1543 BC.AsmInfo->getCodePointerSize());
1544 uint64_t EntryID = 0;
1545 DataExtractor::Cursor Cursor(0);
1546 while (Cursor && !DE.eof(Cursor)) {
1547 const uint16_t Vendor = DE.getU16(Cursor);
1548 const uint16_t Device = DE.getU16(Cursor);
1549 const uint32_t Class = DE.getU32(Cursor);
1550 const uint32_t ClassShift = DE.getU32(Cursor);
1551 const uint64_t HookAddress =
1552 Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1554 if (!Cursor)
1555 return createStringError(errc::executable_format_error,
1556 "out of bounds while reading .pci_fixup: %s",
1557 toString(Cursor.takeError()).c_str());
1559 ++EntryID;
1561 if (opts::DumpPCIFixups) {
1562 BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor 0x"
1563 << Twine::utohexstr(Vendor) << "\n\tDevice: 0x"
1564 << Twine::utohexstr(Device) << "\n\tClass: 0x"
1565 << Twine::utohexstr(Class) << "\n\tClassShift: 0x"
1566 << Twine::utohexstr(ClassShift) << "\n\tHookAddress: 0x"
1567 << Twine::utohexstr(HookAddress) << '\n';
1570 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(HookAddress);
1571 if (!BF && opts::Verbosity) {
1572 BC.outs() << "BOLT-INFO: no function matches address 0x"
1573 << Twine::utohexstr(HookAddress)
1574 << " of hook from .pci_fixup\n";
1577 if (!BF || !BC.shouldEmit(*BF))
1578 continue;
1580 if (const uint64_t Offset = HookAddress - BF->getAddress()) {
1581 BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function "
1582 << *BF << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
1583 BF->setSimple(false);
1587 BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n";
1589 return Error::success();
1592 /// Runtime code modification used by static keys is the most ubiquitous
1593 /// self-modifying feature of the Linux kernel. The idea is to eliminate the
1594 /// condition check and associated conditional jump on a hot path if that
1595 /// condition (based on a boolean value of a static key) does not change often.
1596 /// Whenever the condition changes, the kernel runtime modifies all code paths
1597 /// associated with that key flipping the code between nop and (unconditional)
1598 /// jump. The information about the code is stored in a static key jump table
1599 /// and contains the list of entries of the following type from
1600 /// include/linux/jump_label.h:
1602 /// struct jump_entry {
1603 /// s32 code;
1604 /// s32 target;
1605 /// long key; // key may be far away from the core kernel under KASLR
1606 /// };
1608 /// The list does not have to be stored in any sorted way, but it is sorted at
1609 /// boot time (or module initialization time) first by "key" and then by "code".
1610 /// jump_label_sort_entries() is responsible for sorting the table.
1612 /// The key in jump_entry structure uses lower two bits of the key address
1613 /// (which itself is aligned) to store extra information. We are interested in
1614 /// the lower bit which indicates if the key is likely to be set on the code
1615 /// path associated with this jump_entry.
1617 /// static_key_{enable,disable}() functions modify the code based on key and
1618 /// jump table entries.
1620 /// jump_label_update() updates all code entries for a given key. Batch mode is
1621 /// used for x86.
1623 /// The actual patching happens in text_poke_bp_batch() that overrides the first
1624 /// byte of the sequence with int3 before proceeding with actual code
1625 /// replacement.
1626 Error LinuxKernelRewriter::readStaticKeysJumpTable() {
1627 const BinaryData *StaticKeysJumpTable =
1628 BC.getBinaryDataByName("__start___jump_table");
1629 if (!StaticKeysJumpTable)
1630 return Error::success();
1632 StaticKeysJumpTableAddress = StaticKeysJumpTable->getAddress();
1634 const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table");
1635 if (!Stop)
1636 return createStringError(errc::executable_format_error,
1637 "missing __stop___jump_table symbol");
1639 ErrorOr<BinarySection &> ErrorOrSection =
1640 BC.getSectionForAddress(StaticKeysJumpTableAddress);
1641 if (!ErrorOrSection)
1642 return createStringError(errc::executable_format_error,
1643 "no section matching __start___jump_table");
1645 StaticKeysJumpSection = *ErrorOrSection;
1646 if (!StaticKeysJumpSection->containsAddress(Stop->getAddress() - 1))
1647 return createStringError(errc::executable_format_error,
1648 "__stop___jump_table not in the same section "
1649 "as __start___jump_table");
1651 if ((Stop->getAddress() - StaticKeysJumpTableAddress) %
1652 STATIC_KEYS_JUMP_ENTRY_SIZE)
1653 return createStringError(errc::executable_format_error,
1654 "static keys jump table size error");
1656 const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
1657 DataExtractor DE(StaticKeysJumpSection->getContents(),
1658 BC.AsmInfo->isLittleEndian(),
1659 BC.AsmInfo->getCodePointerSize());
1660 DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
1661 uint32_t EntryID = 0;
1662 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1663 const uint64_t JumpAddress =
1664 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1665 const uint64_t TargetAddress =
1666 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1667 const uint64_t KeyAddress =
1668 SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor);
1670 // Consume the status of the cursor.
1671 if (!Cursor)
1672 return createStringError(
1673 errc::executable_format_error,
1674 "out of bounds while reading static keys jump table: %s",
1675 toString(Cursor.takeError()).c_str());
1677 ++EntryID;
1679 JumpInfo.push_back(JumpInfoEntry());
1680 JumpInfoEntry &Info = JumpInfo.back();
1681 Info.Likely = KeyAddress & 1;
1683 if (opts::DumpStaticKeys) {
1684 BC.outs() << "Static key jump entry: " << EntryID
1685 << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress)
1686 << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
1687 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress)
1688 << "\n\tIsLikely: " << Info.Likely << '\n';
1691 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(JumpAddress);
1692 if (!BF && opts::Verbosity) {
1693 BC.outs()
1694 << "BOLT-INFO: no function matches address 0x"
1695 << Twine::utohexstr(JumpAddress)
1696 << " of jump instruction referenced from static keys jump table\n";
1699 if (!BF || !BC.shouldEmit(*BF))
1700 continue;
1702 MCInst *Inst = BF->getInstructionAtOffset(JumpAddress - BF->getAddress());
1703 if (!Inst)
1704 return createStringError(
1705 errc::executable_format_error,
1706 "no instruction at static keys jump site address 0x%" PRIx64,
1707 JumpAddress);
1709 if (!BF->containsAddress(TargetAddress))
1710 return createStringError(
1711 errc::executable_format_error,
1712 "invalid target of static keys jump at 0x%" PRIx64 " : 0x%" PRIx64,
1713 JumpAddress, TargetAddress);
1715 const bool IsBranch = BC.MIB->isBranch(*Inst);
1716 if (!IsBranch && !BC.MIB->isNoop(*Inst))
1717 return createStringError(errc::executable_format_error,
1718 "jump or nop expected at address 0x%" PRIx64,
1719 JumpAddress);
1721 const uint64_t Size = BC.computeInstructionSize(*Inst);
1722 if (Size != 2 && Size != 5) {
1723 return createStringError(
1724 errc::executable_format_error,
1725 "unexpected static keys jump size at address 0x%" PRIx64,
1726 JumpAddress);
1729 MCSymbol *Target = BF->registerBranch(JumpAddress, TargetAddress);
1730 MCInst StaticKeyBranch;
1732 // Create a conditional branch instruction. The actual conditional code type
1733 // should not matter as long as it's a valid code. The instruction should be
1734 // treated as a conditional branch for control-flow purposes. Before we emit
1735 // the code, it will be converted to a different instruction in
1736 // rewriteStaticKeysJumpTable().
1738 // NB: for older kernels, under LongJumpLabels option, we create long
1739 // conditional branch to guarantee that code size estimation takes
1740 // into account the extra bytes needed for long branch that will be used
1741 // by the kernel patching code. Newer kernels can work with both short
1742 // and long branches. The code for long conditional branch is larger
1743 // than unconditional one, so we are pessimistic in our estimations.
1744 if (opts::LongJumpLabels)
1745 BC.MIB->createLongCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
1746 else
1747 BC.MIB->createCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
1748 BC.MIB->moveAnnotations(std::move(*Inst), StaticKeyBranch);
1749 BC.MIB->setDynamicBranch(StaticKeyBranch, EntryID);
1750 *Inst = StaticKeyBranch;
1752 // IsBranch = InitialValue ^ LIKELY
1754 // 0 0 0
1755 // 1 0 1
1756 // 1 1 0
1757 // 0 1 1
1759 // => InitialValue = IsBranch ^ LIKELY
1760 Info.InitValue = IsBranch ^ Info.Likely;
1762 // Add annotations to facilitate manual code analysis.
1763 BC.MIB->addAnnotation(*Inst, "Likely", Info.Likely);
1764 BC.MIB->addAnnotation(*Inst, "InitValue", Info.InitValue);
1765 if (!BC.MIB->getSize(*Inst))
1766 BC.MIB->setSize(*Inst, Size);
1768 if (!BC.MIB->getOffset(*Inst))
1769 BC.MIB->setOffset(*Inst, JumpAddress - BF->getAddress());
1771 if (opts::LongJumpLabels)
1772 BC.MIB->setSize(*Inst, 5);
1775 BC.outs() << "BOLT-INFO: parsed " << EntryID << " static keys jump entries\n";
1777 return Error::success();
1780 // Pre-emit pass. Convert dynamic branch instructions into jumps that could be
1781 // relaxed. In post-emit pass we will convert those jumps into nops when
1782 // necessary. We do the unconditional conversion into jumps so that the jumps
1783 // can be relaxed and the optimal size of jump/nop instruction is selected.
1784 Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() {
1785 if (!StaticKeysJumpSection)
1786 return Error::success();
1788 uint64_t NumShort = 0;
1789 uint64_t NumLong = 0;
1790 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
1791 if (!BC.shouldEmit(BF))
1792 continue;
1794 for (BinaryBasicBlock &BB : BF) {
1795 for (MCInst &Inst : BB) {
1796 if (!BC.MIB->isDynamicBranch(Inst))
1797 continue;
1799 const uint32_t EntryID = *BC.MIB->getDynamicBranchID(Inst);
1800 MCSymbol *Target =
1801 const_cast<MCSymbol *>(BC.MIB->getTargetSymbol(Inst));
1802 assert(Target && "Target symbol should be set.");
1804 const JumpInfoEntry &Info = JumpInfo[EntryID - 1];
1805 const bool IsBranch = Info.Likely ^ Info.InitValue;
1807 uint32_t Size = *BC.MIB->getSize(Inst);
1808 if (Size == 2)
1809 ++NumShort;
1810 else if (Size == 5)
1811 ++NumLong;
1812 else
1813 llvm_unreachable("Wrong size for static keys jump instruction.");
1815 MCInst NewInst;
1816 // Replace the instruction with unconditional jump even if it needs to
1817 // be nop in the binary.
1818 if (opts::LongJumpLabels) {
1819 BC.MIB->createLongUncondBranch(NewInst, Target, BC.Ctx.get());
1820 } else {
1821 // Newer kernels can handle short and long jumps for static keys.
1822 // Optimistically, emit short jump and check if it gets relaxed into
1823 // a long one during post-emit. Only then convert the jump to a nop.
1824 BC.MIB->createUncondBranch(NewInst, Target, BC.Ctx.get());
1827 BC.MIB->moveAnnotations(std::move(Inst), NewInst);
1828 Inst = NewInst;
1830 // Mark the instruction for nop conversion.
1831 if (!IsBranch)
1832 NopIDs.insert(EntryID);
1834 MCSymbol *Label =
1835 BC.MIB->getOrCreateInstLabel(Inst, "__SK_", BC.Ctx.get());
1837 // Create a relocation against the label.
1838 const uint64_t EntryOffset = StaticKeysJumpTableAddress -
1839 StaticKeysJumpSection->getAddress() +
1840 (EntryID - 1) * 16;
1841 StaticKeysJumpSection->addRelocation(EntryOffset, Label,
1842 ELF::R_X86_64_PC32,
1843 /*Addend*/ 0);
1844 StaticKeysJumpSection->addRelocation(EntryOffset + 4, Target,
1845 ELF::R_X86_64_PC32, /*Addend*/ 0);
1850 BC.outs() << "BOLT-INFO: the input contains " << NumShort << " short and "
1851 << NumLong << " long static keys jumps in optimized functions\n";
1853 return Error::success();
1856 // Post-emit pass of static keys jump section. Convert jumps to nops.
1857 Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() {
1858 if (!StaticKeysJumpSection || !StaticKeysJumpSection->isFinalized())
1859 return Error::success();
1861 const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
1862 DataExtractor DE(StaticKeysJumpSection->getOutputContents(),
1863 BC.AsmInfo->isLittleEndian(),
1864 BC.AsmInfo->getCodePointerSize());
1865 DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
1866 const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table");
1867 uint32_t EntryID = 0;
1868 uint64_t NumShort = 0;
1869 uint64_t NumLong = 0;
1870 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1871 const uint64_t JumpAddress =
1872 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1873 const uint64_t TargetAddress =
1874 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1875 const uint64_t KeyAddress =
1876 SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor);
1878 // Consume the status of the cursor.
1879 if (!Cursor)
1880 return createStringError(errc::executable_format_error,
1881 "out of bounds while updating static keys: %s",
1882 toString(Cursor.takeError()).c_str());
1884 ++EntryID;
1886 LLVM_DEBUG({
1887 dbgs() << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress)
1888 << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
1889 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) << '\n';
1891 (void)TargetAddress;
1892 (void)KeyAddress;
1894 BinaryFunction *BF =
1895 BC.getBinaryFunctionContainingAddress(JumpAddress,
1896 /*CheckPastEnd*/ false,
1897 /*UseMaxSize*/ true);
1898 assert(BF && "Cannot get function for modified static key.");
1900 if (!BF->isEmitted())
1901 continue;
1903 // Disassemble instruction to collect stats even if nop-conversion is
1904 // unnecessary.
1905 MutableArrayRef<uint8_t> Contents = MutableArrayRef<uint8_t>(
1906 reinterpret_cast<uint8_t *>(BF->getImageAddress()), BF->getImageSize());
1907 assert(Contents.size() && "Non-empty function image expected.");
1909 MCInst Inst;
1910 uint64_t Size;
1911 const uint64_t JumpOffset = JumpAddress - BF->getAddress();
1912 if (!BC.DisAsm->getInstruction(Inst, Size, Contents.slice(JumpOffset), 0,
1913 nulls())) {
1914 llvm_unreachable("Unable to disassemble jump instruction.");
1916 assert(BC.MIB->isBranch(Inst) && "Branch instruction expected.");
1918 if (Size == 2)
1919 ++NumShort;
1920 else if (Size == 5)
1921 ++NumLong;
1922 else
1923 llvm_unreachable("Unexpected size for static keys jump instruction.");
1925 // Check if we need to convert jump instruction into a nop.
1926 if (!NopIDs.contains(EntryID))
1927 continue;
1929 SmallString<15> NopCode;
1930 raw_svector_ostream VecOS(NopCode);
1931 BC.MAB->writeNopData(VecOS, Size, BC.STI.get());
1932 for (uint64_t I = 0; I < Size; ++I)
1933 Contents[JumpOffset + I] = NopCode[I];
1936 BC.outs() << "BOLT-INFO: written " << NumShort << " short and " << NumLong
1937 << " long static keys jumps in optimized functions\n";
1939 return Error::success();
1942 } // namespace
1944 std::unique_ptr<MetadataRewriter>
1945 llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) {
1946 return std::make_unique<LinuxKernelRewriter>(BC);