1 //===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Support for updating Linux Kernel metadata.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Rewrite/MetadataRewriter.h"
15 #include "bolt/Rewrite/MetadataRewriters.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
20 #include "llvm/Support/BinaryStreamWriter.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Errc.h"
25 #define DEBUG_TYPE "bolt-linux"
33 AltInstHasPadLen("alt-inst-has-padlen",
34 cl::desc("specify that .altinstructions has padlen field"),
35 cl::init(false), cl::Hidden
, cl::cat(BoltCategory
));
37 static cl::opt
<uint32_t>
38 AltInstFeatureSize("alt-inst-feature-size",
39 cl::desc("size of feature field in .altinstructions"),
40 cl::init(2), cl::Hidden
, cl::cat(BoltCategory
));
43 DumpAltInstructions("dump-alt-instructions",
44 cl::desc("dump Linux alternative instructions info"),
45 cl::init(false), cl::Hidden
, cl::cat(BoltCategory
));
48 DumpExceptions("dump-linux-exceptions",
49 cl::desc("dump Linux kernel exception table"),
50 cl::init(false), cl::Hidden
, cl::cat(BoltCategory
));
53 DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
54 cl::init(false), cl::Hidden
, cl::cat(BoltCategory
));
56 static cl::opt
<bool> DumpParavirtualPatchSites(
57 "dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"),
58 cl::init(false), cl::Hidden
, cl::cat(BoltCategory
));
61 DumpPCIFixups("dump-pci-fixups",
62 cl::desc("dump Linux kernel PCI fixup table"),
63 cl::init(false), cl::Hidden
, cl::cat(BoltCategory
));
65 static cl::opt
<bool> DumpSMPLocks("dump-smp-locks",
66 cl::desc("dump Linux kernel SMP locks"),
67 cl::init(false), cl::Hidden
,
68 cl::cat(BoltCategory
));
70 static cl::opt
<bool> DumpStaticCalls("dump-static-calls",
71 cl::desc("dump Linux kernel static calls"),
72 cl::init(false), cl::Hidden
,
73 cl::cat(BoltCategory
));
76 DumpStaticKeys("dump-static-keys",
77 cl::desc("dump Linux kernel static keys jump table"),
78 cl::init(false), cl::Hidden
, cl::cat(BoltCategory
));
80 static cl::opt
<bool> LongJumpLabels(
82 cl::desc("always use long jumps/nops for Linux kernel static keys"),
83 cl::init(false), cl::Hidden
, cl::cat(BoltCategory
));
87 cl::desc("print ORC unwind information for instructions"),
88 cl::init(true), cl::Hidden
, cl::cat(BoltCategory
));
92 /// Linux Kernel supports stack unwinding using ORC (oops rewind capability).
93 /// ORC state at every IP can be described by the following data structure.
99 bool operator==(const ORCState
&Other
) const {
100 return SPOffset
== Other
.SPOffset
&& BPOffset
== Other
.BPOffset
&&
104 bool operator!=(const ORCState
&Other
) const { return !(*this == Other
); }
107 /// Section terminator ORC entry.
108 static ORCState NullORC
= {0, 0, 0};
110 /// Basic printer for ORC entry. It does not provide the same level of
111 /// information as objtool (for now).
112 inline raw_ostream
&operator<<(raw_ostream
&OS
, const ORCState
&E
) {
116 OS
<< format("{sp: %d, bp: %d, info: 0x%x}", E
.SPOffset
, E
.BPOffset
,
119 OS
<< "{terminator}";
126 class LinuxKernelRewriter final
: public MetadataRewriter
{
127 /// Information required for updating metadata referencing an instruction.
128 struct InstructionFixup
{
129 BinarySection
&Section
; // Section referencing the instruction.
130 uint64_t Offset
; // Offset in the section above.
131 BinaryFunction
&BF
; // Function containing the instruction.
132 MCSymbol
&Label
; // Label marking the instruction.
133 bool IsPCRelative
; // If the reference type is relative.
135 std::vector
<InstructionFixup
> Fixups
;
137 /// Size of an entry in .smp_locks section.
138 static constexpr size_t SMP_LOCKS_ENTRY_SIZE
= 4;
140 /// Linux ORC sections.
141 ErrorOr
<BinarySection
&> ORCUnwindSection
= std::errc::bad_address
;
142 ErrorOr
<BinarySection
&> ORCUnwindIPSection
= std::errc::bad_address
;
144 /// Size of entries in ORC sections.
145 static constexpr size_t ORC_UNWIND_ENTRY_SIZE
= 6;
146 static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE
= 4;
148 struct ORCListEntry
{
149 uint64_t IP
; /// Instruction address.
150 BinaryFunction
*BF
; /// Binary function corresponding to the entry.
151 ORCState ORC
; /// Stack unwind info in ORC format.
153 /// ORC entries are sorted by their IPs. Terminator entries (NullORC)
154 /// should precede other entries with the same address.
155 bool operator<(const ORCListEntry
&Other
) const {
160 return ORC
== NullORC
&& Other
.ORC
!= NullORC
;
164 using ORCListType
= std::vector
<ORCListEntry
>;
165 ORCListType ORCEntries
;
167 /// Number of entries in the input file ORC sections.
168 uint64_t NumORCEntries
= 0;
170 /// Section containing static keys jump table.
171 ErrorOr
<BinarySection
&> StaticKeysJumpSection
= std::errc::bad_address
;
172 uint64_t StaticKeysJumpTableAddress
= 0;
173 static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE
= 8;
175 struct JumpInfoEntry
{
179 SmallVector
<JumpInfoEntry
, 16> JumpInfo
;
181 /// Static key entries that need nop conversion.
182 DenseSet
<uint32_t> NopIDs
;
184 /// Section containing static call table.
185 ErrorOr
<BinarySection
&> StaticCallSection
= std::errc::bad_address
;
186 uint64_t StaticCallTableAddress
= 0;
187 static constexpr size_t STATIC_CALL_ENTRY_SIZE
= 8;
189 struct StaticCallInfo
{
190 uint32_t ID
; /// Identifier of the entry in the table.
191 BinaryFunction
*Function
; /// Function containing associated call.
192 MCSymbol
*Label
; /// Label attached to the call.
194 using StaticCallListType
= std::vector
<StaticCallInfo
>;
195 StaticCallListType StaticCallEntries
;
197 /// Section containing the Linux exception table.
198 ErrorOr
<BinarySection
&> ExceptionsSection
= std::errc::bad_address
;
199 static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE
= 12;
201 /// Functions with exception handling code.
202 DenseSet
<BinaryFunction
*> FunctionsWithExceptions
;
204 /// Section with paravirtual patch sites.
205 ErrorOr
<BinarySection
&> ParavirtualPatchSection
= std::errc::bad_address
;
207 /// Alignment of paravirtual patch structures.
208 static constexpr size_t PARA_PATCH_ALIGN
= 8;
210 /// .altinstructions section.
211 ErrorOr
<BinarySection
&> AltInstrSection
= std::errc::bad_address
;
213 /// Section containing Linux bug table.
214 ErrorOr
<BinarySection
&> BugTableSection
= std::errc::bad_address
;
216 /// Size of bug_entry struct.
217 static constexpr size_t BUG_TABLE_ENTRY_SIZE
= 12;
219 /// List of bug entries per function.
220 using FunctionBugListType
=
221 DenseMap
<BinaryFunction
*, SmallVector
<uint32_t, 2>>;
222 FunctionBugListType FunctionBugList
;
224 /// .pci_fixup section.
225 ErrorOr
<BinarySection
&> PCIFixupSection
= std::errc::bad_address
;
226 static constexpr size_t PCI_FIXUP_ENTRY_SIZE
= 16;
228 /// Process linux kernel special sections and their relocations.
229 void processLKSections();
231 /// Process __ksymtab and __ksymtab_gpl.
232 void processLKKSymtab(bool IsGPL
= false);
234 // Create relocations in sections requiring fixups.
236 // Make sure functions that will not be emitted are marked as such before this
237 // function is executed.
238 void processInstructionFixups();
240 /// Process .smp_locks section.
241 Error
processSMPLocks();
243 /// Read ORC unwind information and annotate instructions.
244 Error
readORCTables();
246 /// Update ORC for functions once CFG is constructed.
247 Error
processORCPostCFG();
249 /// Update ORC data in the binary.
250 Error
rewriteORCTables();
252 /// Validate written ORC tables after binary emission.
253 Error
validateORCTables();
255 /// Static call table handling.
256 Error
readStaticCalls();
257 Error
rewriteStaticCalls();
259 Error
readExceptionTable();
260 Error
rewriteExceptionTable();
262 /// Paravirtual instruction patch sites.
263 Error
readParaInstructions();
264 Error
rewriteParaInstructions();
266 /// __bug_table section handling.
267 Error
readBugTable();
268 Error
rewriteBugTable();
270 /// Do not process functions containing instructions annotated with
272 void skipFunctionsWithAnnotation(StringRef Annotation
) const;
274 /// Handle alternative instruction info from .altinstructions.
275 Error
readAltInstructions();
276 void processAltInstructionsPostCFG();
277 Error
tryReadAltInstructions(uint32_t AltInstFeatureSize
,
278 bool AltInstHasPadLen
, bool ParseOnly
);
281 Error
readPCIFixupTable();
283 /// Handle static keys jump table.
284 Error
readStaticKeysJumpTable();
285 Error
rewriteStaticKeysJumpTable();
286 Error
updateStaticKeysJumpTablePostEmit();
289 LinuxKernelRewriter(BinaryContext
&BC
)
290 : MetadataRewriter("linux-kernel-rewriter", BC
) {}
292 Error
preCFGInitializer() override
{
295 if (Error E
= processSMPLocks())
298 if (Error E
= readStaticCalls())
301 if (Error E
= readExceptionTable())
304 if (Error E
= readParaInstructions())
307 if (Error E
= readBugTable())
310 if (Error E
= readAltInstructions())
313 // Some ORC entries could be linked to alternative instruction
314 // sequences. Hence, we read ORC after .altinstructions.
315 if (Error E
= readORCTables())
318 if (Error E
= readPCIFixupTable())
321 if (Error E
= readStaticKeysJumpTable())
324 return Error::success();
327 Error
postCFGInitializer() override
{
328 if (Error E
= processORCPostCFG())
331 processAltInstructionsPostCFG();
333 return Error::success();
336 Error
preEmitFinalizer() override
{
337 // Since rewriteExceptionTable() can mark functions as non-simple, run it
338 // before other rewriters that depend on simple/emit status.
339 if (Error E
= rewriteExceptionTable())
342 if (Error E
= rewriteParaInstructions())
345 if (Error E
= rewriteORCTables())
348 if (Error E
= rewriteStaticCalls())
351 if (Error E
= rewriteStaticKeysJumpTable())
354 if (Error E
= rewriteBugTable())
357 processInstructionFixups();
359 return Error::success();
362 Error
postEmitFinalizer() override
{
363 if (Error E
= updateStaticKeysJumpTablePostEmit())
366 if (Error E
= validateORCTables())
369 return Error::success();
373 void LinuxKernelRewriter::processLKSections() {
375 processLKKSymtab(true);
378 /// Process __ksymtab[_gpl] sections of Linux Kernel.
379 /// This section lists all the vmlinux symbols that kernel modules can access.
381 /// All the entries are 4 bytes each and hence we can read them one by one
382 /// and ignore the ones that are not pointing to the .text section. All pointers
383 /// are PC-relative offsets and always point to the beginning of a function.
384 void LinuxKernelRewriter::processLKKSymtab(bool IsGPL
) {
385 StringRef SectionName
= "__ksymtab";
387 SectionName
= "__ksymtab_gpl";
388 ErrorOr
<BinarySection
&> SectionOrError
=
389 BC
.getUniqueSectionByName(SectionName
);
390 assert(SectionOrError
&&
391 "__ksymtab[_gpl] section not found in Linux Kernel binary");
392 const uint64_t SectionSize
= SectionOrError
->getSize();
393 const uint64_t SectionAddress
= SectionOrError
->getAddress();
394 assert((SectionSize
% 4) == 0 &&
395 "The size of the __ksymtab[_gpl] section should be a multiple of 4");
397 for (uint64_t I
= 0; I
< SectionSize
; I
+= 4) {
398 const uint64_t EntryAddress
= SectionAddress
+ I
;
399 ErrorOr
<int64_t> Offset
= BC
.getSignedValueAtAddress(EntryAddress
, 4);
400 assert(Offset
&& "Reading valid PC-relative offset for a ksymtab entry");
401 const int32_t SignedOffset
= *Offset
;
402 const uint64_t RefAddress
= EntryAddress
+ SignedOffset
;
403 BinaryFunction
*BF
= BC
.getBinaryFunctionAtAddress(RefAddress
);
407 BC
.addRelocation(EntryAddress
, BF
->getSymbol(), Relocation::getPC32(), 0,
412 /// .smp_locks section contains PC-relative references to instructions with LOCK
413 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems.
414 Error
LinuxKernelRewriter::processSMPLocks() {
415 ErrorOr
<BinarySection
&> SMPLocksSection
=
416 BC
.getUniqueSectionByName(".smp_locks");
417 if (!SMPLocksSection
)
418 return Error::success();
420 const uint64_t SectionSize
= SMPLocksSection
->getSize();
421 const uint64_t SectionAddress
= SMPLocksSection
->getAddress();
422 if (SectionSize
% SMP_LOCKS_ENTRY_SIZE
)
423 return createStringError(errc::executable_format_error
,
424 "bad size of .smp_locks section");
426 DataExtractor DE
= DataExtractor(SMPLocksSection
->getContents(),
427 BC
.AsmInfo
->isLittleEndian(),
428 BC
.AsmInfo
->getCodePointerSize());
429 DataExtractor::Cursor
Cursor(0);
430 while (Cursor
&& Cursor
.tell() < SectionSize
) {
431 const uint64_t Offset
= Cursor
.tell();
432 const uint64_t IP
= SectionAddress
+ Offset
+ (int32_t)DE
.getU32(Cursor
);
434 // Consume the status of the cursor.
436 return createStringError(errc::executable_format_error
,
437 "error while reading .smp_locks: %s",
438 toString(Cursor
.takeError()).c_str());
440 if (opts::DumpSMPLocks
)
441 BC
.outs() << "SMP lock at 0x: " << Twine::utohexstr(IP
) << '\n';
443 BinaryFunction
*BF
= BC
.getBinaryFunctionContainingAddress(IP
);
444 if (!BF
|| !BC
.shouldEmit(*BF
))
447 MCInst
*Inst
= BF
->getInstructionAtOffset(IP
- BF
->getAddress());
449 return createStringError(errc::executable_format_error
,
450 "no instruction matches lock at 0x%" PRIx64
, IP
);
452 // Check for duplicate entries.
453 if (BC
.MIB
->hasAnnotation(*Inst
, "SMPLock"))
454 return createStringError(errc::executable_format_error
,
455 "duplicate SMP lock at 0x%" PRIx64
, IP
);
457 BC
.MIB
->addAnnotation(*Inst
, "SMPLock", true);
459 BC
.MIB
->getOrCreateInstLabel(*Inst
, "__SMPLock_", BC
.Ctx
.get());
461 Fixups
.push_back({*SMPLocksSection
, Offset
, *BF
, *Label
,
462 /*IsPCRelative*/ true});
465 const uint64_t NumEntries
= SectionSize
/ SMP_LOCKS_ENTRY_SIZE
;
466 BC
.outs() << "BOLT-INFO: parsed " << NumEntries
<< " SMP lock entries\n";
468 return Error::success();
471 void LinuxKernelRewriter::processInstructionFixups() {
472 for (InstructionFixup
&Fixup
: Fixups
) {
473 if (!BC
.shouldEmit(Fixup
.BF
))
476 Fixup
.Section
.addRelocation(Fixup
.Offset
, &Fixup
.Label
,
477 Fixup
.IsPCRelative
? ELF::R_X86_64_PC32
483 Error
LinuxKernelRewriter::readORCTables() {
484 // NOTE: we should ignore relocations for orc tables as the tables are sorted
485 // post-link time and relocations are not updated.
486 ORCUnwindSection
= BC
.getUniqueSectionByName(".orc_unwind");
487 ORCUnwindIPSection
= BC
.getUniqueSectionByName(".orc_unwind_ip");
489 if (!ORCUnwindSection
&& !ORCUnwindIPSection
)
490 return Error::success();
492 if (!ORCUnwindSection
|| !ORCUnwindIPSection
)
493 return createStringError(errc::executable_format_error
,
494 "missing ORC section");
496 NumORCEntries
= ORCUnwindIPSection
->getSize() / ORC_UNWIND_IP_ENTRY_SIZE
;
497 if (ORCUnwindSection
->getSize() != NumORCEntries
* ORC_UNWIND_ENTRY_SIZE
||
498 ORCUnwindIPSection
->getSize() != NumORCEntries
* ORC_UNWIND_IP_ENTRY_SIZE
)
499 return createStringError(errc::executable_format_error
,
500 "ORC entries number mismatch detected");
502 const uint64_t IPSectionAddress
= ORCUnwindIPSection
->getAddress();
503 DataExtractor OrcDE
= DataExtractor(ORCUnwindSection
->getContents(),
504 BC
.AsmInfo
->isLittleEndian(),
505 BC
.AsmInfo
->getCodePointerSize());
506 DataExtractor IPDE
= DataExtractor(ORCUnwindIPSection
->getContents(),
507 BC
.AsmInfo
->isLittleEndian(),
508 BC
.AsmInfo
->getCodePointerSize());
509 DataExtractor::Cursor
ORCCursor(0);
510 DataExtractor::Cursor
IPCursor(0);
512 for (uint32_t Index
= 0; Index
< NumORCEntries
; ++Index
) {
514 IPSectionAddress
+ IPCursor
.tell() + (int32_t)IPDE
.getU32(IPCursor
);
516 // Consume the status of the cursor.
518 return createStringError(errc::executable_format_error
,
519 "out of bounds while reading ORC IP table: %s",
520 toString(IPCursor
.takeError()).c_str());
522 if (IP
< PrevIP
&& opts::Verbosity
)
523 BC
.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP
)
524 << " detected while reading ORC\n";
528 // Store all entries, including those we are not going to update, as the
529 // tables need to be sorted globally before being written out.
530 ORCEntries
.push_back(ORCListEntry());
531 ORCListEntry
&Entry
= ORCEntries
.back();
534 Entry
.ORC
.SPOffset
= (int16_t)OrcDE
.getU16(ORCCursor
);
535 Entry
.ORC
.BPOffset
= (int16_t)OrcDE
.getU16(ORCCursor
);
536 Entry
.ORC
.Info
= (int16_t)OrcDE
.getU16(ORCCursor
);
539 // Consume the status of the cursor.
541 return createStringError(errc::executable_format_error
,
542 "out of bounds while reading ORC: %s",
543 toString(ORCCursor
.takeError()).c_str());
545 if (Entry
.ORC
== NullORC
)
548 BinaryFunction
*&BF
= Entry
.BF
;
549 BF
= BC
.getBinaryFunctionContainingAddress(IP
, /*CheckPastEnd*/ true);
551 // If the entry immediately pointing past the end of the function is not
552 // the terminator entry, then it does not belong to this function.
553 if (BF
&& BF
->getAddress() + BF
->getSize() == IP
)
558 BC
.errs() << "BOLT-WARNING: no binary function found matching ORC 0x"
559 << Twine::utohexstr(IP
) << ": " << Entry
.ORC
<< '\n';
565 if (!BF
->hasInstructions())
568 const uint64_t Offset
= IP
- BF
->getAddress();
569 MCInst
*Inst
= BF
->getInstructionAtOffset(Offset
);
571 // Check if there is an alternative instruction(s) at this IP. Multiple
572 // alternative instructions can take the place of a single original
573 // instruction and each alternative can have a separate ORC entry.
574 // Since ORC table is shared between all alternative sequences, there's
575 // a requirement that only one (out of many) sequences can have an
576 // instruction from the ORC table to avoid ambiguities/conflicts.
578 // For now, we have limited support for alternatives. I.e. we still print
579 // functions with them, but will not change the code in the output binary.
580 // As such, we can ignore alternative ORC entries. They will be preserved
581 // in the binary, but will not get printed in the instruction stream.
582 Inst
= BF
->getInstructionContainingOffset(Offset
);
583 if (Inst
|| BC
.MIB
->hasAnnotation(*Inst
, "AltInst"))
586 return createStringError(
587 errc::executable_format_error
,
588 "no instruction at address 0x%" PRIx64
" in .orc_unwind_ip", IP
);
591 // Some addresses will have two entries associated with them. The first
592 // one being a "weak" section terminator. Since we ignore the terminator,
593 // we should only assign one entry per instruction.
594 if (BC
.MIB
->hasAnnotation(*Inst
, "ORC"))
595 return createStringError(
596 errc::executable_format_error
,
597 "duplicate non-terminal ORC IP 0x%" PRIx64
" in .orc_unwind_ip", IP
);
599 BC
.MIB
->addAnnotation(*Inst
, "ORC", Entry
.ORC
);
602 BC
.outs() << "BOLT-INFO: parsed " << NumORCEntries
<< " ORC entries\n";
605 BC
.outs() << "BOLT-INFO: ORC unwind information:\n";
606 for (const ORCListEntry
&E
: ORCEntries
) {
607 BC
.outs() << "0x" << Twine::utohexstr(E
.IP
) << ": " << E
.ORC
;
609 BC
.outs() << ": " << *E
.BF
;
614 // Add entries for functions that don't have explicit ORC info at the start.
615 // We'll have the correct info for them even if ORC for the preceding function
617 ORCListType NewEntries
;
618 for (BinaryFunction
&BF
: llvm::make_second_range(BC
.getBinaryFunctions())) {
619 auto It
= llvm::partition_point(ORCEntries
, [&](const ORCListEntry
&E
) {
620 return E
.IP
<= BF
.getAddress();
622 if (It
!= ORCEntries
.begin())
628 if (It
->ORC
== NullORC
&& It
->IP
== BF
.getAddress()) {
634 NewEntries
.push_back({BF
.getAddress(), &BF
, It
->ORC
});
635 if (It
->ORC
!= NullORC
)
639 llvm::copy(NewEntries
, std::back_inserter(ORCEntries
));
640 llvm::sort(ORCEntries
);
643 BC
.outs() << "BOLT-INFO: amended ORC unwind information:\n";
644 for (const ORCListEntry
&E
: ORCEntries
) {
645 BC
.outs() << "0x" << Twine::utohexstr(E
.IP
) << ": " << E
.ORC
;
647 BC
.outs() << ": " << *E
.BF
;
652 return Error::success();
655 Error
LinuxKernelRewriter::processORCPostCFG() {
657 return Error::success();
659 // Propagate ORC to the rest of the function. We can annotate every
660 // instruction in every function, but to minimize the overhead, we annotate
661 // the first instruction in every basic block to reflect the state at the
662 // entry. This way, the ORC state can be calculated based on annotations
663 // regardless of the basic block layout. Note that if we insert/delete
664 // instructions, we must take care to attach ORC info to the new/deleted ones.
665 for (BinaryFunction
&BF
: llvm::make_second_range(BC
.getBinaryFunctions())) {
667 std::optional
<ORCState
> CurrentState
;
668 for (BinaryBasicBlock
&BB
: BF
) {
669 for (MCInst
&Inst
: BB
) {
670 ErrorOr
<ORCState
> State
=
671 BC
.MIB
->tryGetAnnotationAs
<ORCState
>(Inst
, "ORC");
674 CurrentState
= *State
;
678 // Get state for the start of the function.
680 // A terminator entry (NullORC) can match the function address. If
681 // there's also a non-terminator entry, it will be placed after the
682 // terminator. Hence, we are looking for the last ORC entry that
683 // matches the address.
685 llvm::partition_point(ORCEntries
, [&](const ORCListEntry
&E
) {
686 return E
.IP
<= BF
.getAddress();
688 if (It
!= ORCEntries
.begin())
691 assert(It
->IP
== BF
.getAddress() && (!It
->BF
|| It
->BF
== &BF
) &&
692 "ORC info at function entry expected.");
694 if (It
->ORC
== NullORC
&& BF
.hasORC()) {
695 BC
.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
701 CurrentState
= It
->ORC
;
702 if (It
->ORC
!= NullORC
)
706 // While printing ORC, attach info to every instruction for convenience.
707 if (opts::PrintORC
|| &Inst
== &BB
.front())
708 BC
.MIB
->addAnnotation(Inst
, "ORC", *CurrentState
);
713 return Error::success();
716 Error
LinuxKernelRewriter::rewriteORCTables() {
718 return Error::success();
720 // Update ORC sections in-place. As we change the code, the number of ORC
721 // entries may increase for some functions. However, as we remove terminator
722 // redundancy (see below), more space is freed up and we should always be able
723 // to fit new ORC tables in the reserved space.
724 auto createInPlaceWriter
= [&](BinarySection
&Section
) -> BinaryStreamWriter
{
725 const size_t Size
= Section
.getSize();
726 uint8_t *NewContents
= new uint8_t[Size
];
727 Section
.updateContents(NewContents
, Size
);
728 Section
.setOutputFileOffset(Section
.getInputFileOffset());
729 return BinaryStreamWriter({NewContents
, Size
}, BC
.AsmInfo
->isLittleEndian()
733 BinaryStreamWriter UnwindWriter
= createInPlaceWriter(*ORCUnwindSection
);
734 BinaryStreamWriter UnwindIPWriter
= createInPlaceWriter(*ORCUnwindIPSection
);
736 uint64_t NumEmitted
= 0;
737 std::optional
<ORCState
> LastEmittedORC
;
738 auto emitORCEntry
= [&](const uint64_t IP
, const ORCState
&ORC
,
739 MCSymbol
*Label
= 0, bool Force
= false) -> Error
{
740 if (LastEmittedORC
&& ORC
== *LastEmittedORC
&& !Force
)
741 return Error::success();
743 LastEmittedORC
= ORC
;
745 if (++NumEmitted
> NumORCEntries
)
746 return createStringError(errc::executable_format_error
,
747 "exceeded the number of allocated ORC entries");
750 ORCUnwindIPSection
->addRelocation(UnwindIPWriter
.getOffset(), Label
,
751 Relocation::getPC32(), /*Addend*/ 0);
753 const int32_t IPValue
=
754 IP
- ORCUnwindIPSection
->getAddress() - UnwindIPWriter
.getOffset();
755 if (Error E
= UnwindIPWriter
.writeInteger(IPValue
))
758 if (Error E
= UnwindWriter
.writeInteger(ORC
.SPOffset
))
760 if (Error E
= UnwindWriter
.writeInteger(ORC
.BPOffset
))
762 if (Error E
= UnwindWriter
.writeInteger(ORC
.Info
))
765 return Error::success();
768 // Emit new ORC entries for the emitted function.
769 auto emitORC
= [&](const FunctionFragment
&FF
) -> Error
{
770 ORCState CurrentState
= NullORC
;
771 for (BinaryBasicBlock
*BB
: FF
) {
772 for (MCInst
&Inst
: *BB
) {
773 ErrorOr
<ORCState
> ErrorOrState
=
774 BC
.MIB
->tryGetAnnotationAs
<ORCState
>(Inst
, "ORC");
775 if (!ErrorOrState
|| *ErrorOrState
== CurrentState
)
778 // Issue label for the instruction.
780 BC
.MIB
->getOrCreateInstLabel(Inst
, "__ORC_", BC
.Ctx
.get());
782 if (Error E
= emitORCEntry(0, *ErrorOrState
, Label
))
785 CurrentState
= *ErrorOrState
;
789 return Error::success();
792 // Emit ORC entries for cold fragments. We assume that these fragments are
793 // emitted contiguously in memory using reserved space in the kernel. This
794 // assumption is validated in post-emit pass validateORCTables() where we
795 // check that ORC entries are sorted by their addresses.
796 auto emitColdORC
= [&]() -> Error
{
797 for (BinaryFunction
&BF
:
798 llvm::make_second_range(BC
.getBinaryFunctions())) {
799 if (!BC
.shouldEmit(BF
))
801 for (FunctionFragment
&FF
: BF
.getLayout().getSplitFragments())
802 if (Error E
= emitORC(FF
))
806 return Error::success();
809 bool ShouldEmitCold
= !BC
.BOLTReserved
.empty();
810 for (ORCListEntry
&Entry
: ORCEntries
) {
811 if (ShouldEmitCold
&& Entry
.IP
> BC
.BOLTReserved
.start()) {
812 if (Error E
= emitColdORC())
815 // Emit terminator entry at the end of the reserved region.
816 if (Error E
= emitORCEntry(BC
.BOLTReserved
.end(), NullORC
))
819 ShouldEmitCold
= false;
822 // Emit original entries for functions that we haven't modified.
823 if (!Entry
.BF
|| !BC
.shouldEmit(*Entry
.BF
)) {
824 // Emit terminator only if it marks the start of a function.
825 if (Entry
.ORC
== NullORC
&& !Entry
.BF
)
827 if (Error E
= emitORCEntry(Entry
.IP
, Entry
.ORC
))
832 // Emit all ORC entries for a function referenced by an entry and skip over
833 // the rest of entries for this function by resetting its ORC attribute.
834 if (Entry
.BF
->hasORC()) {
835 if (Error E
= emitORC(Entry
.BF
->getLayout().getMainFragment()))
837 Entry
.BF
->setHasORC(false);
841 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
842 << " ORC entries\n");
844 // Populate ORC tables with a terminator entry with max address to match the
845 // original table sizes.
846 const uint64_t LastIP
= std::numeric_limits
<uint64_t>::max();
847 while (UnwindWriter
.bytesRemaining()) {
848 if (Error E
= emitORCEntry(LastIP
, NullORC
, nullptr, /*Force*/ true))
852 return Error::success();
855 Error
LinuxKernelRewriter::validateORCTables() {
856 if (!ORCUnwindIPSection
)
857 return Error::success();
859 const uint64_t IPSectionAddress
= ORCUnwindIPSection
->getAddress();
860 DataExtractor IPDE
= DataExtractor(ORCUnwindIPSection
->getOutputContents(),
861 BC
.AsmInfo
->isLittleEndian(),
862 BC
.AsmInfo
->getCodePointerSize());
863 DataExtractor::Cursor
IPCursor(0);
865 for (uint32_t Index
= 0; Index
< NumORCEntries
; ++Index
) {
867 IPSectionAddress
+ IPCursor
.tell() + (int32_t)IPDE
.getU32(IPCursor
);
869 return createStringError(errc::executable_format_error
,
870 "out of bounds while reading ORC IP table: %s",
871 toString(IPCursor
.takeError()).c_str());
873 assert(IP
>= PrevIP
&& "Unsorted ORC table detected");
878 return Error::success();
881 /// The static call site table is created by objtool and contains entries in the
882 /// following format:
884 /// struct static_call_site {
889 Error
LinuxKernelRewriter::readStaticCalls() {
890 const BinaryData
*StaticCallTable
=
891 BC
.getBinaryDataByName("__start_static_call_sites");
892 if (!StaticCallTable
)
893 return Error::success();
895 StaticCallTableAddress
= StaticCallTable
->getAddress();
897 const BinaryData
*Stop
= BC
.getBinaryDataByName("__stop_static_call_sites");
899 return createStringError(errc::executable_format_error
,
900 "missing __stop_static_call_sites symbol");
902 ErrorOr
<BinarySection
&> ErrorOrSection
=
903 BC
.getSectionForAddress(StaticCallTableAddress
);
905 return createStringError(errc::executable_format_error
,
906 "no section matching __start_static_call_sites");
908 StaticCallSection
= *ErrorOrSection
;
909 if (!StaticCallSection
->containsAddress(Stop
->getAddress() - 1))
910 return createStringError(errc::executable_format_error
,
911 "__stop_static_call_sites not in the same section "
912 "as __start_static_call_sites");
914 if ((Stop
->getAddress() - StaticCallTableAddress
) % STATIC_CALL_ENTRY_SIZE
)
915 return createStringError(errc::executable_format_error
,
916 "static call table size error");
918 const uint64_t SectionAddress
= StaticCallSection
->getAddress();
919 DataExtractor
DE(StaticCallSection
->getContents(),
920 BC
.AsmInfo
->isLittleEndian(),
921 BC
.AsmInfo
->getCodePointerSize());
922 DataExtractor::Cursor
Cursor(StaticCallTableAddress
- SectionAddress
);
923 uint32_t EntryID
= 0;
924 while (Cursor
&& Cursor
.tell() < Stop
->getAddress() - SectionAddress
) {
925 const uint64_t CallAddress
=
926 SectionAddress
+ Cursor
.tell() + (int32_t)DE
.getU32(Cursor
);
927 const uint64_t KeyAddress
=
928 SectionAddress
+ Cursor
.tell() + (int32_t)DE
.getU32(Cursor
);
930 // Consume the status of the cursor.
932 return createStringError(errc::executable_format_error
,
933 "out of bounds while reading static calls: %s",
934 toString(Cursor
.takeError()).c_str());
938 if (opts::DumpStaticCalls
) {
939 BC
.outs() << "Static Call Site: " << EntryID
<< '\n';
940 BC
.outs() << "\tCallAddress: 0x" << Twine::utohexstr(CallAddress
)
941 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress
)
945 BinaryFunction
*BF
= BC
.getBinaryFunctionContainingAddress(CallAddress
);
949 if (!BC
.shouldEmit(*BF
))
952 if (!BF
->hasInstructions())
955 MCInst
*Inst
= BF
->getInstructionAtOffset(CallAddress
- BF
->getAddress());
957 return createStringError(errc::executable_format_error
,
958 "no instruction at call site address 0x%" PRIx64
,
961 // Check for duplicate entries.
962 if (BC
.MIB
->hasAnnotation(*Inst
, "StaticCall"))
963 return createStringError(errc::executable_format_error
,
964 "duplicate static call site at 0x%" PRIx64
,
967 BC
.MIB
->addAnnotation(*Inst
, "StaticCall", EntryID
);
970 BC
.MIB
->getOrCreateInstLabel(*Inst
, "__SC_", BC
.Ctx
.get());
972 StaticCallEntries
.push_back({EntryID
, BF
, Label
});
975 BC
.outs() << "BOLT-INFO: parsed " << StaticCallEntries
.size()
976 << " static call entries\n";
978 return Error::success();
981 /// The static call table is sorted during boot time in
982 /// static_call_sort_entries(). This makes it possible to update existing
983 /// entries in-place ignoring their relative order.
984 Error
LinuxKernelRewriter::rewriteStaticCalls() {
985 if (!StaticCallTableAddress
|| !StaticCallSection
)
986 return Error::success();
988 for (auto &Entry
: StaticCallEntries
) {
992 BinaryFunction
&BF
= *Entry
.Function
;
993 if (!BC
.shouldEmit(BF
))
996 // Create a relocation against the label.
997 const uint64_t EntryOffset
= StaticCallTableAddress
-
998 StaticCallSection
->getAddress() +
999 (Entry
.ID
- 1) * STATIC_CALL_ENTRY_SIZE
;
1000 StaticCallSection
->addRelocation(EntryOffset
, Entry
.Label
,
1001 ELF::R_X86_64_PC32
, /*Addend*/ 0);
1004 return Error::success();
1007 /// Instructions that access user-space memory can cause page faults. These
1008 /// faults will be handled by the kernel and execution will resume at the fixup
1009 /// code location if the address was invalid. The kernel uses the exception
1010 /// table to match the faulting instruction to its fixup. The table consists of
1011 /// the following entries:
1013 /// struct exception_table_entry {
1020 /// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt
1021 Error
LinuxKernelRewriter::readExceptionTable() {
1022 ExceptionsSection
= BC
.getUniqueSectionByName("__ex_table");
1023 if (!ExceptionsSection
)
1024 return Error::success();
1026 if (ExceptionsSection
->getSize() % EXCEPTION_TABLE_ENTRY_SIZE
)
1027 return createStringError(errc::executable_format_error
,
1028 "exception table size error");
1030 const uint64_t SectionAddress
= ExceptionsSection
->getAddress();
1031 DataExtractor
DE(ExceptionsSection
->getContents(),
1032 BC
.AsmInfo
->isLittleEndian(),
1033 BC
.AsmInfo
->getCodePointerSize());
1034 DataExtractor::Cursor
Cursor(0);
1035 uint32_t EntryID
= 0;
1036 while (Cursor
&& Cursor
.tell() < ExceptionsSection
->getSize()) {
1037 const uint64_t InstAddress
=
1038 SectionAddress
+ Cursor
.tell() + (int32_t)DE
.getU32(Cursor
);
1039 const uint64_t FixupAddress
=
1040 SectionAddress
+ Cursor
.tell() + (int32_t)DE
.getU32(Cursor
);
1041 const uint64_t Data
= DE
.getU32(Cursor
);
1043 // Consume the status of the cursor.
1045 return createStringError(
1046 errc::executable_format_error
,
1047 "out of bounds while reading exception table: %s",
1048 toString(Cursor
.takeError()).c_str());
1052 if (opts::DumpExceptions
) {
1053 BC
.outs() << "Exception Entry: " << EntryID
<< '\n';
1054 BC
.outs() << "\tInsn: 0x" << Twine::utohexstr(InstAddress
) << '\n'
1055 << "\tFixup: 0x" << Twine::utohexstr(FixupAddress
) << '\n'
1056 << "\tData: 0x" << Twine::utohexstr(Data
) << '\n';
1059 MCInst
*Inst
= nullptr;
1060 MCSymbol
*FixupLabel
= nullptr;
1062 BinaryFunction
*InstBF
= BC
.getBinaryFunctionContainingAddress(InstAddress
);
1063 if (InstBF
&& BC
.shouldEmit(*InstBF
)) {
1064 Inst
= InstBF
->getInstructionAtOffset(InstAddress
- InstBF
->getAddress());
1066 return createStringError(errc::executable_format_error
,
1067 "no instruction at address 0x%" PRIx64
1068 " in exception table",
1070 BC
.MIB
->addAnnotation(*Inst
, "ExceptionEntry", EntryID
);
1071 FunctionsWithExceptions
.insert(InstBF
);
1074 if (!InstBF
&& opts::Verbosity
) {
1075 BC
.outs() << "BOLT-INFO: no function matches instruction at 0x"
1076 << Twine::utohexstr(InstAddress
)
1077 << " referenced by Linux exception table\n";
1080 BinaryFunction
*FixupBF
=
1081 BC
.getBinaryFunctionContainingAddress(FixupAddress
);
1082 if (FixupBF
&& BC
.shouldEmit(*FixupBF
)) {
1083 const uint64_t Offset
= FixupAddress
- FixupBF
->getAddress();
1084 if (!FixupBF
->getInstructionAtOffset(Offset
))
1085 return createStringError(errc::executable_format_error
,
1086 "no instruction at fixup address 0x%" PRIx64
1087 " in exception table",
1089 FixupLabel
= Offset
? FixupBF
->addEntryPointAtOffset(Offset
)
1090 : FixupBF
->getSymbol();
1092 BC
.MIB
->addAnnotation(*Inst
, "Fixup", FixupLabel
->getName());
1093 FunctionsWithExceptions
.insert(FixupBF
);
1096 if (!FixupBF
&& opts::Verbosity
) {
1097 BC
.outs() << "BOLT-INFO: no function matches fixup code at 0x"
1098 << Twine::utohexstr(FixupAddress
)
1099 << " referenced by Linux exception table\n";
1103 BC
.outs() << "BOLT-INFO: parsed "
1104 << ExceptionsSection
->getSize() / EXCEPTION_TABLE_ENTRY_SIZE
1105 << " exception table entries\n";
1107 return Error::success();
1110 /// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects
1111 /// the exception table to be sorted. Hence we have to sort it after code
1113 Error
LinuxKernelRewriter::rewriteExceptionTable() {
1114 // Disable output of functions with exceptions before rewrite support is
1116 for (BinaryFunction
*BF
: FunctionsWithExceptions
)
1117 BF
->setSimple(false);
1119 return Error::success();
1122 /// .parainsrtuctions section contains information for patching parvirtual call
1123 /// instructions during runtime. The entries in the section are in the form:
1125 /// struct paravirt_patch_site {
1126 /// u8 *instr; /* original instructions */
1127 /// u8 type; /* type of this instruction */
1128 /// u8 len; /* length of original instruction */
1131 /// Note that the structures are aligned at 8-byte boundary.
1132 Error
LinuxKernelRewriter::readParaInstructions() {
1133 ParavirtualPatchSection
= BC
.getUniqueSectionByName(".parainstructions");
1134 if (!ParavirtualPatchSection
)
1135 return Error::success();
1137 DataExtractor DE
= DataExtractor(ParavirtualPatchSection
->getContents(),
1138 BC
.AsmInfo
->isLittleEndian(),
1139 BC
.AsmInfo
->getCodePointerSize());
1140 uint32_t EntryID
= 0;
1141 DataExtractor::Cursor
Cursor(0);
1142 while (Cursor
&& !DE
.eof(Cursor
)) {
1143 const uint64_t NextOffset
= alignTo(Cursor
.tell(), Align(PARA_PATCH_ALIGN
));
1144 if (!DE
.isValidOffset(NextOffset
))
1147 Cursor
.seek(NextOffset
);
1149 const uint64_t InstrLocation
= DE
.getU64(Cursor
);
1150 const uint8_t Type
= DE
.getU8(Cursor
);
1151 const uint8_t Len
= DE
.getU8(Cursor
);
1154 return createStringError(
1155 errc::executable_format_error
,
1156 "out of bounds while reading .parainstructions: %s",
1157 toString(Cursor
.takeError()).c_str());
1161 if (opts::DumpParavirtualPatchSites
) {
1162 BC
.outs() << "Paravirtual patch site: " << EntryID
<< '\n';
1163 BC
.outs() << "\tInstr: 0x" << Twine::utohexstr(InstrLocation
)
1164 << "\n\tType: 0x" << Twine::utohexstr(Type
) << "\n\tLen: 0x"
1165 << Twine::utohexstr(Len
) << '\n';
1168 BinaryFunction
*BF
= BC
.getBinaryFunctionContainingAddress(InstrLocation
);
1169 if (!BF
&& opts::Verbosity
) {
1170 BC
.outs() << "BOLT-INFO: no function matches address 0x"
1171 << Twine::utohexstr(InstrLocation
)
1172 << " referenced by paravirutal patch site\n";
1175 if (BF
&& BC
.shouldEmit(*BF
)) {
1177 BF
->getInstructionAtOffset(InstrLocation
- BF
->getAddress());
1179 return createStringError(errc::executable_format_error
,
1180 "no instruction at address 0x%" PRIx64
1181 " in paravirtual call site %d",
1182 InstrLocation
, EntryID
);
1183 BC
.MIB
->addAnnotation(*Inst
, "ParaSite", EntryID
);
1187 BC
.outs() << "BOLT-INFO: parsed " << EntryID
<< " paravirtual patch sites\n";
1189 return Error::success();
1192 void LinuxKernelRewriter::skipFunctionsWithAnnotation(
1193 StringRef Annotation
) const {
1194 for (BinaryFunction
&BF
: llvm::make_second_range(BC
.getBinaryFunctions())) {
1195 if (!BC
.shouldEmit(BF
))
1197 for (const BinaryBasicBlock
&BB
: BF
) {
1198 const bool HasAnnotation
= llvm::any_of(BB
, [&](const MCInst
&Inst
) {
1199 return BC
.MIB
->hasAnnotation(Inst
, Annotation
);
1201 if (HasAnnotation
) {
1202 BF
.setSimple(false);
1209 Error
LinuxKernelRewriter::rewriteParaInstructions() {
1210 // Disable output of functions with paravirtual instructions before the
1211 // rewrite support is complete.
1212 skipFunctionsWithAnnotation("ParaSite");
1214 return Error::success();
1217 /// Process __bug_table section.
1218 /// This section contains information useful for kernel debugging, mostly
1219 /// utilized by WARN()/WARN_ON() macros and deprecated BUG()/BUG_ON().
1221 /// Each entry in the section is a struct bug_entry that contains a pointer to
1222 /// the ud2 instruction corresponding to the bug, corresponding file name (both
1223 /// pointers use PC relative offset addressing), line number, and flags.
1224 /// The definition of the struct bug_entry can be found in
1225 /// `include/asm-generic/bug.h`. The first entry in the struct is an instruction
1226 /// address encoded as a PC-relative offset. In theory, it could be an absolute
1227 /// address if CONFIG_GENERIC_BUG_RELATIVE_POINTERS is not set, but in practice
1228 /// the kernel code relies on it being a relative offset on x86-64.
1229 Error
LinuxKernelRewriter::readBugTable() {
1230 BugTableSection
= BC
.getUniqueSectionByName("__bug_table");
1231 if (!BugTableSection
)
1232 return Error::success();
1234 if (BugTableSection
->getSize() % BUG_TABLE_ENTRY_SIZE
)
1235 return createStringError(errc::executable_format_error
,
1236 "bug table size error");
1238 const uint64_t SectionAddress
= BugTableSection
->getAddress();
1239 DataExtractor
DE(BugTableSection
->getContents(), BC
.AsmInfo
->isLittleEndian(),
1240 BC
.AsmInfo
->getCodePointerSize());
1241 DataExtractor::Cursor
Cursor(0);
1242 uint32_t EntryID
= 0;
1243 while (Cursor
&& Cursor
.tell() < BugTableSection
->getSize()) {
1244 const uint64_t Pos
= Cursor
.tell();
1245 const uint64_t InstAddress
=
1246 SectionAddress
+ Pos
+ (int32_t)DE
.getU32(Cursor
);
1247 Cursor
.seek(Pos
+ BUG_TABLE_ENTRY_SIZE
);
1250 return createStringError(errc::executable_format_error
,
1251 "out of bounds while reading __bug_table: %s",
1252 toString(Cursor
.takeError()).c_str());
1256 BinaryFunction
*BF
= BC
.getBinaryFunctionContainingAddress(InstAddress
);
1257 if (!BF
&& opts::Verbosity
) {
1258 BC
.outs() << "BOLT-INFO: no function matches address 0x"
1259 << Twine::utohexstr(InstAddress
)
1260 << " referenced by bug table\n";
1263 if (BF
&& BC
.shouldEmit(*BF
)) {
1264 MCInst
*Inst
= BF
->getInstructionAtOffset(InstAddress
- BF
->getAddress());
1266 return createStringError(errc::executable_format_error
,
1267 "no instruction at address 0x%" PRIx64
1268 " referenced by bug table entry %d",
1269 InstAddress
, EntryID
);
1270 BC
.MIB
->addAnnotation(*Inst
, "BugEntry", EntryID
);
1272 FunctionBugList
[BF
].push_back(EntryID
);
1276 BC
.outs() << "BOLT-INFO: parsed " << EntryID
<< " bug table entries\n";
1278 return Error::success();
1281 /// find_bug() uses linear search to match an address to an entry in the bug
1282 /// table. Hence, there is no need to sort entries when rewriting the table.
1283 /// When we need to erase an entry, we set its instruction address to zero.
1284 Error
LinuxKernelRewriter::rewriteBugTable() {
1285 if (!BugTableSection
)
1286 return Error::success();
1288 for (BinaryFunction
&BF
: llvm::make_second_range(BC
.getBinaryFunctions())) {
1289 if (!BC
.shouldEmit(BF
))
1292 if (!FunctionBugList
.count(&BF
))
1295 // Bugs that will be emitted for this function.
1296 DenseSet
<uint32_t> EmittedIDs
;
1297 for (BinaryBasicBlock
&BB
: BF
) {
1298 for (MCInst
&Inst
: BB
) {
1299 if (!BC
.MIB
->hasAnnotation(Inst
, "BugEntry"))
1301 const uint32_t ID
= BC
.MIB
->getAnnotationAs
<uint32_t>(Inst
, "BugEntry");
1302 EmittedIDs
.insert(ID
);
1304 // Create a relocation entry for this bug entry.
1306 BC
.MIB
->getOrCreateInstLabel(Inst
, "__BUG_", BC
.Ctx
.get());
1307 const uint64_t EntryOffset
= (ID
- 1) * BUG_TABLE_ENTRY_SIZE
;
1308 BugTableSection
->addRelocation(EntryOffset
, Label
, ELF::R_X86_64_PC32
,
1313 // Clear bug entries that were not emitted for this function, e.g. as a
1314 // result of DCE, but setting their instruction address to zero.
1315 for (const uint32_t ID
: FunctionBugList
[&BF
]) {
1316 if (!EmittedIDs
.count(ID
)) {
1317 const uint64_t EntryOffset
= (ID
- 1) * BUG_TABLE_ENTRY_SIZE
;
1318 BugTableSection
->addRelocation(EntryOffset
, nullptr, ELF::R_X86_64_PC32
,
1324 return Error::success();
1327 /// The kernel can replace certain instruction sequences depending on hardware
1328 /// it is running on and features specified during boot time. The information
1329 /// about alternative instruction sequences is stored in .altinstructions
1330 /// section. The format of entries in this section is defined in
1331 /// arch/x86/include/asm/alternative.h:
1333 /// struct alt_instr {
1334 /// s32 instr_offset;
1335 /// s32 repl_offset;
1338 /// u8 replacementlen;
1339 /// u8 padlen; // present in older kernels
1342 /// Note that the structure is packed.
1344 /// Since the size of the "feature" field could be either u16 or u32, and
1345 /// "padlen" presence is unknown, we attempt to parse .altinstructions section
1346 /// using all possible combinations (four at this time). Since we validate the
1347 /// contents of the section and its size, the detection works quite well.
1348 /// Still, we leave the user the opportunity to specify these features on the
1349 /// command line and skip the guesswork.
1350 Error
LinuxKernelRewriter::readAltInstructions() {
1351 AltInstrSection
= BC
.getUniqueSectionByName(".altinstructions");
1352 if (!AltInstrSection
)
1353 return Error::success();
1355 // Presence of "padlen" field.
1356 std::vector
<bool> PadLenVariants
;
1357 if (opts::AltInstHasPadLen
.getNumOccurrences())
1358 PadLenVariants
.push_back(opts::AltInstHasPadLen
);
1360 PadLenVariants
= {false, true};
1362 // Size (in bytes) variants of "feature" field.
1363 std::vector
<uint32_t> FeatureSizeVariants
;
1364 if (opts::AltInstFeatureSize
.getNumOccurrences())
1365 FeatureSizeVariants
.push_back(opts::AltInstFeatureSize
);
1367 FeatureSizeVariants
= {2, 4};
1369 for (bool AltInstHasPadLen
: PadLenVariants
) {
1370 for (uint32_t AltInstFeatureSize
: FeatureSizeVariants
) {
1372 dbgs() << "BOLT-DEBUG: trying AltInstHasPadLen = " << AltInstHasPadLen
1373 << "; AltInstFeatureSize = " << AltInstFeatureSize
<< ";\n";
1375 if (Error E
= tryReadAltInstructions(AltInstFeatureSize
, AltInstHasPadLen
,
1376 /*ParseOnly*/ true)) {
1377 consumeError(std::move(E
));
1381 LLVM_DEBUG(dbgs() << "Matched .altinstructions format\n");
1383 if (!opts::AltInstHasPadLen
.getNumOccurrences())
1384 BC
.outs() << "BOLT-INFO: setting --" << opts::AltInstHasPadLen
.ArgStr
1385 << '=' << AltInstHasPadLen
<< '\n';
1387 if (!opts::AltInstFeatureSize
.getNumOccurrences())
1388 BC
.outs() << "BOLT-INFO: setting --" << opts::AltInstFeatureSize
.ArgStr
1389 << '=' << AltInstFeatureSize
<< '\n';
1391 return tryReadAltInstructions(AltInstFeatureSize
, AltInstHasPadLen
,
1392 /*ParseOnly*/ false);
1396 // We couldn't match the format. Read again to properly propagate the error
1398 return tryReadAltInstructions(opts::AltInstFeatureSize
,
1399 opts::AltInstHasPadLen
, /*ParseOnly*/ false);
1402 Error
LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize
,
1403 bool AltInstHasPadLen
,
1405 const uint64_t Address
= AltInstrSection
->getAddress();
1406 DataExtractor DE
= DataExtractor(AltInstrSection
->getContents(),
1407 BC
.AsmInfo
->isLittleEndian(),
1408 BC
.AsmInfo
->getCodePointerSize());
1409 uint64_t EntryID
= 0;
1410 DataExtractor::Cursor
Cursor(0);
1411 while (Cursor
&& !DE
.eof(Cursor
)) {
1412 const uint64_t OrgInstAddress
=
1413 Address
+ Cursor
.tell() + (int32_t)DE
.getU32(Cursor
);
1414 const uint64_t AltInstAddress
=
1415 Address
+ Cursor
.tell() + (int32_t)DE
.getU32(Cursor
);
1416 const uint64_t Feature
= DE
.getUnsigned(Cursor
, AltInstFeatureSize
);
1417 const uint8_t OrgSize
= DE
.getU8(Cursor
);
1418 const uint8_t AltSize
= DE
.getU8(Cursor
);
1420 // Older kernels may have the padlen field.
1421 const uint8_t PadLen
= AltInstHasPadLen
? DE
.getU8(Cursor
) : 0;
1424 return createStringError(
1425 errc::executable_format_error
,
1426 "out of bounds while reading .altinstructions: %s",
1427 toString(Cursor
.takeError()).c_str());
1431 if (opts::DumpAltInstructions
) {
1432 BC
.outs() << "Alternative instruction entry: " << EntryID
1433 << "\n\tOrg: 0x" << Twine::utohexstr(OrgInstAddress
)
1434 << "\n\tAlt: 0x" << Twine::utohexstr(AltInstAddress
)
1435 << "\n\tFeature: 0x" << Twine::utohexstr(Feature
)
1436 << "\n\tOrgSize: " << (int)OrgSize
1437 << "\n\tAltSize: " << (int)AltSize
<< '\n';
1438 if (AltInstHasPadLen
)
1439 BC
.outs() << "\tPadLen: " << (int)PadLen
<< '\n';
1442 if (AltSize
> OrgSize
)
1443 return createStringError(errc::executable_format_error
,
1444 "error reading .altinstructions");
1446 BinaryFunction
*BF
= BC
.getBinaryFunctionContainingAddress(OrgInstAddress
);
1447 if (!BF
&& opts::Verbosity
) {
1448 BC
.outs() << "BOLT-INFO: no function matches address 0x"
1449 << Twine::utohexstr(OrgInstAddress
)
1450 << " of instruction from .altinstructions\n";
1453 BinaryFunction
*AltBF
=
1454 BC
.getBinaryFunctionContainingAddress(AltInstAddress
);
1455 if (!ParseOnly
&& AltBF
&& BC
.shouldEmit(*AltBF
)) {
1457 << "BOLT-WARNING: alternative instruction sequence found in function "
1459 AltBF
->setIgnored();
1462 if (!BF
|| !BF
->hasInstructions())
1465 if (OrgInstAddress
+ OrgSize
> BF
->getAddress() + BF
->getSize())
1466 return createStringError(errc::executable_format_error
,
1467 "error reading .altinstructions");
1470 BF
->getInstructionAtOffset(OrgInstAddress
- BF
->getAddress());
1472 return createStringError(errc::executable_format_error
,
1473 "no instruction at address 0x%" PRIx64
1474 " referenced by .altinstructions entry %d",
1475 OrgInstAddress
, EntryID
);
1480 // There could be more than one alternative instruction sequences for the
1481 // same original instruction. Annotate each alternative separately.
1482 std::string AnnotationName
= "AltInst";
1484 while (BC
.MIB
->hasAnnotation(*Inst
, AnnotationName
))
1485 AnnotationName
= "AltInst" + std::to_string(N
++);
1487 BC
.MIB
->addAnnotation(*Inst
, AnnotationName
, EntryID
);
1489 // Annotate all instructions from the original sequence. Note that it's not
1490 // the most efficient way to look for instructions in the address range,
1491 // but since alternative instructions are uncommon, it will do for now.
1492 for (uint32_t Offset
= 1; Offset
< OrgSize
; ++Offset
) {
1493 Inst
= BF
->getInstructionAtOffset(OrgInstAddress
+ Offset
-
1496 BC
.MIB
->addAnnotation(*Inst
, AnnotationName
, EntryID
);
1501 BC
.outs() << "BOLT-INFO: parsed " << EntryID
1502 << " alternative instruction entries\n";
1504 return Error::success();
1507 void LinuxKernelRewriter::processAltInstructionsPostCFG() {
1508 // Disable optimization and output of functions with alt instructions before
1509 // the rewrite support is complete. Alt instructions can modify the control
1510 // flow, hence we may end up deleting seemingly unreachable code.
1511 skipFunctionsWithAnnotation("AltInst");
1514 /// When the Linux kernel needs to handle an error associated with a given PCI
1515 /// device, it uses a table stored in .pci_fixup section to locate a fixup code
1516 /// specific to the vendor and the problematic device. The section contains a
1517 /// list of the following structures defined in include/linux/pci.h:
1519 /// struct pci_fixup {
1520 /// u16 vendor; /* Or PCI_ANY_ID */
1521 /// u16 device; /* Or PCI_ANY_ID */
1522 /// u32 class; /* Or PCI_ANY_ID */
1523 /// unsigned int class_shift; /* should be 0, 8, 16 */
1524 /// int hook_offset;
1527 /// Normally, the hook will point to a function start and we don't have to
1528 /// update the pointer if we are not relocating functions. Hence, while reading
1529 /// the table we validate this assumption. If a function has a fixup code in the
1530 /// middle of its body, we issue a warning and ignore it.
1531 Error
LinuxKernelRewriter::readPCIFixupTable() {
1532 PCIFixupSection
= BC
.getUniqueSectionByName(".pci_fixup");
1533 if (!PCIFixupSection
)
1534 return Error::success();
1536 if (PCIFixupSection
->getSize() % PCI_FIXUP_ENTRY_SIZE
)
1537 return createStringError(errc::executable_format_error
,
1538 "PCI fixup table size error");
1540 const uint64_t Address
= PCIFixupSection
->getAddress();
1541 DataExtractor DE
= DataExtractor(PCIFixupSection
->getContents(),
1542 BC
.AsmInfo
->isLittleEndian(),
1543 BC
.AsmInfo
->getCodePointerSize());
1544 uint64_t EntryID
= 0;
1545 DataExtractor::Cursor
Cursor(0);
1546 while (Cursor
&& !DE
.eof(Cursor
)) {
1547 const uint16_t Vendor
= DE
.getU16(Cursor
);
1548 const uint16_t Device
= DE
.getU16(Cursor
);
1549 const uint32_t Class
= DE
.getU32(Cursor
);
1550 const uint32_t ClassShift
= DE
.getU32(Cursor
);
1551 const uint64_t HookAddress
=
1552 Address
+ Cursor
.tell() + (int32_t)DE
.getU32(Cursor
);
1555 return createStringError(errc::executable_format_error
,
1556 "out of bounds while reading .pci_fixup: %s",
1557 toString(Cursor
.takeError()).c_str());
1561 if (opts::DumpPCIFixups
) {
1562 BC
.outs() << "PCI fixup entry: " << EntryID
<< "\n\tVendor 0x"
1563 << Twine::utohexstr(Vendor
) << "\n\tDevice: 0x"
1564 << Twine::utohexstr(Device
) << "\n\tClass: 0x"
1565 << Twine::utohexstr(Class
) << "\n\tClassShift: 0x"
1566 << Twine::utohexstr(ClassShift
) << "\n\tHookAddress: 0x"
1567 << Twine::utohexstr(HookAddress
) << '\n';
1570 BinaryFunction
*BF
= BC
.getBinaryFunctionContainingAddress(HookAddress
);
1571 if (!BF
&& opts::Verbosity
) {
1572 BC
.outs() << "BOLT-INFO: no function matches address 0x"
1573 << Twine::utohexstr(HookAddress
)
1574 << " of hook from .pci_fixup\n";
1577 if (!BF
|| !BC
.shouldEmit(*BF
))
1580 if (const uint64_t Offset
= HookAddress
- BF
->getAddress()) {
1581 BC
.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function "
1582 << *BF
<< " at offset 0x" << Twine::utohexstr(Offset
) << '\n';
1583 BF
->setSimple(false);
1587 BC
.outs() << "BOLT-INFO: parsed " << EntryID
<< " PCI fixup entries\n";
1589 return Error::success();
1592 /// Runtime code modification used by static keys is the most ubiquitous
1593 /// self-modifying feature of the Linux kernel. The idea is to eliminate the
1594 /// condition check and associated conditional jump on a hot path if that
1595 /// condition (based on a boolean value of a static key) does not change often.
1596 /// Whenever the condition changes, the kernel runtime modifies all code paths
1597 /// associated with that key flipping the code between nop and (unconditional)
1598 /// jump. The information about the code is stored in a static key jump table
1599 /// and contains the list of entries of the following type from
1600 /// include/linux/jump_label.h:
1602 /// struct jump_entry {
1605 /// long key; // key may be far away from the core kernel under KASLR
1608 /// The list does not have to be stored in any sorted way, but it is sorted at
1609 /// boot time (or module initialization time) first by "key" and then by "code".
1610 /// jump_label_sort_entries() is responsible for sorting the table.
1612 /// The key in jump_entry structure uses lower two bits of the key address
1613 /// (which itself is aligned) to store extra information. We are interested in
1614 /// the lower bit which indicates if the key is likely to be set on the code
1615 /// path associated with this jump_entry.
1617 /// static_key_{enable,disable}() functions modify the code based on key and
1618 /// jump table entries.
1620 /// jump_label_update() updates all code entries for a given key. Batch mode is
1623 /// The actual patching happens in text_poke_bp_batch() that overrides the first
1624 /// byte of the sequence with int3 before proceeding with actual code
1626 Error
LinuxKernelRewriter::readStaticKeysJumpTable() {
1627 const BinaryData
*StaticKeysJumpTable
=
1628 BC
.getBinaryDataByName("__start___jump_table");
1629 if (!StaticKeysJumpTable
)
1630 return Error::success();
1632 StaticKeysJumpTableAddress
= StaticKeysJumpTable
->getAddress();
1634 const BinaryData
*Stop
= BC
.getBinaryDataByName("__stop___jump_table");
1636 return createStringError(errc::executable_format_error
,
1637 "missing __stop___jump_table symbol");
1639 ErrorOr
<BinarySection
&> ErrorOrSection
=
1640 BC
.getSectionForAddress(StaticKeysJumpTableAddress
);
1641 if (!ErrorOrSection
)
1642 return createStringError(errc::executable_format_error
,
1643 "no section matching __start___jump_table");
1645 StaticKeysJumpSection
= *ErrorOrSection
;
1646 if (!StaticKeysJumpSection
->containsAddress(Stop
->getAddress() - 1))
1647 return createStringError(errc::executable_format_error
,
1648 "__stop___jump_table not in the same section "
1649 "as __start___jump_table");
1651 if ((Stop
->getAddress() - StaticKeysJumpTableAddress
) %
1652 STATIC_KEYS_JUMP_ENTRY_SIZE
)
1653 return createStringError(errc::executable_format_error
,
1654 "static keys jump table size error");
1656 const uint64_t SectionAddress
= StaticKeysJumpSection
->getAddress();
1657 DataExtractor
DE(StaticKeysJumpSection
->getContents(),
1658 BC
.AsmInfo
->isLittleEndian(),
1659 BC
.AsmInfo
->getCodePointerSize());
1660 DataExtractor::Cursor
Cursor(StaticKeysJumpTableAddress
- SectionAddress
);
1661 uint32_t EntryID
= 0;
1662 while (Cursor
&& Cursor
.tell() < Stop
->getAddress() - SectionAddress
) {
1663 const uint64_t JumpAddress
=
1664 SectionAddress
+ Cursor
.tell() + (int32_t)DE
.getU32(Cursor
);
1665 const uint64_t TargetAddress
=
1666 SectionAddress
+ Cursor
.tell() + (int32_t)DE
.getU32(Cursor
);
1667 const uint64_t KeyAddress
=
1668 SectionAddress
+ Cursor
.tell() + (int64_t)DE
.getU64(Cursor
);
1670 // Consume the status of the cursor.
1672 return createStringError(
1673 errc::executable_format_error
,
1674 "out of bounds while reading static keys jump table: %s",
1675 toString(Cursor
.takeError()).c_str());
1679 JumpInfo
.push_back(JumpInfoEntry());
1680 JumpInfoEntry
&Info
= JumpInfo
.back();
1681 Info
.Likely
= KeyAddress
& 1;
1683 if (opts::DumpStaticKeys
) {
1684 BC
.outs() << "Static key jump entry: " << EntryID
1685 << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress
)
1686 << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress
)
1687 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress
)
1688 << "\n\tIsLikely: " << Info
.Likely
<< '\n';
1691 BinaryFunction
*BF
= BC
.getBinaryFunctionContainingAddress(JumpAddress
);
1692 if (!BF
&& opts::Verbosity
) {
1694 << "BOLT-INFO: no function matches address 0x"
1695 << Twine::utohexstr(JumpAddress
)
1696 << " of jump instruction referenced from static keys jump table\n";
1699 if (!BF
|| !BC
.shouldEmit(*BF
))
1702 MCInst
*Inst
= BF
->getInstructionAtOffset(JumpAddress
- BF
->getAddress());
1704 return createStringError(
1705 errc::executable_format_error
,
1706 "no instruction at static keys jump site address 0x%" PRIx64
,
1709 if (!BF
->containsAddress(TargetAddress
))
1710 return createStringError(
1711 errc::executable_format_error
,
1712 "invalid target of static keys jump at 0x%" PRIx64
" : 0x%" PRIx64
,
1713 JumpAddress
, TargetAddress
);
1715 const bool IsBranch
= BC
.MIB
->isBranch(*Inst
);
1716 if (!IsBranch
&& !BC
.MIB
->isNoop(*Inst
))
1717 return createStringError(errc::executable_format_error
,
1718 "jump or nop expected at address 0x%" PRIx64
,
1721 const uint64_t Size
= BC
.computeInstructionSize(*Inst
);
1722 if (Size
!= 2 && Size
!= 5) {
1723 return createStringError(
1724 errc::executable_format_error
,
1725 "unexpected static keys jump size at address 0x%" PRIx64
,
1729 MCSymbol
*Target
= BF
->registerBranch(JumpAddress
, TargetAddress
);
1730 MCInst StaticKeyBranch
;
1732 // Create a conditional branch instruction. The actual conditional code type
1733 // should not matter as long as it's a valid code. The instruction should be
1734 // treated as a conditional branch for control-flow purposes. Before we emit
1735 // the code, it will be converted to a different instruction in
1736 // rewriteStaticKeysJumpTable().
1738 // NB: for older kernels, under LongJumpLabels option, we create long
1739 // conditional branch to guarantee that code size estimation takes
1740 // into account the extra bytes needed for long branch that will be used
1741 // by the kernel patching code. Newer kernels can work with both short
1742 // and long branches. The code for long conditional branch is larger
1743 // than unconditional one, so we are pessimistic in our estimations.
1744 if (opts::LongJumpLabels
)
1745 BC
.MIB
->createLongCondBranch(StaticKeyBranch
, Target
, 0, BC
.Ctx
.get());
1747 BC
.MIB
->createCondBranch(StaticKeyBranch
, Target
, 0, BC
.Ctx
.get());
1748 BC
.MIB
->moveAnnotations(std::move(*Inst
), StaticKeyBranch
);
1749 BC
.MIB
->setDynamicBranch(StaticKeyBranch
, EntryID
);
1750 *Inst
= StaticKeyBranch
;
1752 // IsBranch = InitialValue ^ LIKELY
1759 // => InitialValue = IsBranch ^ LIKELY
1760 Info
.InitValue
= IsBranch
^ Info
.Likely
;
1762 // Add annotations to facilitate manual code analysis.
1763 BC
.MIB
->addAnnotation(*Inst
, "Likely", Info
.Likely
);
1764 BC
.MIB
->addAnnotation(*Inst
, "InitValue", Info
.InitValue
);
1765 if (!BC
.MIB
->getSize(*Inst
))
1766 BC
.MIB
->setSize(*Inst
, Size
);
1768 if (!BC
.MIB
->getOffset(*Inst
))
1769 BC
.MIB
->setOffset(*Inst
, JumpAddress
- BF
->getAddress());
1771 if (opts::LongJumpLabels
)
1772 BC
.MIB
->setSize(*Inst
, 5);
1775 BC
.outs() << "BOLT-INFO: parsed " << EntryID
<< " static keys jump entries\n";
1777 return Error::success();
1780 // Pre-emit pass. Convert dynamic branch instructions into jumps that could be
1781 // relaxed. In post-emit pass we will convert those jumps into nops when
1782 // necessary. We do the unconditional conversion into jumps so that the jumps
1783 // can be relaxed and the optimal size of jump/nop instruction is selected.
1784 Error
LinuxKernelRewriter::rewriteStaticKeysJumpTable() {
1785 if (!StaticKeysJumpSection
)
1786 return Error::success();
1788 uint64_t NumShort
= 0;
1789 uint64_t NumLong
= 0;
1790 for (BinaryFunction
&BF
: llvm::make_second_range(BC
.getBinaryFunctions())) {
1791 if (!BC
.shouldEmit(BF
))
1794 for (BinaryBasicBlock
&BB
: BF
) {
1795 for (MCInst
&Inst
: BB
) {
1796 if (!BC
.MIB
->isDynamicBranch(Inst
))
1799 const uint32_t EntryID
= *BC
.MIB
->getDynamicBranchID(Inst
);
1801 const_cast<MCSymbol
*>(BC
.MIB
->getTargetSymbol(Inst
));
1802 assert(Target
&& "Target symbol should be set.");
1804 const JumpInfoEntry
&Info
= JumpInfo
[EntryID
- 1];
1805 const bool IsBranch
= Info
.Likely
^ Info
.InitValue
;
1807 uint32_t Size
= *BC
.MIB
->getSize(Inst
);
1813 llvm_unreachable("Wrong size for static keys jump instruction.");
1816 // Replace the instruction with unconditional jump even if it needs to
1817 // be nop in the binary.
1818 if (opts::LongJumpLabels
) {
1819 BC
.MIB
->createLongUncondBranch(NewInst
, Target
, BC
.Ctx
.get());
1821 // Newer kernels can handle short and long jumps for static keys.
1822 // Optimistically, emit short jump and check if it gets relaxed into
1823 // a long one during post-emit. Only then convert the jump to a nop.
1824 BC
.MIB
->createUncondBranch(NewInst
, Target
, BC
.Ctx
.get());
1827 BC
.MIB
->moveAnnotations(std::move(Inst
), NewInst
);
1830 // Mark the instruction for nop conversion.
1832 NopIDs
.insert(EntryID
);
1835 BC
.MIB
->getOrCreateInstLabel(Inst
, "__SK_", BC
.Ctx
.get());
1837 // Create a relocation against the label.
1838 const uint64_t EntryOffset
= StaticKeysJumpTableAddress
-
1839 StaticKeysJumpSection
->getAddress() +
1841 StaticKeysJumpSection
->addRelocation(EntryOffset
, Label
,
1844 StaticKeysJumpSection
->addRelocation(EntryOffset
+ 4, Target
,
1845 ELF::R_X86_64_PC32
, /*Addend*/ 0);
1850 BC
.outs() << "BOLT-INFO: the input contains " << NumShort
<< " short and "
1851 << NumLong
<< " long static keys jumps in optimized functions\n";
1853 return Error::success();
1856 // Post-emit pass of static keys jump section. Convert jumps to nops.
1857 Error
LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() {
1858 if (!StaticKeysJumpSection
|| !StaticKeysJumpSection
->isFinalized())
1859 return Error::success();
1861 const uint64_t SectionAddress
= StaticKeysJumpSection
->getAddress();
1862 DataExtractor
DE(StaticKeysJumpSection
->getOutputContents(),
1863 BC
.AsmInfo
->isLittleEndian(),
1864 BC
.AsmInfo
->getCodePointerSize());
1865 DataExtractor::Cursor
Cursor(StaticKeysJumpTableAddress
- SectionAddress
);
1866 const BinaryData
*Stop
= BC
.getBinaryDataByName("__stop___jump_table");
1867 uint32_t EntryID
= 0;
1868 uint64_t NumShort
= 0;
1869 uint64_t NumLong
= 0;
1870 while (Cursor
&& Cursor
.tell() < Stop
->getAddress() - SectionAddress
) {
1871 const uint64_t JumpAddress
=
1872 SectionAddress
+ Cursor
.tell() + (int32_t)DE
.getU32(Cursor
);
1873 const uint64_t TargetAddress
=
1874 SectionAddress
+ Cursor
.tell() + (int32_t)DE
.getU32(Cursor
);
1875 const uint64_t KeyAddress
=
1876 SectionAddress
+ Cursor
.tell() + (int64_t)DE
.getU64(Cursor
);
1878 // Consume the status of the cursor.
1880 return createStringError(errc::executable_format_error
,
1881 "out of bounds while updating static keys: %s",
1882 toString(Cursor
.takeError()).c_str());
1887 dbgs() << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress
)
1888 << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress
)
1889 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress
) << '\n';
1891 (void)TargetAddress
;
1894 BinaryFunction
*BF
=
1895 BC
.getBinaryFunctionContainingAddress(JumpAddress
,
1896 /*CheckPastEnd*/ false,
1897 /*UseMaxSize*/ true);
1898 assert(BF
&& "Cannot get function for modified static key.");
1900 if (!BF
->isEmitted())
1903 // Disassemble instruction to collect stats even if nop-conversion is
1905 MutableArrayRef
<uint8_t> Contents
= MutableArrayRef
<uint8_t>(
1906 reinterpret_cast<uint8_t *>(BF
->getImageAddress()), BF
->getImageSize());
1907 assert(Contents
.size() && "Non-empty function image expected.");
1911 const uint64_t JumpOffset
= JumpAddress
- BF
->getAddress();
1912 if (!BC
.DisAsm
->getInstruction(Inst
, Size
, Contents
.slice(JumpOffset
), 0,
1914 llvm_unreachable("Unable to disassemble jump instruction.");
1916 assert(BC
.MIB
->isBranch(Inst
) && "Branch instruction expected.");
1923 llvm_unreachable("Unexpected size for static keys jump instruction.");
1925 // Check if we need to convert jump instruction into a nop.
1926 if (!NopIDs
.contains(EntryID
))
1929 SmallString
<15> NopCode
;
1930 raw_svector_ostream
VecOS(NopCode
);
1931 BC
.MAB
->writeNopData(VecOS
, Size
, BC
.STI
.get());
1932 for (uint64_t I
= 0; I
< Size
; ++I
)
1933 Contents
[JumpOffset
+ I
] = NopCode
[I
];
1936 BC
.outs() << "BOLT-INFO: written " << NumShort
<< " short and " << NumLong
1937 << " long static keys jumps in optimized functions\n";
1939 return Error::success();
1944 std::unique_ptr
<MetadataRewriter
>
1945 llvm::bolt::createLinuxKernelRewriter(BinaryContext
&BC
) {
1946 return std::make_unique
<LinuxKernelRewriter
>(BC
);