1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the BinaryContext class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAssembler.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
26 #include "llvm/MC/MCInstPrinter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionELF.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/Regex.h"
40 #include <unordered_set>
45 #define DEBUG_TYPE "bolt"
// Command-line options declared in this file. Every option shown here is
// registered under BoltCategory.

// -no-huge-pages (hidden): when set, the BinaryContext constructor picks
// RegularPageSize instead of HugePageSize for PageAlign.
49 cl::opt
<bool> NoHugePages("no-huge-pages",
50 cl::desc("use regular size pages for code alignment"),
51 cl::Hidden
, cl::cat(BoltCategory
));
// -print-debug-info: include debug info when functions are printed.
54 PrintDebugInfo("print-debug-info",
55 cl::desc("print debug info when printing functions"),
58 cl::cat(BoltCategory
));
// PrintRelocations (hidden): include relocations when functions/objects are
// printed.
60 cl::opt
<bool> PrintRelocations(
62 cl::desc("print relocations when printing functions/objects"), cl::Hidden
,
63 cl::cat(BoltCategory
));
// -print-mem-data: include memory data annotations when functions are printed.
66 PrintMemData("print-mem-data",
67 cl::desc("print memory data annotations when printing functions"),
70 cl::cat(BoltCategory
));
// CompDirOverride (hidden, string, defaults to ""): replacement for
// DW_AT_comp_dir used as the base location combined with DW_AT_dwo_name.
72 cl::opt
<std::string
> CompDirOverride(
74 cl::desc("overrides DW_AT_comp_dir, and provides an alternative base "
75 "location, which is used with DW_AT_dwo_name to construct a path "
77 cl::Hidden
, cl::init(""), cl::cat(BoltCategory
));
// Out-of-line definition of the static BOLTError::ID member.
// NOTE(review): presumably the class-identity tag that llvm::ErrorInfo uses
// for its RTTI; only its address would matter, not the value — confirm
// against the class declaration.
83 char BOLTError::ID
= 0;
85 BOLTError::BOLTError(bool IsFatal
, const Twine
&S
)
86 : IsFatal(IsFatal
), Msg(S
.str()) {}
/// Print this error to \p OS.
///
/// The stored message Msg is viewed through a StringRef and checked for a
/// leading "BOLT-ERROR"; per the comment below, the prefix is supplied here
/// when the message does not already carry it.
88 void BOLTError::log(raw_ostream
&OS
) const {
91 StringRef ErrMsg
= StringRef(Msg
);
92 // Prepend our error prefix if it is missing
96 if (!ErrMsg
.starts_with("BOLT-ERROR"))
/// Always returns inconvertibleErrorCode(); this error type is never mapped
/// to a specific std::error_code value.
102 std::error_code
BOLTError::convertToErrorCode() const {
103 return inconvertibleErrorCode();
/// Build an llvm::Error wrapping a BOLTError with IsFatal == false and the
/// message \p S.
106 Error
createNonFatalBOLTError(const Twine
&S
) {
107 return make_error
<BOLTError
>(/*IsFatal*/ false, S
);
/// Build an llvm::Error wrapping a BOLTError with IsFatal == true and the
/// message \p S.
110 Error
createFatalBOLTError(const Twine
&S
) {
111 return make_error
<BOLTError
>(/*IsFatal*/ true, S
);
/// Consume \p E by running a handler over every BOLTError it contains.
///
/// The handler acts on errors whose message is non-empty.
/// NOTE(review): going by the function name, a fatal error is expected to
/// terminate the process after being logged — confirm in the handler body.
114 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E
) {
// E is moved into a fresh Error that handleAllErrors takes over; the lambda
// parameter below shadows the outer E.
115 handleAllErrors(Error(std::move(E
)), [&](const BOLTError
&E
) {
116 if (!E
.getMessage().empty())
/// Construct a BinaryContext from pre-built MC-layer components.
///
/// Every std::unique_ptr argument is moved into the member of the same name,
/// so the context becomes the owner. \p TheTarget is stored as a plain
/// pointer, and \p TripleName and \p Logger are copied. InitialDynoStats is
/// constructed with the result of isAArch64().
///
/// The body then records the architecture in Relocation::Arch and derives
/// RegularPageSize (AArch64 vs. x86 constant) and PageAlign (RegularPageSize
/// when opts::NoHugePages is set, HugePageSize otherwise).
123 BinaryContext::BinaryContext(std::unique_ptr
<MCContext
> Ctx
,
124 std::unique_ptr
<DWARFContext
> DwCtx
,
125 std::unique_ptr
<Triple
> TheTriple
,
126 const Target
*TheTarget
, std::string TripleName
,
127 std::unique_ptr
<MCCodeEmitter
> MCE
,
128 std::unique_ptr
<MCObjectFileInfo
> MOFI
,
129 std::unique_ptr
<const MCAsmInfo
> AsmInfo
,
130 std::unique_ptr
<const MCInstrInfo
> MII
,
131 std::unique_ptr
<const MCSubtargetInfo
> STI
,
132 std::unique_ptr
<MCInstPrinter
> InstPrinter
,
133 std::unique_ptr
<const MCInstrAnalysis
> MIA
,
134 std::unique_ptr
<MCPlusBuilder
> MIB
,
135 std::unique_ptr
<const MCRegisterInfo
> MRI
,
136 std::unique_ptr
<MCDisassembler
> DisAsm
,
137 JournalingStreams Logger
)
138 : Ctx(std::move(Ctx
)), DwCtx(std::move(DwCtx
)),
139 TheTriple(std::move(TheTriple
)), TheTarget(TheTarget
),
140 TripleName(TripleName
), MCE(std::move(MCE
)), MOFI(std::move(MOFI
)),
141 AsmInfo(std::move(AsmInfo
)), MII(std::move(MII
)), STI(std::move(STI
)),
142 InstPrinter(std::move(InstPrinter
)), MIA(std::move(MIA
)),
143 MIB(std::move(MIB
)), MRI(std::move(MRI
)), DisAsm(std::move(DisAsm
)),
144 Logger(Logger
), InitialDynoStats(isAArch64()) {
// The parameter TheTriple shadows the member and has already been moved
// from, hence the explicit this-> to reach the member.
145 Relocation::Arch
= this->TheTriple
->getArch();
146 RegularPageSize
= isAArch64() ? RegularPageSizeAArch64
: RegularPageSizeX86
;
147 PageAlign
= opts::NoHugePages
? RegularPageSize
: HugePageSize
;
/// Destructor. Walks the Sections, InjectedBinaryFunctions and JumpTables
/// containers, all of which hold raw pointers; injected functions are deleted
/// explicitly.
/// NOTE(review): presumably the section and jump-table loops free their
/// elements in the same way — confirm.
150 BinaryContext::~BinaryContext() {
151 for (BinarySection
*Section
: Sections
)
153 for (BinaryFunction
*InjectedFunction
: InjectedBinaryFunctions
)
154 delete InjectedFunction
;
155 for (std::pair
<const uint64_t, JumpTable
*> JTI
: JumpTables
)
160 /// Create a BinaryContext for the architecture selected by \p TheTriple,
161 /// building the MC-layer objects (register, asm, subtarget, instr info, ...).
///
/// \param TheTriple     target triple; its architecture selects ArchName and
///                      the feature string (x86_64, aarch64 and riscv64 are
///                      the cases handled; anything else is rejected).
/// \param InputFileName stored in the new context via setFilename().
/// \param Features      subtarget features; the error messages below state
///                      that x86_64 and AArch64 do not use them while RISC-V
///                      needs them. For RISC-V "relax" is added to it.
/// \param IsPIC         forwarded to createMCObjectFileInfo; the context's
///                      HasFixedLoadAddress is set to !IsPIC.
/// \param DwCtx         DWARF context, moved into the new BinaryContext.
/// \param Logger        output streams, passed on to the BinaryContext.
///
/// \returns the new context, or an error (std::errc::not_supported or a fatal
/// BOLTError) naming the component that could not be created.
162 Expected
<std::unique_ptr
<BinaryContext
>> BinaryContext::createBinaryContext(
163 Triple TheTriple
, StringRef InputFileName
, SubtargetFeatures
*Features
,
164 bool IsPIC
, std::unique_ptr
<DWARFContext
> DwCtx
, JournalingStreams Logger
) {
165 StringRef ArchName
= "";
166 std::string FeaturesStr
= "";
// Per-architecture selection of ArchName and FeaturesStr.
167 switch (TheTriple
.getArch()) {
168 case llvm::Triple::x86_64
:
170 return createFatalBOLTError(
171 "x86_64 target does not use SubtargetFeatures");
173 FeaturesStr
= "+nopl";
175 case llvm::Triple::aarch64
:
177 return createFatalBOLTError(
178 "AArch64 target does not use SubtargetFeatures");
179 ArchName
= "aarch64";
180 FeaturesStr
= "+all";
182 case llvm::Triple::riscv64
: {
183 ArchName
= "riscv64";
185 return createFatalBOLTError("RISCV target needs SubtargetFeatures");
186 // We rely on relaxation for some transformations (e.g., promoting all calls
187 // to PseudoCALL and then making JITLink relax them). Since the relax
188 // feature is not stored in the object file, we manually enable it.
189 Features
->AddFeature("relax");
190 FeaturesStr
= Features
->getString();
194 return createStringError(std::errc::not_supported
,
195 "BOLT-ERROR: Unrecognized machine in ELF file");
198 const std::string TripleName
= TheTriple
.str();
// Look the target up by ArchName/TheTriple; the registry's diagnostic text is
// appended to the BOLT-ERROR message below.
201 const Target
*TheTarget
=
202 TargetRegistry::lookupTarget(std::string(ArchName
), TheTriple
, Error
);
204 return createStringError(make_error_code(std::errc::not_supported
),
205 Twine("BOLT-ERROR: ", Error
));
207 std::unique_ptr
<const MCRegisterInfo
> MRI(
208 TheTarget
->createMCRegInfo(TripleName
));
210 return createStringError(
211 make_error_code(std::errc::not_supported
),
212 Twine("BOLT-ERROR: no register info for target ", TripleName
));
214 // Set up disassembler.
215 std::unique_ptr
<MCAsmInfo
> AsmInfo(
216 TheTarget
->createMCAsmInfo(*MRI
, TripleName
, MCTargetOptions()));
218 return createStringError(
219 make_error_code(std::errc::not_supported
),
220 Twine("BOLT-ERROR: no assembly info for target ", TripleName
));
221 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
222 // we want to emit such names as using @PLT without double quotes to convey
223 // variant kind to the assembler. BOLT doesn't rely on the linker so we can
224 // override the default AsmInfo behavior to emit names the way we want.
225 AsmInfo
->setAllowAtInName(true);
// The CPU string is left empty; only FeaturesStr selects subtarget features.
227 std::unique_ptr
<const MCSubtargetInfo
> STI(
228 TheTarget
->createMCSubtargetInfo(TripleName
, "", FeaturesStr
));
230 return createStringError(
231 make_error_code(std::errc::not_supported
),
232 Twine("BOLT-ERROR: no subtarget info for target ", TripleName
));
234 std::unique_ptr
<const MCInstrInfo
> MII(TheTarget
->createMCInstrInfo());
236 return createStringError(
237 make_error_code(std::errc::not_supported
),
238 Twine("BOLT-ERROR: no instruction info for target ", TripleName
));
// The MCContext borrows AsmInfo, MRI and STI through raw pointers; ownership
// of those objects is handed to the BinaryContext further down.
240 std::unique_ptr
<MCContext
> Ctx(
241 new MCContext(TheTriple
, AsmInfo
.get(), MRI
.get(), STI
.get()));
242 std::unique_ptr
<MCObjectFileInfo
> MOFI(
243 TheTarget
->createMCObjectFileInfo(*Ctx
, IsPIC
));
244 Ctx
->setObjectFileInfo(MOFI
.get());
245 // We do not support X86 Large code model. Change this in the future.
247 if (TheTriple
.getArch() == llvm::Triple::aarch64
)
// LSDA pointer encoding: an absolute/udata4 form, or a pc-relative
// sdata8/sdata4 form, with the wider variant chosen when Large is set.
249 unsigned LSDAEncoding
=
250 Large
? dwarf::DW_EH_PE_absptr
: dwarf::DW_EH_PE_udata4
;
252 LSDAEncoding
= dwarf::DW_EH_PE_pcrel
|
253 (Large
? dwarf::DW_EH_PE_sdata8
: dwarf::DW_EH_PE_sdata4
);
256 std::unique_ptr
<MCDisassembler
> DisAsm(
257 TheTarget
->createMCDisassembler(*STI
, *Ctx
));
260 return createStringError(
261 make_error_code(std::errc::not_supported
),
262 Twine("BOLT-ERROR: no disassembler info for target ", TripleName
));
264 std::unique_ptr
<const MCInstrAnalysis
> MIA(
265 TheTarget
->createMCInstrAnalysis(MII
.get()));
267 return createStringError(
268 make_error_code(std::errc::not_supported
),
269 Twine("BOLT-ERROR: failed to create instruction analysis for target ",
272 int AsmPrinterVariant
= AsmInfo
->getAssemblerDialect();
273 std::unique_ptr
<MCInstPrinter
> InstructionPrinter(
274 TheTarget
->createMCInstPrinter(TheTriple
, AsmPrinterVariant
, *AsmInfo
,
276 if (!InstructionPrinter
)
277 return createStringError(
278 make_error_code(std::errc::not_supported
),
279 Twine("BOLT-ERROR: no instruction printer for target ", TripleName
));
280 InstructionPrinter
->setPrintImmHex(true);
282 std::unique_ptr
<MCCodeEmitter
> MCE(
283 TheTarget
->createMCCodeEmitter(*MII
, *Ctx
));
// Hand every component over to the new context. The argument in the MIB
// (MCPlusBuilder) position is nullptr.
// NOTE(review): presumably the caller installs the MCPlusBuilder afterwards
// — confirm.
285 auto BC
= std::make_unique
<BinaryContext
>(
286 std::move(Ctx
), std::move(DwCtx
), std::make_unique
<Triple
>(TheTriple
),
287 TheTarget
, std::string(TripleName
), std::move(MCE
), std::move(MOFI
),
288 std::move(AsmInfo
), std::move(MII
), std::move(STI
),
289 std::move(InstructionPrinter
), std::move(MIA
), nullptr, std::move(MRI
),
290 std::move(DisAsm
), Logger
);
292 BC
->LSDAEncoding
= LSDAEncoding
;
294 BC
->MAB
= std::unique_ptr
<MCAsmBackend
>(
295 BC
->TheTarget
->createMCAsmBackend(*BC
->STI
, *BC
->MRI
, MCTargetOptions()));
297 BC
->setFilename(InputFileName
);
299 BC
->HasFixedLoadAddress
= !IsPIC
;
// A second disassembler instance, kept separately from DisAsm.
301 BC
->SymbolicDisAsm
= std::unique_ptr
<MCDisassembler
>(
302 BC
->TheTarget
->createMCDisassembler(*BC
->STI
, *BC
->Ctx
));
304 if (!BC
->SymbolicDisAsm
)
305 return createStringError(
306 make_error_code(std::errc::not_supported
),
307 Twine("BOLT-ERROR: no disassembler info for target ", TripleName
));
// The returned type (Expected<unique_ptr<...>>) differs from BC's type, so BC
// is moved explicitly into the return value.
309 return std::move(BC
);
/// Name-based predicate on \p SymbolName.
///
/// The names examined are "__hot_start"/"__hot_end",
/// "__hot_data_start"/"__hot_data_end" and "_end"; the first two groups are
/// tested as part of a larger condition.
/// NOTE(review): per the function name, a true result presumably means
/// references to the symbol must be kept as relocations — confirm.
312 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName
) const {
314 (SymbolName
== "__hot_start" || SymbolName
== "__hot_end"))
318 (SymbolName
== "__hot_data_start" || SymbolName
== "__hot_data_end"))
321 if (SymbolName
== "_end")
/// Create an object writer that emits to \p OS by delegating to the asm
/// backend (MAB). MAB is dereferenced without a null check, so it must be set
/// before this is called.
327 std::unique_ptr
<MCObjectWriter
>
328 BinaryContext::createObjectWriter(raw_pwrite_stream
&OS
) {
329 return MAB
->createObjectWriter(OS
);
/// Check the Parent links of the objects in BinaryDataMap.
///
/// For an object Itr, each following object Next that is in the same section
/// and whose [address, address + size) range is contained in Itr is expected
/// to have Itr as its Parent; a mismatch is reported through errs() as a
/// BOLT-WARNING naming both objects.
/// NOTE(review): the boolean result presumably reports whether every link was
/// correct — confirm.
332 bool BinaryContext::validateObjectNesting() const {
333 auto Itr
= BinaryDataMap
.begin();
334 auto End
= BinaryDataMap
.end();
337 auto Next
= std::next(Itr
);
// Scan forward while Next is still nested inside Itr.
338 while (Next
!= End
&&
339 Itr
->second
->getSection() == Next
->second
->getSection() &&
340 Itr
->second
->containsRange(Next
->second
->getAddress(),
341 Next
->second
->getSize())) {
342 if (Next
->second
->Parent
!= Itr
->second
) {
343 this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
344 << "BOLT-WARNING: " << *Itr
->second
<< "\n"
345 << "BOLT-WARNING: " << *Next
->second
<< "\n";
/// Check that every relocation in every section lands inside a known data
/// object.
///
/// For each relocation the absolute address (relocation offset plus section
/// address) is looked up with getBinaryDataContainingAddress(). Warnings are
/// printed when no BinaryData covers the address, or when the covering
/// object has no atomic root.
/// NOTE(review): the boolean result presumably reports whether all
/// relocations were covered — confirm.
355 bool BinaryContext::validateHoles() const {
357 for (BinarySection
&Section
: sections()) {
358 for (const Relocation
&Rel
: Section
.relocations()) {
// Rel.Offset is section-relative; convert it to an absolute address.
359 uint64_t RelAddr
= Rel
.Offset
+ Section
.getAddress();
360 const BinaryData
*BD
= getBinaryDataContainingAddress(RelAddr
);
363 << "BOLT-WARNING: no BinaryData found for relocation at address"
364 << " 0x" << Twine::utohexstr(RelAddr
) << " in " << Section
.getName()
367 } else if (!BD
->getAtomicRoot()) {
369 << "BOLT-WARNING: no atomic BinaryData found for relocation at "
370 << "address 0x" << Twine::utohexstr(RelAddr
) << " in "
371 << Section
.getName() << "\n";
/// Re-establish Parent links around the BinaryDataMap entry \p GAI.
///
/// Two things are checked: whether the entry preceding \p GAI (same section)
/// contains it, in which case that entry becomes the parent (otherwise the
/// parent is cleared); and whether \p GAI, or its parent when it has one,
/// contains the entries that follow, which are then re-parented to it.
379 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI
) {
380 const uint64_t Address
= GAI
->second
->getAddress();
381 const uint64_t Size
= GAI
->second
->getSize();
// Helper: give Itr the parent NewParent, then keep assigning NewParent to
// following entries for as long as they still point at Itr's old parent.
383 auto fixParents
= [&](BinaryDataMapType::iterator Itr
,
384 BinaryData
*NewParent
) {
385 BinaryData
*OldParent
= Itr
->second
->Parent
;
386 Itr
->second
->Parent
= NewParent
;
388 while (Itr
!= BinaryDataMap
.end() && OldParent
&&
389 Itr
->second
->Parent
== OldParent
) {
390 Itr
->second
->Parent
= NewParent
;
395 // Check if the previous symbol contains the newly added symbol.
396 if (GAI
!= BinaryDataMap
.begin()) {
397 BinaryData
*Prev
= std::prev(GAI
)->second
;
399 if (Prev
->getSection() == GAI
->second
->getSection() &&
400 Prev
->containsRange(Address
, Size
)) {
401 fixParents(GAI
, Prev
);
403 fixParents(GAI
, nullptr);
409 // Check if the newly added symbol contains any subsequent symbols.
// The containing object used for the test is GAI's parent when it has one,
// otherwise GAI itself.
411 BinaryData
*BD
= GAI
->second
->Parent
? GAI
->second
->Parent
: GAI
->second
;
412 auto Itr
= std::next(GAI
);
414 Itr
!= BinaryDataMap
.end() &&
415 BD
->containsRange(Itr
->second
->getAddress(), Itr
->second
->getSize())) {
416 Itr
->second
->Parent
= BD
;
/// Return the range of BinaryDataMap entries nested under \p BD.
///
/// The range starts at the entry right after the one found at BD's address
/// and extends while BD->isAncestorOf() holds for the entry at End.
/// NOTE(review): std::next is applied to the find() result directly, so BD's
/// address is assumed to be present in BinaryDataMap — confirm with callers.
422 iterator_range
<BinaryContext::binary_data_iterator
>
423 BinaryContext::getSubBinaryData(BinaryData
*BD
) {
424 auto Start
= std::next(BinaryDataMap
.find(BD
->getAddress()));
426 while (End
!= BinaryDataMap
.end() && BD
->isAncestorOf(End
->second
))
428 return make_range(Start
, End
);
/// Translate a reference to \p Address made from function \p BF into a
/// (symbol, addend) pair.
///
/// The visible cases, in the order tried:
///  - a constant-island access of \p BF itself, or of another function found
///    through AddressToConstantIslandMap (returned with addend 0);
///  - an address inside a text section: a reference into \p BF other than its
///    start becomes a secondary entry point, other code addresses are
///    recorded with addInterproceduralReference();
///  - with relocations, a PC-relative reference to memory that looks like a
///    PIC jump table creates/returns that jump table's symbol;
///  - an address covered by an existing BinaryData yields that object's
///    symbol plus the offset into it;
///  - otherwise a new "DATAat" global symbol is created at the address.
431 std::pair
<const MCSymbol
*, uint64_t>
432 BinaryContext::handleAddressRef(uint64_t Address
, BinaryFunction
&BF
,
435 // Check if this is an access to a constant island and create bookkeeping
436 // to keep track of it and emit it later as part of this function.
437 if (MCSymbol
*IslandSym
= BF
.getOrCreateIslandAccess(Address
))
438 return std::make_pair(IslandSym
, 0);
440 // Detect custom code written in assembly that refers to arbitrary
441 // constant islands from other functions. Write this reference so we
442 // can pull this constant island and emit it as part of this function
444 auto IslandIter
= AddressToConstantIslandMap
.lower_bound(Address
);
// lower_bound yields the first key >= Address; this condition detects the
// case where there is no exact match but an earlier entry exists.
446 if (IslandIter
!= AddressToConstantIslandMap
.begin() &&
447 (IslandIter
== AddressToConstantIslandMap
.end() ||
448 IslandIter
->first
> Address
))
451 if (IslandIter
!= AddressToConstantIslandMap
.end()) {
452 // Fall-back to referencing the original constant island in the presence
453 // of dynamic relocs, as we currently do not support cloning them.
454 // Notice: we might fail to link because of this, if the original constant
455 // island we are referring would be emitted too far away.
456 if (IslandIter
->second
->hasDynamicRelocationAtIsland()) {
457 MCSymbol
*IslandSym
=
458 IslandIter
->second
->getOrCreateIslandAccess(Address
);
460 return std::make_pair(IslandSym
, 0);
461 } else if (MCSymbol
*IslandSym
=
462 IslandIter
->second
->getOrCreateProxyIslandAccess(Address
,
// Record that BF depends on the island owned by the other function.
464 BF
.createIslandDependency(IslandSym
, IslandIter
->second
);
465 return std::make_pair(IslandSym
, 0);
470 // Note that the address does not necessarily have to reside inside
471 // a section, it could be an absolute address too.
472 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
473 if (Section
&& Section
->isText()) {
// On AArch64 the containment test uses the function's maximum size.
474 if (BF
.containsAddress(Address
, /*UseMaxSize=*/isAArch64())) {
475 if (Address
!= BF
.getAddress()) {
476 // The address could potentially escape. Mark it as another entry
477 // point into the function.
478 if (opts::Verbosity
>= 1) {
479 this->outs() << "BOLT-INFO: potentially escaped address 0x"
480 << Twine::utohexstr(Address
) << " in function " << BF
483 BF
.HasInternalLabelReference
= true;
484 return std::make_pair(
485 BF
.addEntryPointAtOffset(Address
- BF
.getAddress()), 0);
488 addInterproceduralReference(&BF
, Address
);
492 // With relocations, catch jump table references outside of the basic block
493 // containing the indirect jump.
494 if (HasRelocations
) {
495 const MemoryContentsType MemType
= analyzeMemoryAt(Address
, BF
);
496 if (MemType
== MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE
&& IsPCRel
) {
497 const MCSymbol
*Symbol
=
498 getOrCreateJumpTable(BF
, Address
, JumpTable::JTT_PIC
);
500 return std::make_pair(Symbol
, 0);
// Existing data object: return its symbol and the offset of Address in it.
504 if (BinaryData
*BD
= getBinaryDataContainingAddress(Address
))
505 return std::make_pair(BD
->getSymbol(), Address
- BD
->getAddress());
507 // TODO: use DWARF info to get size/alignment here?
508 MCSymbol
*TargetSymbol
= getOrCreateGlobalSymbol(Address
, "DATAat");
509 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol
->getName() << '\n');
510 return std::make_pair(TargetSymbol
, 0);
/// Classify the memory at \p Address as referenced from \p BF.
///
/// \returns POSSIBLE_PIC_JUMP_TABLE when analyzeJumpTable() accepts the
/// location as a PIC table, POSSIBLE_JUMP_TABLE when it accepts it as a
/// normal table, and UNKNOWN in every other case: an early bail-out, no
/// section for the address (a warning is printed when opts::Verbosity > 1),
/// a virtual section, or a text section.
513 MemoryContentsType
BinaryContext::analyzeMemoryAt(uint64_t Address
,
514 BinaryFunction
&BF
) {
516 return MemoryContentsType::UNKNOWN
;
518 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
520 // No section - possibly an absolute address. Since we don't allow
521 // internal function addresses to escape the function scope - we
522 // consider it a tail call.
523 if (opts::Verbosity
> 1) {
524 this->errs() << "BOLT-WARNING: no section for address 0x"
525 << Twine::utohexstr(Address
) << " referenced from function "
528 return MemoryContentsType::UNKNOWN
;
531 if (Section
->isVirtual()) {
532 // The contents are filled at runtime.
533 return MemoryContentsType::UNKNOWN
;
536 // No support for jump tables in code yet.
537 if (Section
->isText())
538 return MemoryContentsType::UNKNOWN
;
540 // Start with checking for PIC jump table. We expect non-PIC jump tables
541 // to have high 32 bits set to 0.
542 if (analyzeJumpTable(Address
, JumpTable::JTT_PIC
, BF
))
543 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE
;
545 if (analyzeJumpTable(Address
, JumpTable::JTT_NORMAL
, BF
))
546 return MemoryContentsType::POSSIBLE_JUMP_TABLE
;
548 return MemoryContentsType::UNKNOWN
;
/// Decide whether the memory at \p Address looks like a jump table of kind
/// \p Type belonging to \p BF.
///
/// Entries are read one by one up to an upper bound derived from the end of
/// the containing section, the end of the data object at \p Address (when it
/// has a size) and \p NextJTAddress. Each entry value is classified as the
/// __builtin_unreachable target (the address right past \p BF), the start of
/// \p BF, or an address inside \p BF or a related fragment.
///
/// \param Address            start of the candidate table.
/// \param Type               JTT_PIC (value is Address plus a signed entry)
///                           or JTT_NORMAL (value is a pointer).
/// \param BF                 function referencing the table.
/// \param NextJTAddress      address of the following jump table, used to
///                           limit the scan.
/// \param EntriesAsAddress   optional output; receives the entry targets.
///                           For JTT_NORMAL it may be shrunk to TrimmedSize
///                           at the end.
/// \param HasEntryInFragment optional output; set to true when an accepted
///                           entry targets a function other than \p BF.
/// \returns true when NumRealEntries plus one (if any special target was
/// seen) is at least 2.
551 bool BinaryContext::analyzeJumpTable(const uint64_t Address
,
552 const JumpTable::JumpTableType Type
,
553 const BinaryFunction
&BF
,
554 const uint64_t NextJTAddress
,
555 JumpTable::AddressesType
*EntriesAsAddress
,
556 bool *HasEntryInFragment
) const {
557 // Target address of __builtin_unreachable.
558 const uint64_t UnreachableAddress
= BF
.getAddress() + BF
.getSize();
560 // Is one of the targets __builtin_unreachable?
561 bool HasUnreachable
= false;
563 // Does one of the entries match function start address?
564 bool HasStartAsEntry
= false;
566 // Number of targets other than __builtin_unreachable.
567 uint64_t NumRealEntries
= 0;
569 // Size of the jump table without trailing __builtin_unreachable entries.
570 size_t TrimmedSize
= 0;
// Helper: append an entry to EntriesAsAddress (when the caller supplied one)
// and maintain TrimmedSize.
572 auto addEntryAddress
= [&](uint64_t EntryAddress
, bool Unreachable
= false) {
573 if (!EntriesAsAddress
)
575 EntriesAsAddress
->emplace_back(EntryAddress
);
577 TrimmedSize
= EntriesAsAddress
->size();
580 ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
584 // The upper bound is defined by containing object, section limits, and
585 // the next jump table in memory.
586 uint64_t UpperBound
= Section
->getEndAddress();
587 const BinaryData
*JumpTableBD
= getBinaryDataAtAddress(Address
);
588 if (JumpTableBD
&& JumpTableBD
->getSize()) {
589 assert(JumpTableBD
->getEndAddress() <= UpperBound
&&
590 "data object cannot cross a section boundary");
591 UpperBound
= JumpTableBD
->getEndAddress();
594 UpperBound
= std::min(NextJTAddress
, UpperBound
);
597 using JTT
= JumpTable::JumpTableType
;
598 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
599 Address
, BF
.getPrintName(),
600 Type
== JTT::JTT_PIC
? "PIC" : "Normal");
602 const uint64_t EntrySize
= getJumpTableEntrySize(Type
);
// NOTE(review): UpperBound - EntrySize is unsigned arithmetic and would wrap
// if UpperBound were ever smaller than EntrySize — confirm that cannot
// happen.
603 for (uint64_t EntryAddress
= Address
; EntryAddress
<= UpperBound
- EntrySize
;
604 EntryAddress
+= EntrySize
) {
605 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress
)
607 // Check if there's a proper relocation against the jump table entry.
608 if (HasRelocations
) {
609 if (Type
== JumpTable::JTT_PIC
&&
610 !DataPCRelocations
.count(EntryAddress
)) {
612 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
615 if (Type
== JumpTable::JTT_NORMAL
&& !getRelocationAt(EntryAddress
)) {
618 << "FAIL: JTT_NORMAL table, no relocation for this address\n");
// PIC entries are signed offsets added to the table start (Address); normal
// entries are pointers.
623 const uint64_t Value
=
624 (Type
== JumpTable::JTT_PIC
)
625 ? Address
+ *getSignedValueAtAddress(EntryAddress
, EntrySize
)
626 : *getPointerAtAddress(EntryAddress
);
628 // __builtin_unreachable() case.
629 if (Value
== UnreachableAddress
) {
630 addEntryAddress(Value
, /*Unreachable*/ true);
631 HasUnreachable
= true;
632 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value
));
636 // Function start is another special case. It is allowed in the jump table,
637 // but we need at least one other regular entry to distinguish the table
638 // from, e.g. a function pointer array.
639 if (Value
== BF
.getAddress()) {
640 HasStartAsEntry
= true;
641 addEntryAddress(Value
);
645 // Function or one of its fragments.
646 const BinaryFunction
*TargetBF
= getBinaryFunctionContainingAddress(Value
);
647 const bool DoesBelongToFunction
=
648 BF
.containsAddress(Value
) ||
649 (TargetBF
&& TargetBF
->isParentOrChildOf(BF
));
650 if (!DoesBelongToFunction
) {
652 if (!BF
.containsAddress(Value
)) {
653 dbgs() << "FAIL: function doesn't contain this address\n";
655 dbgs() << " ! function containing this address: "
656 << TargetBF
->getPrintName() << '\n';
657 if (TargetBF
->isFragment()) {
658 dbgs() << " ! is a fragment";
659 for (BinaryFunction
*Parent
: TargetBF
->ParentFragments
)
660 dbgs() << ", parent: " << Parent
->getPrintName();
669 // Check there's an instruction at this offset.
670 if (TargetBF
->getState() == BinaryFunction::State::Disassembled
&&
671 !TargetBF
->getInstructionAtOffset(Value
- TargetBF
->getAddress())) {
672 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value
));
677 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value
));
679 if (TargetBF
!= &BF
&& HasEntryInFragment
)
680 *HasEntryInFragment
= true;
681 addEntryAddress(Value
);
684 // Trim direct/normal jump table to exclude trailing unreachable entries that
685 // can collide with a function address.
686 if (Type
== JumpTable::JTT_NORMAL
&& EntriesAsAddress
&&
687 TrimmedSize
!= EntriesAsAddress
->size() &&
688 getBinaryFunctionAtAddress(UnreachableAddress
))
689 EntriesAsAddress
->resize(TrimmedSize
);
691 // It's a jump table if the number of real entries is more than 1, or there's
692 // one real entry and one or more special targets. If there are only multiple
693 // special targets, then it's not a jump table.
694 return NumRealEntries
+ (HasUnreachable
|| HasStartAsEntry
) >= 2;
/// Fill in the entries of every registered jump table.
///
/// For each table in JumpTables, analyzeJumpTable() is re-run with the first
/// parent function, the address of the following table as the scan limit, and
/// the table's EntriesAsAddress / IsSplit fields as outputs. A failed
/// analysis is treated as an internal error (llvm_unreachable). Entry
/// addresses are then reported to the parent fragments: the address right
/// past a fragment goes to IgnoredBranches, an address inside it is
/// registered as a referenced offset. In strict mode the PC-relative
/// relocation records of PIC tables are erased from DataPCRelocations, and
/// any record left over at the end trips an assertion. DataPCRelocations is
/// cleared before returning.
697 void BinaryContext::populateJumpTables() {
698 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations
.size()
700 for (auto JTI
= JumpTables
.begin(), JTE
= JumpTables
.end(); JTI
!= JTE
;
702 JumpTable
*JT
= JTI
->second
;
// Accumulates whether any parent function is non-simple.
704 bool NonSimpleParent
= false;
705 for (BinaryFunction
*BF
: JT
->Parents
)
706 NonSimpleParent
|= !BF
->isSimple();
710 uint64_t NextJTAddress
= 0;
711 auto NextJTI
= std::next(JTI
);
713 NextJTAddress
= NextJTI
->second
->getAddress();
716 analyzeJumpTable(JT
->getAddress(), JT
->Type
, *(JT
->Parents
[0]),
717 NextJTAddress
, &JT
->EntriesAsAddress
, &JT
->IsSplit
);
720 dbgs() << "failed to analyze ";
722 if (NextJTI
!= JTE
) {
724 NextJTI
->second
->print(dbgs());
727 llvm_unreachable("jump table heuristic failure");
729 for (BinaryFunction
*Frag
: JT
->Parents
) {
731 Frag
->setHasIndirectTargetToSplitFragment(true);
732 for (uint64_t EntryAddress
: JT
->EntriesAsAddress
)
733 // if target is builtin_unreachable
734 if (EntryAddress
== Frag
->getAddress() + Frag
->getSize()) {
735 Frag
->IgnoredBranches
.emplace_back(EntryAddress
- Frag
->getAddress(),
737 } else if (EntryAddress
>= Frag
->getAddress() &&
738 EntryAddress
< Frag
->getAddress() + Frag
->getSize()) {
739 Frag
->registerReferencedOffset(EntryAddress
- Frag
->getAddress());
743 // In strict mode, erase PC-relative relocation record. Later we check that
744 // all such records are erased and thus have been accounted for.
745 if (opts::StrictMode
&& JT
->Type
== JumpTable::JTT_PIC
) {
746 for (uint64_t Address
= JT
->getAddress();
747 Address
< JT
->getAddress() + JT
->getSize();
748 Address
+= JT
->EntrySize
) {
// NOTE(review): erase(find(...)) requires the address to be present; passing
// end() to erase is undefined. Presumably guaranteed because analyzeJumpTable
// rejects JTT_PIC entries without a DataPCRelocations record — confirm.
749 DataPCRelocations
.erase(DataPCRelocations
.find(Address
));
753 // Mark to skip the function and all its fragments.
754 for (BinaryFunction
*Frag
: JT
->Parents
)
755 if (Frag
->hasIndirectTargetToSplitFragment())
756 addFragmentsToSkip(Frag
);
759 if (opts::StrictMode
&& DataPCRelocations
.size()) {
761 dbgs() << DataPCRelocations
.size()
762 << " unclaimed PC-relative relocations left in data:\n";
763 for (uint64_t Reloc
: DataPCRelocations
)
764 dbgs() << Twine::utohexstr(Reloc
) << '\n';
766 assert(0 && "unclaimed PC-relative relocations left in data\n");
768 clearList(DataPCRelocations
);
/// Mark every function in FragmentsToSkip, together with all fragments
/// reachable from it, as non-simple.
///
/// FragmentsToSkip seeds a worklist. Each processed function is set
/// non-simple and flagged with setHasIndirectTargetToSplitFragment(true);
/// its Fragments and ParentFragments that are not yet in FragmentsToSkip are
/// appended to the worklist and added to the set. A summary warning with the
/// number of skipped functions is printed when the set is not empty.
771 void BinaryContext::skipMarkedFragments() {
772 std::vector
<BinaryFunction
*> FragmentQueue
;
773 // Copy the functions to FragmentQueue.
774 FragmentQueue
.assign(FragmentsToSkip
.begin(), FragmentsToSkip
.end());
// Helper: enqueue a function only the first time it is seen.
775 auto addToWorklist
= [&](BinaryFunction
*Function
) -> void {
776 if (FragmentsToSkip
.count(Function
))
778 FragmentQueue
.push_back(Function
);
779 addFragmentsToSkip(Function
);
781 // Functions containing split jump tables need to be skipped with all
782 // fragments (transitively).
// Index-based loop: FragmentQueue grows while it is being traversed.
783 for (size_t I
= 0; I
!= FragmentQueue
.size(); I
++) {
784 BinaryFunction
*BF
= FragmentQueue
[I
];
785 assert(FragmentsToSkip
.count(BF
) &&
786 "internal error in traversing function fragments");
787 if (opts::Verbosity
>= 1)
788 this->errs() << "BOLT-WARNING: Ignoring " << BF
->getPrintName() << '\n';
789 BF
->setSimple(false);
790 BF
->setHasIndirectTargetToSplitFragment(true);
792 llvm::for_each(BF
->Fragments
, addToWorklist
);
793 llvm::for_each(BF
->ParentFragments
, addToWorklist
);
795 if (!FragmentsToSkip
.empty())
796 this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip
.size()
797 << " function" << (FragmentsToSkip
.size() == 1 ? "" : "s")
798 << " due to cold fragments\n";
/// Return the symbol of the data object registered at \p Address, creating
/// one when none exists.
///
/// When BinaryDataMap already has an entry at \p Address, its symbol is
/// returned; an assertion requires the existing size to equal Size unless
/// Size is 0. Otherwise a name of the form "<Prefix>0x<hex address>" is
/// generated (asserted to be new in GlobalSymbols) and registered through
/// registerNameAtAddress() with the given Size, Alignment and Flags.
801 MCSymbol
*BinaryContext::getOrCreateGlobalSymbol(uint64_t Address
, Twine Prefix
,
805 auto Itr
= BinaryDataMap
.find(Address
);
806 if (Itr
!= BinaryDataMap
.end()) {
807 assert(Itr
->second
->getSize() == Size
|| !Size
);
808 return Itr
->second
->getSymbol();
811 std::string Name
= (Prefix
+ "0x" + Twine::utohexstr(Address
)).str();
812 assert(!GlobalSymbols
.count(Name
) && "created name is not unique");
813 return registerNameAtAddress(Name
, Address
, Size
, Alignment
, Flags
);
/// Return the MCContext symbol named \p Name, creating it if necessary.
/// Only the MCContext is consulted; nothing is recorded in BinaryDataMap.
816 MCSymbol
*BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name
) {
817 return Ctx
->getOrCreateSymbol(Name
);
/// Create a BinaryFunction named \p Name at \p Address in \p Section and
/// register it with this context.
///
/// The function is emplaced into BinaryFunctions keyed by \p Address; a
/// duplicate address trips an assertion. The name is then registered at the
/// address using \p SymbolSize when it is non-zero and \p Size otherwise, and
/// the function's symbol is mapped back to the function.
820 BinaryFunction
*BinaryContext::createBinaryFunction(
821 const std::string
&Name
, BinarySection
&Section
, uint64_t Address
,
822 uint64_t Size
, uint64_t SymbolSize
, uint16_t Alignment
) {
823 auto Result
= BinaryFunctions
.emplace(
824 Address
, BinaryFunction(Name
, Section
, Address
, Size
, *this));
825 assert(Result
.second
== true && "unexpected duplicate function");
// Pointer to the function object stored inside the BinaryFunctions map.
826 BinaryFunction
*BF
= &Result
.first
->second
;
827 registerNameAtAddress(Name
, Address
, SymbolSize
? SymbolSize
: Size
,
829 setSymbolToFunctionMap(BF
->getSymbol(), BF
);
// Return the label of the jump table at Address for Function, creating the
// table when none covers that address yet.
//
// Existing table: its Type must match and Address must be its start (both
// asserted). If it has a single parent other than Function, Function is
// added as a second parent and both parents are flagged via
// setHasIndirectTargetToSplitFragment(true); Function must end up among the
// parents (asserted). The table's first label is returned.
//
// New table: the symbol of a data object already at Address is reused unless
// its name is an internal one; otherwise a generated jump-table name is
// registered. The new JumpTable is stored in JumpTables and also in
// Function.JumpTables.
834 BinaryContext::getOrCreateJumpTable(BinaryFunction
&Function
, uint64_t Address
,
835 JumpTable::JumpTableType Type
) {
836 // Two fragments of same function access same jump table
837 if (JumpTable
*JT
= getJumpTableContainingAddress(Address
)) {
838 assert(JT
->Type
== Type
&& "jump table types have to match");
839 assert(Address
== JT
->getAddress() && "unexpected non-empty jump table");
841 // Prevent associating a jump table to a specific fragment twice.
842 // This simple check arises from the assumption: no more than 2 fragments.
843 if (JT
->Parents
.size() == 1 && JT
->Parents
[0] != &Function
) {
844 assert(JT
->Parents
[0]->isParentOrChildOf(Function
) &&
845 "cannot re-use jump table of a different function");
846 // Duplicate the entry for the parent function for easy access
847 JT
->Parents
.push_back(&Function
);
848 if (opts::Verbosity
> 2) {
849 this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
850 << JT
->Parents
[0]->getPrintName() << "; "
851 << Function
.getPrintName() << "\n";
852 JT
->print(this->outs());
854 Function
.JumpTables
.emplace(Address
, JT
);
855 JT
->Parents
[0]->setHasIndirectTargetToSplitFragment(true);
856 JT
->Parents
[1]->setHasIndirectTargetToSplitFragment(true);
// IsJumpTableParent is only read by the assert below; the (void) cast
// silences the unused-variable warning when asserts are compiled out.
859 bool IsJumpTableParent
= false;
860 (void)IsJumpTableParent
;
861 for (BinaryFunction
*Frag
: JT
->Parents
)
862 if (Frag
== &Function
)
863 IsJumpTableParent
= true;
864 assert(IsJumpTableParent
&&
865 "cannot re-use jump table of a different function");
866 return JT
->getFirstLabel();
869 // Re-use the existing symbol if possible.
870 MCSymbol
*JTLabel
= nullptr;
871 if (BinaryData
*Object
= getBinaryDataAtAddress(Address
)) {
872 if (!isInternalSymbolName(Object
->getSymbol()->getName()))
873 JTLabel
= Object
->getSymbol();
876 const uint64_t EntrySize
= getJumpTableEntrySize(Type
);
878 const std::string JumpTableName
= generateJumpTableName(Function
, Address
);
879 JTLabel
= registerNameAtAddress(JumpTableName
, Address
, 0, EntrySize
);
882 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel
->getName()
883 << " in function " << Function
<< '\n');
// The table is created with a single label (JTLabel at offset 0) and is owned
// through a raw pointer stored in JumpTables.
885 JumpTable
*JT
= new JumpTable(*JTLabel
, Address
, EntrySize
, Type
,
886 JumpTable::LabelMapType
{{0, JTLabel
}},
887 *getSectionForAddress(Address
));
888 JT
->Parents
.push_back(&Function
);
889 if (opts::Verbosity
> 2)
890 JT
->print(this->outs());
891 JumpTables
.emplace(Address
, JT
);
893 // Duplicate the entry for the parent function for easy access.
894 Function
.JumpTables
.emplace(Address
, JT
);
/// Clone jump table \p JT for use by \p Function.
///
/// \p OldLabel must be one of JT's labels (asserted). The copy gets a fresh
/// temporary symbol named "duplicatedJT" as its only label, at the offset
/// found for \p OldLabel, and inherits JT's address, entry size, type,
/// Parents, Entries and Counts. It is registered in JumpTables and in
/// Function.JumpTables under a synthetic ID.
///
/// \returns the synthetic ID and the new label.
898 std::pair
<uint64_t, const MCSymbol
*>
899 BinaryContext::duplicateJumpTable(BinaryFunction
&Function
, JumpTable
*JT
,
900 const MCSymbol
*OldLabel
) {
// Holds the result of scopeLock() for the remainder of the function.
901 auto L
= scopeLock();
// Search JT's labels for OldLabel.
904 for (std::pair
<const unsigned, MCSymbol
*> Elmt
: JT
->Labels
) {
905 if (Elmt
.second
!= OldLabel
)
911 assert(Found
&& "Label not found");
913 MCSymbol
*NewLabel
= Ctx
->createNamedTempSymbol("duplicatedJT");
915 new JumpTable(*NewLabel
, JT
->getAddress(), JT
->EntrySize
, JT
->Type
,
916 JumpTable::LabelMapType
{{Offset
, NewLabel
}},
917 *getSectionForAddress(JT
->getAddress()));
918 NewJT
->Parents
= JT
->Parents
;
919 NewJT
->Entries
= JT
->Entries
;
920 NewJT
->Counts
= JT
->Counts
;
921 uint64_t JumpTableID
= ++DuplicatedJumpTables
;
922 // Invert it to differentiate from regular jump tables whose IDs are their
923 // addresses in the input binary memory space
924 JumpTableID
= ~JumpTableID
;
925 JumpTables
.emplace(JumpTableID
, NewJT
);
926 Function
.JumpTables
.emplace(JumpTableID
, NewJT
);
927 return std::make_pair(JumpTableID
, NewLabel
);
/// Produce the symbol name for a jump table of \p BF at a given address.
///
/// If the address falls inside a jump table \p BF already has, the label
/// registered at that offset is returned when there is one; otherwise the
/// table's existing numeric ID from JumpTableIds is reused together with the
/// offset. For a new table the ID is BF.JumpTables.size(), which is also
/// recorded in JumpTableIds.
///
/// The generated name is "JUMP_TABLE/<one name of BF>.<Id>", followed by
/// ".<Offset>" when Offset is non-zero.
930 std::string
BinaryContext::generateJumpTableName(const BinaryFunction
&BF
,
934 if (const JumpTable
*JT
= BF
.getJumpTableContainingAddress(Address
)) {
935 Offset
= Address
- JT
->getAddress();
936 auto JTLabelsIt
= JT
->Labels
.find(Offset
);
937 if (JTLabelsIt
!= JT
->Labels
.end())
938 return std::string(JTLabelsIt
->second
->getName());
940 auto JTIdsIt
= JumpTableIds
.find(JT
->getAddress());
941 assert(JTIdsIt
!= JumpTableIds
.end());
942 Id
= JTIdsIt
->second
;
944 Id
= JumpTableIds
[Address
] = BF
.JumpTables
.size();
946 return ("JUMP_TABLE/" + BF
.getOneName().str() + "." + std::to_string(Id
) +
947 (Offset
? ("." + std::to_string(Offset
)) : ""));
/// Check that the bytes between the end of \p BF (getSize()) and its maximum
/// size (getMaxSize()) are recognizable padding.
///
/// Nothing needs checking when the two sizes are equal. Otherwise the area
/// is scanned from offset getSize(): padding made entirely of breakpoint
/// instructions is accepted; failing that, runs of no-ops, skip jumps
/// (unconditional branches whose target lies between the next instruction
/// and the end of the padding area) and zero bytes are consumed, possibly
/// interleaved. The padding is valid when the scan reaches getMaxSize();
/// otherwise a "bad padding" warning with the offending bytes is printed when
/// opts::Verbosity >= 1.
950 bool BinaryContext::hasValidCodePadding(const BinaryFunction
&BF
) {
951 // FIXME: aarch64 support is missing.
955 if (BF
.getSize() == BF
.getMaxSize())
958 ErrorOr
<ArrayRef
<unsigned char>> FunctionData
= BF
.getData();
959 assert(FunctionData
&& "cannot get function as data");
// Scan state shared by the helper lambdas below (captured by reference).
961 uint64_t Offset
= BF
.getSize();
963 uint64_t InstrSize
= 0;
964 uint64_t InstrAddress
= BF
.getAddress() + Offset
;
965 using std::placeholders::_1
;
967 // Skip instructions that satisfy the predicate condition.
// Returns the number of bytes consumed.
968 auto skipInstructions
= [&](std::function
<bool(const MCInst
&)> Predicate
) {
969 const uint64_t StartOffset
= Offset
;
970 for (; Offset
< BF
.getMaxSize();
971 Offset
+= InstrSize
, InstrAddress
+= InstrSize
) {
972 if (!DisAsm
->getInstruction(Instr
, InstrSize
, FunctionData
->slice(Offset
),
973 InstrAddress
, nulls()))
975 if (!Predicate(Instr
))
979 return Offset
- StartOffset
;
982 // Skip a sequence of zero bytes.
// Returns the number of bytes consumed.
983 auto skipZeros
= [&]() {
984 const uint64_t StartOffset
= Offset
;
985 for (; Offset
< BF
.getMaxSize(); ++Offset
)
986 if ((*FunctionData
)[Offset
] != 0)
989 return Offset
- StartOffset
;
992 // Accept the whole padding area filled with breakpoints.
993 auto isBreakpoint
= std::bind(&MCPlusBuilder::isBreakpoint
, MIB
.get(), _1
);
994 if (skipInstructions(isBreakpoint
) && Offset
== BF
.getMaxSize())
997 auto isNoop
= std::bind(&MCPlusBuilder::isNoop
, MIB
.get(), _1
);
999 // Some functions have a jump to the next function or to the padding area
1000 // inserted after the body.
1001 auto isSkipJump
= [&](const MCInst
&Instr
) {
1002 uint64_t TargetAddress
= 0;
1003 if (MIB
->isUnconditionalBranch(Instr
) &&
1004 MIB
->evaluateBranch(Instr
, InstrAddress
, InstrSize
, TargetAddress
)) {
// The branch target must lie at or after the next instruction and no
// further than the end of the padding area.
1005 if (TargetAddress
>= InstrAddress
+ InstrSize
&&
1006 TargetAddress
<= BF
.getAddress() + BF
.getMaxSize()) {
1013 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
1014 while (skipInstructions(isNoop
) || skipInstructions(isSkipJump
) ||
1018 if (Offset
== BF
.getMaxSize())
1021 if (opts::Verbosity
>= 1) {
1022 this->errs() << "BOLT-WARNING: bad padding at address 0x"
1023 << Twine::utohexstr(BF
.getAddress() + BF
.getSize())
1024 << " starting at offset " << (Offset
- BF
.getSize())
1025 << " in function " << BF
<< '\n'
1026 << FunctionData
->slice(BF
.getSize(),
1027 BF
.getMaxSize() - BF
.getSize())
1034 void BinaryContext::adjustCodePadding() {
1035 for (auto &BFI
: BinaryFunctions
) {
1036 BinaryFunction
&BF
= BFI
.second
;
1037 if (!shouldEmit(BF
))
1040 if (!hasValidCodePadding(BF
)) {
1041 if (HasRelocations
) {
1042 if (opts::Verbosity
>= 1) {
1043 this->outs() << "BOLT-INFO: function " << BF
1044 << " has invalid padding. Ignoring the function.\n";
1048 BF
.setMaxSize(BF
.getSize());
1054 MCSymbol
*BinaryContext::registerNameAtAddress(StringRef Name
, uint64_t Address
,
1058 // Register the name with MCContext.
1059 MCSymbol
*Symbol
= Ctx
->getOrCreateSymbol(Name
);
1061 auto GAI
= BinaryDataMap
.find(Address
);
1063 if (GAI
== BinaryDataMap
.end()) {
1064 ErrorOr
<BinarySection
&> SectionOrErr
= getSectionForAddress(Address
);
1065 BinarySection
&Section
=
1066 SectionOrErr
? SectionOrErr
.get() : absoluteSection();
1067 BD
= new BinaryData(*Symbol
, Address
, Size
, Alignment
? Alignment
: 1,
1069 GAI
= BinaryDataMap
.emplace(Address
, BD
).first
;
1070 GlobalSymbols
[Name
] = BD
;
1071 updateObjectNesting(GAI
);
1074 if (!BD
->hasName(Name
)) {
1075 GlobalSymbols
[Name
] = BD
;
1076 BD
->Symbols
.push_back(Symbol
);
1084 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address
) const {
1085 auto NI
= BinaryDataMap
.lower_bound(Address
);
1086 auto End
= BinaryDataMap
.end();
1087 if ((NI
!= End
&& Address
== NI
->first
) ||
1088 ((NI
!= BinaryDataMap
.begin()) && (NI
-- != BinaryDataMap
.begin()))) {
1089 if (NI
->second
->containsAddress(Address
))
1092 // If this is a sub-symbol, see if a parent data contains the address.
1093 const BinaryData
*BD
= NI
->second
->getParent();
1095 if (BD
->containsAddress(Address
))
1097 BD
= BD
->getParent();
1103 BinaryData
*BinaryContext::getGOTSymbol() {
1104 // First tries to find a global symbol with that name
1105 BinaryData
*GOTSymBD
= getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1109 // This symbol might be hidden from run-time link, so fetch the local
1110 // definition if available.
1111 GOTSymBD
= getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1115 // If the local symbol is not unique, fail
1117 SmallString
<30> Storage
;
1118 while (const BinaryData
*BD
=
1119 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1120 .concat(Twine(Index
++))
1121 .toStringRef(Storage
)))
1122 if (BD
->getAddress() != GOTSymBD
->getAddress())
1128 bool BinaryContext::setBinaryDataSize(uint64_t Address
, uint64_t Size
) {
1129 auto NI
= BinaryDataMap
.find(Address
);
1130 assert(NI
!= BinaryDataMap
.end());
1131 if (NI
== BinaryDataMap
.end())
1133 // TODO: it's possible that a jump table starts at the same address
1134 // as a larger blob of private data. When we set the size of the
1135 // jump table, it might be smaller than the total blob size. In this
1136 // case we just leave the original size since (currently) it won't really
1138 assert((!NI
->second
->Size
|| NI
->second
->Size
== Size
||
1139 (NI
->second
->isJumpTable() && NI
->second
->Size
> Size
)) &&
1140 "can't change the size of a symbol that has already had its "
1142 if (!NI
->second
->Size
) {
1143 NI
->second
->Size
= Size
;
1144 updateObjectNesting(NI
);
1150 void BinaryContext::generateSymbolHashes() {
1151 auto isPadding
= [](const BinaryData
&BD
) {
1152 StringRef Contents
= BD
.getSection().getContents();
1153 StringRef SymData
= Contents
.substr(BD
.getOffset(), BD
.getSize());
1154 return (BD
.getName().starts_with("HOLEat") ||
1155 SymData
.find_first_not_of(0) == StringRef::npos
);
1158 uint64_t NumCollisions
= 0;
1159 for (auto &Entry
: BinaryDataMap
) {
1160 BinaryData
&BD
= *Entry
.second
;
1161 StringRef Name
= BD
.getName();
1163 if (!isInternalSymbolName(Name
))
1166 // First check if a non-anonymous alias exists and move it to the front.
1167 if (BD
.getSymbols().size() > 1) {
1168 auto Itr
= llvm::find_if(BD
.getSymbols(), [&](const MCSymbol
*Symbol
) {
1169 return !isInternalSymbolName(Symbol
->getName());
1171 if (Itr
!= BD
.getSymbols().end()) {
1172 size_t Idx
= std::distance(BD
.getSymbols().begin(), Itr
);
1173 std::swap(BD
.getSymbols()[0], BD
.getSymbols()[Idx
]);
1178 // We have to skip 0 size symbols since they will all collide.
1179 if (BD
.getSize() == 0) {
1183 const uint64_t Hash
= BD
.getSection().hash(BD
);
1184 const size_t Idx
= Name
.find("0x");
1185 std::string NewName
=
1186 (Twine(Name
.substr(0, Idx
)) + "_" + Twine::utohexstr(Hash
)).str();
1187 if (getBinaryDataByName(NewName
)) {
1188 // Ignore collisions for symbols that appear to be padding
1189 // (i.e. all zeros or a "hole")
1190 if (!isPadding(BD
)) {
1191 if (opts::Verbosity
) {
1192 this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1193 << " with new name (" << NewName
<< "), skipping.\n";
1199 BD
.Symbols
.insert(BD
.Symbols
.begin(), Ctx
->getOrCreateSymbol(NewName
));
1200 GlobalSymbols
[NewName
] = &BD
;
1202 if (NumCollisions
) {
1203 this->errs() << "BOLT-WARNING: " << NumCollisions
1204 << " collisions detected while hashing binary objects";
1205 if (!opts::Verbosity
)
1206 this->errs() << ". Use -v=1 to see the list.";
1207 this->errs() << '\n';
1211 bool BinaryContext::registerFragment(BinaryFunction
&TargetFunction
,
1212 BinaryFunction
&Function
) const {
1213 assert(TargetFunction
.isFragment() && "TargetFunction must be a fragment");
1214 if (TargetFunction
.isChildOf(Function
))
1216 TargetFunction
.addParentFragment(Function
);
1217 Function
.addFragment(TargetFunction
);
1218 if (!HasRelocations
) {
1219 TargetFunction
.setSimple(false);
1220 Function
.setSimple(false);
1222 if (opts::Verbosity
>= 1) {
1223 this->outs() << "BOLT-INFO: marking " << TargetFunction
1224 << " as a fragment of " << Function
<< '\n';
1229 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction
&BF
,
1230 MCInst
&LoadLowBits
,
1233 const MCSymbol
*TargetSymbol
;
1234 uint64_t Addend
= 0;
1235 std::tie(TargetSymbol
, Addend
) = handleAddressRef(Target
, BF
,
1238 MIB
->replaceImmWithSymbolRef(LoadHiBits
, TargetSymbol
, Addend
, Ctx
.get(), Val
,
1239 ELF::R_AARCH64_ADR_PREL_PG_HI21
);
1240 MIB
->replaceImmWithSymbolRef(LoadLowBits
, TargetSymbol
, Addend
, Ctx
.get(),
1241 Val
, ELF::R_AARCH64_ADD_ABS_LO12_NC
);
1244 bool BinaryContext::handleAArch64Veneer(uint64_t Address
, bool MatchOnly
) {
1245 BinaryFunction
*TargetFunction
= getBinaryFunctionContainingAddress(Address
);
1249 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
1250 assert(Section
&& "cannot get section for referenced address");
1251 if (!Section
->isText())
1255 StringRef SectionContents
= Section
->getContents();
1256 uint64_t Offset
= Address
- Section
->getAddress();
1257 const uint64_t MaxSize
= SectionContents
.size() - Offset
;
1258 const uint8_t *Bytes
=
1259 reinterpret_cast<const uint8_t *>(SectionContents
.data());
1260 ArrayRef
<uint8_t> Data(Bytes
+ Offset
, MaxSize
);
1262 auto matchVeneer
= [&](BinaryFunction::InstrMapType
&Instructions
,
1263 MCInst
&Instruction
, uint64_t Offset
,
1264 uint64_t AbsoluteInstrAddr
,
1265 uint64_t TotalSize
) -> bool {
1266 MCInst
*TargetHiBits
, *TargetLowBits
;
1267 uint64_t TargetAddress
, Count
;
1268 Count
= MIB
->matchLinkerVeneer(Instructions
.begin(), Instructions
.end(),
1269 AbsoluteInstrAddr
, Instruction
, TargetHiBits
,
1270 TargetLowBits
, TargetAddress
);
1277 // NOTE The target symbol was created during disassemble's
1278 // handleExternalReference
1279 const MCSymbol
*VeneerSymbol
= getOrCreateGlobalSymbol(Address
, "FUNCat");
1280 BinaryFunction
*Veneer
= createBinaryFunction(VeneerSymbol
->getName().str(),
1281 *Section
, Address
, TotalSize
);
1282 addAdrpAddRelocAArch64(*Veneer
, *TargetLowBits
, *TargetHiBits
,
1284 MIB
->addAnnotation(Instruction
, "AArch64Veneer", true);
1285 Veneer
->addInstruction(Offset
, std::move(Instruction
));
1287 for (auto It
= Instructions
.rbegin(); Count
!= 0; ++It
, --Count
) {
1288 MIB
->addAnnotation(It
->second
, "AArch64Veneer", true);
1289 Veneer
->addInstruction(It
->first
, std::move(It
->second
));
1292 Veneer
->getOrCreateLocalLabel(Address
);
1293 Veneer
->setMaxSize(TotalSize
);
1294 Veneer
->updateState(BinaryFunction::State::Disassembled
);
1295 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1300 uint64_t Size
= 0, TotalSize
= 0;
1301 BinaryFunction::InstrMapType VeneerInstructions
;
1302 for (Offset
= 0; Offset
< MaxSize
; Offset
+= Size
) {
1304 const uint64_t AbsoluteInstrAddr
= Address
+ Offset
;
1305 if (!SymbolicDisAsm
->getInstruction(Instruction
, Size
, Data
.slice(Offset
),
1306 AbsoluteInstrAddr
, nulls()))
1310 if (MIB
->isBranch(Instruction
)) {
1311 Ret
= matchVeneer(VeneerInstructions
, Instruction
, Offset
,
1312 AbsoluteInstrAddr
, TotalSize
);
1316 VeneerInstructions
.emplace(Offset
, std::move(Instruction
));
1322 void BinaryContext::processInterproceduralReferences() {
1323 for (const std::pair
<BinaryFunction
*, uint64_t> &It
:
1324 InterproceduralReferences
) {
1325 BinaryFunction
&Function
= *It
.first
;
1326 uint64_t Address
= It
.second
;
1327 // Process interprocedural references from ignored functions in BAT mode
1328 // (non-simple in non-relocation mode) to properly register entry points
1329 if (!Address
|| (Function
.isIgnored() && !HasBATSection
))
1332 BinaryFunction
*TargetFunction
=
1333 getBinaryFunctionContainingAddress(Address
);
1334 if (&Function
== TargetFunction
)
1337 if (TargetFunction
) {
1338 if (TargetFunction
->isFragment() &&
1339 !TargetFunction
->isChildOf(Function
)) {
1341 << "BOLT-WARNING: interprocedural reference between unrelated "
1343 << Function
.getPrintName() << " and "
1344 << TargetFunction
->getPrintName() << '\n';
1346 if (uint64_t Offset
= Address
- TargetFunction
->getAddress())
1347 TargetFunction
->addEntryPointAtOffset(Offset
);
1352 // Check if address falls in function padding space - this could be
1353 // unmarked data in code. In this case adjust the padding space size.
1354 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
1355 assert(Section
&& "cannot get section for referenced address");
1357 if (!Section
->isText())
1360 // PLT requires special handling and could be ignored in this context.
1361 StringRef SectionName
= Section
->getName();
1362 if (SectionName
== ".plt" || SectionName
== ".plt.got")
1365 // Check if it is aarch64 veneer written at Address
1366 if (isAArch64() && handleAArch64Veneer(Address
))
1369 if (opts::processAllFunctions()) {
1370 this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1371 << "object in code at address 0x"
1372 << Twine::utohexstr(Address
) << " belonging to section "
1373 << SectionName
<< " in current mode\n";
1377 TargetFunction
= getBinaryFunctionContainingAddress(Address
,
1378 /*CheckPastEnd=*/false,
1379 /*UseMaxSize=*/true);
1380 // We are not going to overwrite non-simple functions, but for simple
1381 // ones - adjust the padding size.
1382 if (TargetFunction
&& TargetFunction
->isSimple()) {
1384 << "BOLT-WARNING: function " << *TargetFunction
1385 << " has an object detected in a padding region at address 0x"
1386 << Twine::utohexstr(Address
) << '\n';
1387 TargetFunction
->setMaxSize(TargetFunction
->getSize());
1391 InterproceduralReferences
.clear();
1394 void BinaryContext::postProcessSymbolTable() {
1395 fixBinaryDataHoles();
1397 for (auto &Entry
: BinaryDataMap
) {
1398 BinaryData
*BD
= Entry
.second
;
1399 if ((BD
->getName().starts_with("SYMBOLat") ||
1400 BD
->getName().starts_with("DATAat")) &&
1401 !BD
->getParent() && !BD
->getSize() && !BD
->isAbsolute() &&
1403 this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
1410 generateSymbolHashes();
1413 void BinaryContext::foldFunction(BinaryFunction
&ChildBF
,
1414 BinaryFunction
&ParentBF
) {
1415 assert(!ChildBF
.isMultiEntry() && !ParentBF
.isMultiEntry() &&
1416 "cannot merge functions with multiple entry points");
1418 std::unique_lock
<llvm::sys::RWMutex
> WriteCtxLock(CtxMutex
, std::defer_lock
);
1419 std::unique_lock
<llvm::sys::RWMutex
> WriteSymbolMapLock(
1420 SymbolToFunctionMapMutex
, std::defer_lock
);
1422 const StringRef ChildName
= ChildBF
.getOneName();
1424 // Move symbols over and update bookkeeping info.
1425 for (MCSymbol
*Symbol
: ChildBF
.getSymbols()) {
1426 ParentBF
.getSymbols().push_back(Symbol
);
1427 WriteSymbolMapLock
.lock();
1428 SymbolToFunctionMap
[Symbol
] = &ParentBF
;
1429 WriteSymbolMapLock
.unlock();
1430 // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1432 ChildBF
.getSymbols().clear();
1434 // Move other names the child function is known under.
1435 llvm::move(ChildBF
.Aliases
, std::back_inserter(ParentBF
.Aliases
));
1436 ChildBF
.Aliases
.clear();
1438 if (HasRelocations
) {
1439 // Merge execution counts of ChildBF into those of ParentBF.
1440 // Without relocations, we cannot reliably merge profiles as both functions
1441 // continue to exist and either one can be executed.
1442 ChildBF
.mergeProfileDataInto(ParentBF
);
1444 std::shared_lock
<llvm::sys::RWMutex
> ReadBfsLock(BinaryFunctionsMutex
,
1446 std::unique_lock
<llvm::sys::RWMutex
> WriteBfsLock(BinaryFunctionsMutex
,
1448 // Remove ChildBF from the global set of functions in relocs mode.
1450 auto FI
= BinaryFunctions
.find(ChildBF
.getAddress());
1451 ReadBfsLock
.unlock();
1453 assert(FI
!= BinaryFunctions
.end() && "function not found");
1454 assert(&ChildBF
== &FI
->second
&& "function mismatch");
1456 WriteBfsLock
.lock();
1457 ChildBF
.clearDisasmState();
1458 FI
= BinaryFunctions
.erase(FI
);
1459 WriteBfsLock
.unlock();
1462 // In non-relocation mode we keep the function, but rename it.
1463 std::string NewName
= "__ICF_" + ChildName
.str();
1465 WriteCtxLock
.lock();
1466 ChildBF
.getSymbols().push_back(Ctx
->getOrCreateSymbol(NewName
));
1467 WriteCtxLock
.unlock();
1469 ChildBF
.setFolded(&ParentBF
);
1472 ParentBF
.setHasFunctionsFoldedInto();
1475 void BinaryContext::fixBinaryDataHoles() {
1476 assert(validateObjectNesting() && "object nesting inconsistency detected");
1478 for (BinarySection
&Section
: allocatableSections()) {
1479 std::vector
<std::pair
<uint64_t, uint64_t>> Holes
;
1481 auto isNotHole
= [&Section
](const binary_data_iterator
&Itr
) {
1482 BinaryData
*BD
= Itr
->second
;
1483 bool isHole
= (!BD
->getParent() && !BD
->getSize() && BD
->isObject() &&
1484 (BD
->getName().starts_with("SYMBOLat0x") ||
1485 BD
->getName().starts_with("DATAat0x") ||
1486 BD
->getName().starts_with("ANONYMOUS")));
1487 return !isHole
&& BD
->getSection() == Section
&& !BD
->getParent();
1490 auto BDStart
= BinaryDataMap
.begin();
1491 auto BDEnd
= BinaryDataMap
.end();
1492 auto Itr
= FilteredBinaryDataIterator(isNotHole
, BDStart
, BDEnd
);
1493 auto End
= FilteredBinaryDataIterator(isNotHole
, BDEnd
, BDEnd
);
1495 uint64_t EndAddress
= Section
.getAddress();
1497 while (Itr
!= End
) {
1498 if (Itr
->second
->getAddress() > EndAddress
) {
1499 uint64_t Gap
= Itr
->second
->getAddress() - EndAddress
;
1500 Holes
.emplace_back(EndAddress
, Gap
);
1502 EndAddress
= Itr
->second
->getEndAddress();
1506 if (EndAddress
< Section
.getEndAddress())
1507 Holes
.emplace_back(EndAddress
, Section
.getEndAddress() - EndAddress
);
1509 // If there is already a symbol at the start of the hole, grow that symbol
1510 // to cover the rest. Otherwise, create a new symbol to cover the hole.
1511 for (std::pair
<uint64_t, uint64_t> &Hole
: Holes
) {
1512 BinaryData
*BD
= getBinaryDataAtAddress(Hole
.first
);
1514 // BD->getSection() can be != Section if there are sections that
1515 // overlap. In this case it is probably safe to just skip the holes
1516 // since the overlapping section will not(?) have any symbols in it.
1517 if (BD
->getSection() == Section
)
1518 setBinaryDataSize(Hole
.first
, Hole
.second
);
1520 getOrCreateGlobalSymbol(Hole
.first
, "HOLEat", Hole
.second
, 1);
1525 assert(validateObjectNesting() && "object nesting inconsistency detected");
1526 assert(validateHoles() && "top level hole detected in object map");
1529 void BinaryContext::printGlobalSymbols(raw_ostream
&OS
) const {
1530 const BinarySection
*CurrentSection
= nullptr;
1531 bool FirstSection
= true;
1533 for (auto &Entry
: BinaryDataMap
) {
1534 const BinaryData
*BD
= Entry
.second
;
1535 const BinarySection
&Section
= BD
->getSection();
1536 if (FirstSection
|| Section
!= *CurrentSection
) {
1537 uint64_t Address
, Size
;
1538 StringRef Name
= Section
.getName();
1540 Address
= Section
.getAddress();
1541 Size
= Section
.getSize();
1543 Address
= BD
->getAddress();
1544 Size
= BD
->getSize();
1546 OS
<< "BOLT-INFO: Section " << Name
<< ", "
1547 << "0x" + Twine::utohexstr(Address
) << ":"
1548 << "0x" + Twine::utohexstr(Address
+ Size
) << "/" << Size
<< "\n";
1549 CurrentSection
= &Section
;
1550 FirstSection
= false;
1553 OS
<< "BOLT-INFO: ";
1554 const BinaryData
*P
= BD
->getParent();
1563 Expected
<unsigned> BinaryContext::getDwarfFile(
1564 StringRef Directory
, StringRef FileName
, unsigned FileNumber
,
1565 std::optional
<MD5::MD5Result
> Checksum
, std::optional
<StringRef
> Source
,
1566 unsigned CUID
, unsigned DWARFVersion
) {
1567 DwarfLineTable
&Table
= DwarfLineTablesCUMap
[CUID
];
1568 return Table
.tryGetFile(Directory
, FileName
, Checksum
, Source
, DWARFVersion
,
1572 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID
,
1573 const uint32_t SrcCUID
,
1574 unsigned FileIndex
) {
1575 DWARFCompileUnit
*SrcUnit
= DwCtx
->getCompileUnitForOffset(SrcCUID
);
1576 const DWARFDebugLine::LineTable
*LineTable
=
1577 DwCtx
->getLineTableForUnit(SrcUnit
);
1578 const std::vector
<DWARFDebugLine::FileNameEntry
> &FileNames
=
1579 LineTable
->Prologue
.FileNames
;
1580 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1582 assert(FileIndex
> 0 && FileIndex
<= FileNames
.size() &&
1583 "FileIndex out of range for the compilation unit.");
1585 if (FileNames
[FileIndex
- 1].DirIdx
!= 0) {
1586 if (std::optional
<const char *> DirName
= dwarf::toString(
1588 .IncludeDirectories
[FileNames
[FileIndex
- 1].DirIdx
- 1])) {
1592 StringRef FileName
= "";
1593 if (std::optional
<const char *> FName
=
1594 dwarf::toString(FileNames
[FileIndex
- 1].Name
))
1596 assert(FileName
!= "");
1597 DWARFCompileUnit
*DstUnit
= DwCtx
->getCompileUnitForOffset(DestCUID
);
1598 return cantFail(getDwarfFile(Dir
, FileName
, 0, std::nullopt
, std::nullopt
,
1599 DestCUID
, DstUnit
->getVersion()));
1602 std::vector
<BinaryFunction
*> BinaryContext::getSortedFunctions() {
1603 std::vector
<BinaryFunction
*> SortedFunctions(BinaryFunctions
.size());
1604 llvm::transform(llvm::make_second_range(BinaryFunctions
),
1605 SortedFunctions
.begin(),
1606 [](BinaryFunction
&BF
) { return &BF
; });
1608 llvm::stable_sort(SortedFunctions
,
1609 [](const BinaryFunction
*A
, const BinaryFunction
*B
) {
1610 if (A
->hasValidIndex() && B
->hasValidIndex()) {
1611 return A
->getIndex() < B
->getIndex();
1613 return A
->hasValidIndex();
1615 return SortedFunctions
;
1618 std::vector
<BinaryFunction
*> BinaryContext::getAllBinaryFunctions() {
1619 std::vector
<BinaryFunction
*> AllFunctions
;
1620 AllFunctions
.reserve(BinaryFunctions
.size() + InjectedBinaryFunctions
.size());
1621 llvm::transform(llvm::make_second_range(BinaryFunctions
),
1622 std::back_inserter(AllFunctions
),
1623 [](BinaryFunction
&BF
) { return &BF
; });
1624 llvm::copy(InjectedBinaryFunctions
, std::back_inserter(AllFunctions
));
1626 return AllFunctions
;
1629 std::optional
<DWARFUnit
*> BinaryContext::getDWOCU(uint64_t DWOId
) {
1630 auto Iter
= DWOCUs
.find(DWOId
);
1631 if (Iter
== DWOCUs
.end())
1632 return std::nullopt
;
1634 return Iter
->second
;
1637 DWARFContext
*BinaryContext::getDWOContext() const {
1640 return &DWOCUs
.begin()->second
->getContext();
1643 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1644 void BinaryContext::preprocessDWODebugInfo() {
1645 for (const std::unique_ptr
<DWARFUnit
> &CU
: DwCtx
->compile_units()) {
1646 DWARFUnit
*const DwarfUnit
= CU
.get();
1647 if (std::optional
<uint64_t> DWOId
= DwarfUnit
->getDWOId()) {
1648 std::string DWOName
= dwarf::toString(
1649 DwarfUnit
->getUnitDIE().find(
1650 {dwarf::DW_AT_dwo_name
, dwarf::DW_AT_GNU_dwo_name
}),
1652 SmallString
<16> AbsolutePath
;
1653 if (!opts::CompDirOverride
.empty()) {
1654 sys::path::append(AbsolutePath
, opts::CompDirOverride
);
1655 sys::path::append(AbsolutePath
, DWOName
);
1658 DwarfUnit
->getNonSkeletonUnitDIE(false, AbsolutePath
).getDwarfUnit();
1659 if (!DWOCU
->isDWOUnit()) {
1661 << "BOLT-WARNING: Debug Fission: DWO debug information for "
1663 << " was not retrieved and won't be updated. Please check "
1667 DWOCUs
[*DWOId
] = DWOCU
;
1670 if (!DWOCUs
.empty())
1671 this->outs() << "BOLT-INFO: processing split DWARF\n";
1674 void BinaryContext::preprocessDebugInfo() {
1680 bool operator<(const CURange
&Other
) const { return LowPC
< Other
.LowPC
; }
1683 // Building a map of address ranges to CUs similar to .debug_aranges and use
1684 // it to assign CU to functions.
1685 std::vector
<CURange
> AllRanges
;
1686 AllRanges
.reserve(DwCtx
->getNumCompileUnits());
1687 for (const std::unique_ptr
<DWARFUnit
> &CU
: DwCtx
->compile_units()) {
1688 Expected
<DWARFAddressRangesVector
> RangesOrError
=
1689 CU
->getUnitDIE().getAddressRanges();
1690 if (!RangesOrError
) {
1691 consumeError(RangesOrError
.takeError());
1694 for (DWARFAddressRange
&Range
: *RangesOrError
) {
1695 // Parts of the debug info could be invalidated due to corresponding code
1696 // being removed from the binary by the linker. Hence we check if the
1697 // address is a valid one.
1698 if (containsAddress(Range
.LowPC
))
1699 AllRanges
.emplace_back(CURange
{Range
.LowPC
, Range
.HighPC
, CU
.get()});
1702 ContainsDwarf5
|= CU
->getVersion() >= 5;
1703 ContainsDwarfLegacy
|= CU
->getVersion() < 5;
1706 llvm::sort(AllRanges
);
1707 for (auto &KV
: BinaryFunctions
) {
1708 const uint64_t FunctionAddress
= KV
.first
;
1709 BinaryFunction
&Function
= KV
.second
;
1711 auto It
= llvm::partition_point(
1712 AllRanges
, [=](CURange R
) { return R
.HighPC
<= FunctionAddress
; });
1713 if (It
!= AllRanges
.end() && It
->LowPC
<= FunctionAddress
)
1714 Function
.setDWARFUnit(It
->Unit
);
1717 // Discover units with debug info that needs to be updated.
1718 for (const auto &KV
: BinaryFunctions
) {
1719 const BinaryFunction
&BF
= KV
.second
;
1720 if (shouldEmit(BF
) && BF
.getDWARFUnit())
1721 ProcessedCUs
.insert(BF
.getDWARFUnit());
1724 // Clear debug info for functions from units that we are not going to process.
1725 for (auto &KV
: BinaryFunctions
) {
1726 BinaryFunction
&BF
= KV
.second
;
1727 if (BF
.getDWARFUnit() && !ProcessedCUs
.count(BF
.getDWARFUnit()))
1728 BF
.setDWARFUnit(nullptr);
1731 if (opts::Verbosity
>= 1) {
1732 this->outs() << "BOLT-INFO: " << ProcessedCUs
.size() << " out of "
1733 << DwCtx
->getNumCompileUnits() << " CUs will be updated\n";
1736 preprocessDWODebugInfo();
1738 // Populate MCContext with DWARF files from all units.
1739 StringRef GlobalPrefix
= AsmInfo
->getPrivateGlobalPrefix();
1740 for (const std::unique_ptr
<DWARFUnit
> &CU
: DwCtx
->compile_units()) {
1741 const uint64_t CUID
= CU
->getOffset();
1742 DwarfLineTable
&BinaryLineTable
= getDwarfLineTable(CUID
);
1743 BinaryLineTable
.setLabel(Ctx
->getOrCreateSymbol(
1744 GlobalPrefix
+ "line_table_start" + Twine(CUID
)));
1746 if (!ProcessedCUs
.count(CU
.get()))
1749 const DWARFDebugLine::LineTable
*LineTable
=
1750 DwCtx
->getLineTableForUnit(CU
.get());
1751 const std::vector
<DWARFDebugLine::FileNameEntry
> &FileNames
=
1752 LineTable
->Prologue
.FileNames
;
1754 uint16_t DwarfVersion
= LineTable
->Prologue
.getVersion();
1755 if (DwarfVersion
>= 5) {
1756 std::optional
<MD5::MD5Result
> Checksum
;
1757 if (LineTable
->Prologue
.ContentTypes
.HasMD5
)
1758 Checksum
= LineTable
->Prologue
.FileNames
[0].Checksum
;
1759 std::optional
<const char *> Name
=
1760 dwarf::toString(CU
->getUnitDIE().find(dwarf::DW_AT_name
), nullptr);
1761 if (std::optional
<uint64_t> DWOID
= CU
->getDWOId()) {
1762 auto Iter
= DWOCUs
.find(*DWOID
);
1763 assert(Iter
!= DWOCUs
.end() && "DWO CU was not found.");
1764 Name
= dwarf::toString(
1765 Iter
->second
->getUnitDIE().find(dwarf::DW_AT_name
), nullptr);
1767 BinaryLineTable
.setRootFile(CU
->getCompilationDir(), *Name
, Checksum
,
1771 BinaryLineTable
.setDwarfVersion(DwarfVersion
);
1773 // Assign a unique label to every line table, one per CU.
1774 // Make sure empty debug line tables are registered too.
1775 if (FileNames
.empty()) {
1776 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt
, std::nullopt
,
1777 CUID
, DwarfVersion
));
1780 const uint32_t Offset
= DwarfVersion
< 5 ? 1 : 0;
1781 for (size_t I
= 0, Size
= FileNames
.size(); I
!= Size
; ++I
) {
1782 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1785 if (FileNames
[I
].DirIdx
!= 0 || DwarfVersion
>= 5)
1786 if (std::optional
<const char *> DirName
= dwarf::toString(
1788 .IncludeDirectories
[FileNames
[I
].DirIdx
- Offset
]))
1790 StringRef FileName
= "";
1791 if (std::optional
<const char *> FName
=
1792 dwarf::toString(FileNames
[I
].Name
))
1794 assert(FileName
!= "");
1795 std::optional
<MD5::MD5Result
> Checksum
;
1796 if (DwarfVersion
>= 5 && LineTable
->Prologue
.ContentTypes
.HasMD5
)
1797 Checksum
= LineTable
->Prologue
.FileNames
[I
].Checksum
;
1798 cantFail(getDwarfFile(Dir
, FileName
, 0, Checksum
, std::nullopt
, CUID
,
1804 bool BinaryContext::shouldEmit(const BinaryFunction
&Function
) const {
1805 if (Function
.isPseudo())
1808 if (opts::processAllFunctions())
1811 if (Function
.isIgnored())
1814 // In relocation mode we will emit non-simple functions with CFG.
1815 // If the function does not have a CFG it should be marked as ignored.
1816 return HasRelocations
|| Function
.isSimple();
1819 void BinaryContext::dump(const MCInst
&Inst
) const {
1820 if (LLVM_UNLIKELY(!InstPrinter
)) {
1821 dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1824 InstPrinter
->printInst(&Inst
, 0, "", *STI
, dbgs());
1828 void BinaryContext::printCFI(raw_ostream
&OS
, const MCCFIInstruction
&Inst
) {
1829 uint32_t Operation
= Inst
.getOperation();
1830 switch (Operation
) {
1831 case MCCFIInstruction::OpSameValue
:
1832 OS
<< "OpSameValue Reg" << Inst
.getRegister();
1834 case MCCFIInstruction::OpRememberState
:
1835 OS
<< "OpRememberState";
1837 case MCCFIInstruction::OpRestoreState
:
1838 OS
<< "OpRestoreState";
1840 case MCCFIInstruction::OpOffset
:
1841 OS
<< "OpOffset Reg" << Inst
.getRegister() << " " << Inst
.getOffset();
1843 case MCCFIInstruction::OpDefCfaRegister
:
1844 OS
<< "OpDefCfaRegister Reg" << Inst
.getRegister();
1846 case MCCFIInstruction::OpDefCfaOffset
:
1847 OS
<< "OpDefCfaOffset " << Inst
.getOffset();
1849 case MCCFIInstruction::OpDefCfa
:
1850 OS
<< "OpDefCfa Reg" << Inst
.getRegister() << " " << Inst
.getOffset();
1852 case MCCFIInstruction::OpRelOffset
:
1853 OS
<< "OpRelOffset Reg" << Inst
.getRegister() << " " << Inst
.getOffset();
1855 case MCCFIInstruction::OpAdjustCfaOffset
:
1856 OS
<< "OfAdjustCfaOffset " << Inst
.getOffset();
1858 case MCCFIInstruction::OpEscape
:
1861 case MCCFIInstruction::OpRestore
:
1862 OS
<< "OpRestore Reg" << Inst
.getRegister();
1864 case MCCFIInstruction::OpUndefined
:
1865 OS
<< "OpUndefined Reg" << Inst
.getRegister();
1867 case MCCFIInstruction::OpRegister
:
1868 OS
<< "OpRegister Reg" << Inst
.getRegister() << " Reg"
1869 << Inst
.getRegister2();
1871 case MCCFIInstruction::OpWindowSave
:
1872 OS
<< "OpWindowSave";
1874 case MCCFIInstruction::OpGnuArgsSize
:
1875 OS
<< "OpGnuArgsSize";
1878 OS
<< "Op#" << Operation
;
1883 MarkerSymType
BinaryContext::getMarkerType(const SymbolRef
&Symbol
) const {
1884 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1885 // in the code section (see IHI0056B). $x identifies a symbol starting code or
1886 // the end of a data chunk inside code, $d identifies start of data.
1887 if (isX86() || ELFSymbolRef(Symbol
).getSize())
1888 return MarkerSymType::NONE
;
1890 Expected
<StringRef
> NameOrError
= Symbol
.getName();
1891 Expected
<object::SymbolRef::Type
> TypeOrError
= Symbol
.getType();
1893 if (!TypeOrError
|| !NameOrError
)
1894 return MarkerSymType::NONE
;
1896 if (*TypeOrError
!= SymbolRef::ST_Unknown
)
1897 return MarkerSymType::NONE
;
1899 if (*NameOrError
== "$x" || NameOrError
->starts_with("$x."))
1900 return MarkerSymType::CODE
;
1903 if (isRISCV() && NameOrError
->starts_with("$x"))
1904 return MarkerSymType::CODE
;
1906 if (*NameOrError
== "$d" || NameOrError
->starts_with("$d."))
1907 return MarkerSymType::DATA
;
1909 return MarkerSymType::NONE
;
1912 bool BinaryContext::isMarker(const SymbolRef
&Symbol
) const {
1913 return getMarkerType(Symbol
) != MarkerSymType::NONE
;
1916 static void printDebugInfo(raw_ostream
&OS
, const MCInst
&Instruction
,
1917 const BinaryFunction
*Function
,
1918 DWARFContext
*DwCtx
) {
1919 DebugLineTableRowRef RowRef
=
1920 DebugLineTableRowRef::fromSMLoc(Instruction
.getLoc());
1921 if (RowRef
== DebugLineTableRowRef::NULL_ROW
)
1924 const DWARFDebugLine::LineTable
*LineTable
;
1925 if (Function
&& Function
->getDWARFUnit() &&
1926 Function
->getDWARFUnit()->getOffset() == RowRef
.DwCompileUnitIndex
) {
1927 LineTable
= Function
->getDWARFLineTable();
1929 LineTable
= DwCtx
->getLineTableForUnit(
1930 DwCtx
->getCompileUnitForOffset(RowRef
.DwCompileUnitIndex
));
1932 assert(LineTable
&& "line table expected for instruction with debug info");
1934 const DWARFDebugLine::Row
&Row
= LineTable
->Rows
[RowRef
.RowIndex
- 1];
1935 StringRef FileName
= "";
1936 if (std::optional
<const char *> FName
=
1937 dwarf::toString(LineTable
->Prologue
.FileNames
[Row
.File
- 1].Name
))
1939 OS
<< " # debug line " << FileName
<< ":" << Row
.Line
;
1941 OS
<< ":" << Row
.Column
;
1942 if (Row
.Discriminator
)
1943 OS
<< " discriminator:" << Row
.Discriminator
;
1946 void BinaryContext::printInstruction(raw_ostream
&OS
, const MCInst
&Instruction
,
1948 const BinaryFunction
*Function
,
1949 bool PrintMCInst
, bool PrintMemData
,
1950 bool PrintRelocations
,
1951 StringRef Endl
) const {
1952 OS
<< format(" %08" PRIx64
": ", Offset
);
1953 if (MIB
->isCFI(Instruction
)) {
1954 uint32_t Offset
= Instruction
.getOperand(0).getImm();
1955 OS
<< "\t!CFI\t$" << Offset
<< "\t; ";
1957 printCFI(OS
, *Function
->getCFIFor(Instruction
));
1961 if (std::optional
<uint32_t> DynamicID
=
1962 MIB
->getDynamicBranchID(Instruction
)) {
1963 OS
<< "\tjit\t" << MIB
->getTargetSymbol(Instruction
)->getName()
1964 << " # ID: " << DynamicID
;
1966 InstPrinter
->printInst(&Instruction
, 0, "", *STI
, OS
);
1968 if (MIB
->isCall(Instruction
)) {
1969 if (MIB
->isTailCall(Instruction
))
1970 OS
<< " # TAILCALL ";
1971 if (MIB
->isInvoke(Instruction
)) {
1972 const std::optional
<MCPlus::MCLandingPad
> EHInfo
=
1973 MIB
->getEHInfo(Instruction
);
1974 OS
<< " # handler: ";
1976 OS
<< *EHInfo
->first
;
1979 OS
<< "; action: " << EHInfo
->second
;
1980 const int64_t GnuArgsSize
= MIB
->getGnuArgsSize(Instruction
);
1981 if (GnuArgsSize
>= 0)
1982 OS
<< "; GNU_args_size = " << GnuArgsSize
;
1984 } else if (MIB
->isIndirectBranch(Instruction
)) {
1985 if (uint64_t JTAddress
= MIB
->getJumpTable(Instruction
)) {
1986 OS
<< " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress
);
1988 OS
<< " # UNKNOWN CONTROL FLOW";
1991 if (std::optional
<uint32_t> Offset
= MIB
->getOffset(Instruction
))
1992 OS
<< " # Offset: " << *Offset
;
1993 if (std::optional
<uint32_t> Size
= MIB
->getSize(Instruction
))
1994 OS
<< " # Size: " << *Size
;
1995 if (MCSymbol
*Label
= MIB
->getInstLabel(Instruction
))
1996 OS
<< " # Label: " << *Label
;
1998 MIB
->printAnnotations(Instruction
, OS
);
2000 if (opts::PrintDebugInfo
)
2001 printDebugInfo(OS
, Instruction
, Function
, DwCtx
.get());
2003 if ((opts::PrintRelocations
|| PrintRelocations
) && Function
) {
2004 const uint64_t Size
= computeCodeSize(&Instruction
, &Instruction
+ 1);
2005 Function
->printRelocations(OS
, Offset
, Size
);
2011 Instruction
.dump_pretty(OS
, InstPrinter
.get());
2016 std::optional
<uint64_t>
2017 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress
,
2018 uint64_t FileOffset
) const {
2019 // Find a segment with a matching file offset.
2020 for (auto &KV
: SegmentMapInfo
) {
2021 const SegmentInfo
&SegInfo
= KV
.second
;
2022 // FileOffset is got from perf event,
2023 // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
2024 // If the pagesize is not equal to SegInfo.Alignment.
2025 // FileOffset and SegInfo.FileOffset should be aligned first,
2026 // and then judge whether they are equal.
2027 if (alignDown(SegInfo
.FileOffset
, SegInfo
.Alignment
) ==
2028 alignDown(FileOffset
, SegInfo
.Alignment
)) {
2029 // The function's offset from base address in VAS is aligned by pagesize
2030 // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2031 // However, The ELF document says that SegInfo.FileOffset should equal
2032 // to SegInfo.Address, modulo the pagesize.
2033 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2035 // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2036 // alignDown(SegInfo.Address, pagesize)
2037 // = SegInfo.Address - (SegInfo.Address % pagesize)
2038 // = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2039 // = SegInfo.Address - SegInfo.FileOffset +
2040 // alignDown(SegInfo.FileOffset, pagesize)
2041 // = SegInfo.Address - SegInfo.FileOffset + FileOffset
2042 return MMapAddress
- (SegInfo
.Address
- SegInfo
.FileOffset
+ FileOffset
);
2046 return std::nullopt
;
2049 ErrorOr
<BinarySection
&> BinaryContext::getSectionForAddress(uint64_t Address
) {
2050 auto SI
= AddressToSection
.upper_bound(Address
);
2051 if (SI
!= AddressToSection
.begin()) {
2053 uint64_t UpperBound
= SI
->first
+ SI
->second
->getSize();
2054 if (!SI
->second
->getSize())
2056 if (UpperBound
> Address
)
2059 return std::make_error_code(std::errc::bad_address
);
2063 BinaryContext::getSectionNameForAddress(uint64_t Address
) const {
2064 if (ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
))
2065 return Section
->getName();
2066 return std::make_error_code(std::errc::bad_address
);
2069 BinarySection
&BinaryContext::registerSection(BinarySection
*Section
) {
2070 auto Res
= Sections
.insert(Section
);
2072 assert(Res
.second
&& "can't register the same section twice.");
2074 // Only register allocatable sections in the AddressToSection map.
2075 if (Section
->isAllocatable() && Section
->getAddress())
2076 AddressToSection
.insert(std::make_pair(Section
->getAddress(), Section
));
2077 NameToSection
.insert(
2078 std::make_pair(std::string(Section
->getName()), Section
));
2079 if (Section
->hasSectionRef())
2080 SectionRefToBinarySection
.insert(
2081 std::make_pair(Section
->getSectionRef(), Section
));
2083 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section
<< "\n");
2087 BinarySection
&BinaryContext::registerSection(SectionRef Section
) {
2088 return registerSection(new BinarySection(*this, Section
));
2092 BinaryContext::registerSection(const Twine
&SectionName
,
2093 const BinarySection
&OriginalSection
) {
2094 return registerSection(
2095 new BinarySection(*this, SectionName
, OriginalSection
));
2099 BinaryContext::registerOrUpdateSection(const Twine
&Name
, unsigned ELFType
,
2100 unsigned ELFFlags
, uint8_t *Data
,
2101 uint64_t Size
, unsigned Alignment
) {
2102 auto NamedSections
= getSectionByName(Name
);
2103 if (NamedSections
.begin() != NamedSections
.end()) {
2104 assert(std::next(NamedSections
.begin()) == NamedSections
.end() &&
2105 "can only update unique sections");
2106 BinarySection
*Section
= NamedSections
.begin()->second
;
2108 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section
<< " -> ");
2109 const bool Flag
= Section
->isAllocatable();
2111 Section
->update(Data
, Size
, Alignment
, ELFType
, ELFFlags
);
2112 LLVM_DEBUG(dbgs() << *Section
<< "\n");
2113 // FIXME: Fix section flags/attributes for MachO.
2115 assert(Flag
== Section
->isAllocatable() &&
2116 "can't change section allocation status");
2120 return registerSection(
2121 new BinarySection(*this, Name
, Data
, Size
, Alignment
, ELFType
, ELFFlags
));
2124 void BinaryContext::deregisterSectionName(const BinarySection
&Section
) {
2125 auto NameRange
= NameToSection
.equal_range(Section
.getName().str());
2126 while (NameRange
.first
!= NameRange
.second
) {
2127 if (NameRange
.first
->second
== &Section
) {
2128 NameToSection
.erase(NameRange
.first
);
2135 void BinaryContext::deregisterUnusedSections() {
2136 ErrorOr
<BinarySection
&> AbsSection
= getUniqueSectionByName("<absolute>");
2137 for (auto SI
= Sections
.begin(); SI
!= Sections
.end();) {
2138 BinarySection
*Section
= *SI
;
2139 // We check getOutputData() instead of getOutputSize() because sometimes
2140 // zero-sized .text.cold sections are allocated.
2141 if (Section
->hasSectionRef() || Section
->getOutputData() ||
2142 (AbsSection
&& Section
== &AbsSection
.get())) {
2147 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section
->getName()
2149 deregisterSectionName(*Section
);
2150 SI
= Sections
.erase(SI
);
2155 bool BinaryContext::deregisterSection(BinarySection
&Section
) {
2156 BinarySection
*SectionPtr
= &Section
;
2157 auto Itr
= Sections
.find(SectionPtr
);
2158 if (Itr
!= Sections
.end()) {
2159 auto Range
= AddressToSection
.equal_range(SectionPtr
->getAddress());
2160 while (Range
.first
!= Range
.second
) {
2161 if (Range
.first
->second
== SectionPtr
) {
2162 AddressToSection
.erase(Range
.first
);
2168 deregisterSectionName(*SectionPtr
);
2169 Sections
.erase(Itr
);
2176 void BinaryContext::renameSection(BinarySection
&Section
,
2177 const Twine
&NewName
) {
2178 auto Itr
= Sections
.find(&Section
);
2179 assert(Itr
!= Sections
.end() && "Section must exist to be renamed.");
2180 Sections
.erase(Itr
);
2182 deregisterSectionName(Section
);
2184 Section
.Name
= NewName
.str();
2185 Section
.setOutputName(Section
.Name
);
2187 NameToSection
.insert(std::make_pair(Section
.Name
, &Section
));
2189 // Reinsert with the new name.
2190 Sections
.insert(&Section
);
2193 void BinaryContext::printSections(raw_ostream
&OS
) const {
2194 for (BinarySection
*const &Section
: Sections
)
2195 OS
<< "BOLT-INFO: " << *Section
<< "\n";
2198 BinarySection
&BinaryContext::absoluteSection() {
2199 if (ErrorOr
<BinarySection
&> Section
= getUniqueSectionByName("<absolute>"))
2201 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL
, 0u);
2204 ErrorOr
<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address
,
2205 size_t Size
) const {
2206 const ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2208 return std::make_error_code(std::errc::bad_address
);
2210 if (Section
->isVirtual())
2213 DataExtractor
DE(Section
->getContents(), AsmInfo
->isLittleEndian(),
2214 AsmInfo
->getCodePointerSize());
2215 auto ValueOffset
= static_cast<uint64_t>(Address
- Section
->getAddress());
2216 return DE
.getUnsigned(&ValueOffset
, Size
);
2219 ErrorOr
<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address
,
2220 size_t Size
) const {
2221 const ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2223 return std::make_error_code(std::errc::bad_address
);
2225 if (Section
->isVirtual())
2228 DataExtractor
DE(Section
->getContents(), AsmInfo
->isLittleEndian(),
2229 AsmInfo
->getCodePointerSize());
2230 auto ValueOffset
= static_cast<uint64_t>(Address
- Section
->getAddress());
2231 return DE
.getSigned(&ValueOffset
, Size
);
2234 void BinaryContext::addRelocation(uint64_t Address
, MCSymbol
*Symbol
,
2235 uint64_t Type
, uint64_t Addend
,
2237 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
2238 assert(Section
&& "cannot find section for address");
2239 Section
->addRelocation(Address
- Section
->getAddress(), Symbol
, Type
, Addend
,
2243 void BinaryContext::addDynamicRelocation(uint64_t Address
, MCSymbol
*Symbol
,
2244 uint64_t Type
, uint64_t Addend
,
2246 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
2247 assert(Section
&& "cannot find section for address");
2248 Section
->addDynamicRelocation(Address
- Section
->getAddress(), Symbol
, Type
,
2252 bool BinaryContext::removeRelocationAt(uint64_t Address
) {
2253 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
2254 assert(Section
&& "cannot find section for address");
2255 return Section
->removeRelocationAt(Address
- Section
->getAddress());
2258 const Relocation
*BinaryContext::getRelocationAt(uint64_t Address
) const {
2259 ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2263 return Section
->getRelocationAt(Address
- Section
->getAddress());
2267 BinaryContext::getDynamicRelocationAt(uint64_t Address
) const {
2268 ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2272 return Section
->getDynamicRelocationAt(Address
- Section
->getAddress());
2275 void BinaryContext::markAmbiguousRelocations(BinaryData
&BD
,
2276 const uint64_t Address
) {
2277 auto setImmovable
= [&](BinaryData
&BD
) {
2278 BinaryData
*Root
= BD
.getAtomicRoot();
2279 LLVM_DEBUG(if (Root
->isMoveable()) {
2280 dbgs() << "BOLT-DEBUG: setting " << *Root
<< " as immovable "
2281 << "due to ambiguous relocation referencing 0x"
2282 << Twine::utohexstr(Address
) << '\n';
2284 Root
->setIsMoveable(false);
2287 if (Address
== BD
.getAddress()) {
2290 // Set previous symbol as immovable
2291 BinaryData
*Prev
= getBinaryDataContainingAddress(Address
- 1);
2292 if (Prev
&& Prev
->getEndAddress() == BD
.getAddress())
2293 setImmovable(*Prev
);
2296 if (Address
== BD
.getEndAddress()) {
2299 // Set next symbol as immovable
2300 BinaryData
*Next
= getBinaryDataContainingAddress(BD
.getEndAddress());
2301 if (Next
&& Next
->getAddress() == BD
.getEndAddress())
2302 setImmovable(*Next
);
2306 BinaryFunction
*BinaryContext::getFunctionForSymbol(const MCSymbol
*Symbol
,
2307 uint64_t *EntryDesc
) {
2308 std::shared_lock
<llvm::sys::RWMutex
> Lock(SymbolToFunctionMapMutex
);
2309 auto BFI
= SymbolToFunctionMap
.find(Symbol
);
2310 if (BFI
== SymbolToFunctionMap
.end())
2313 BinaryFunction
*BF
= BFI
->second
;
2315 *EntryDesc
= BF
->getEntryIDForSymbol(Symbol
);
2321 BinaryContext::generateBugReportMessage(StringRef Message
,
2322 const BinaryFunction
&Function
) const {
2324 raw_string_ostream
SS(Msg
);
2325 SS
<< "=======================================\n";
2326 SS
<< "BOLT is unable to proceed because it couldn't properly understand "
2328 SS
<< "If you are running the most recent version of BOLT, you may "
2330 "report this and paste this dump.\nPlease check that there is no "
2331 "sensitive contents being shared in this dump.\n";
2332 SS
<< "\nOffending function: " << Function
.getPrintName() << "\n\n";
2333 ScopedPrinter
SP(SS
);
2334 SP
.printBinaryBlock("Function contents", *Function
.getData());
2336 const_cast<BinaryFunction
&>(Function
).print(SS
, "");
2337 SS
<< "ERROR: " << Message
;
2338 SS
<< "\n=======================================\n";
2343 BinaryContext::createInjectedBinaryFunction(const std::string
&Name
,
2345 InjectedBinaryFunctions
.push_back(new BinaryFunction(Name
, *this, IsSimple
));
2346 BinaryFunction
*BF
= InjectedBinaryFunctions
.back();
2347 setSymbolToFunctionMap(BF
->getSymbol(), BF
);
2348 BF
->CurrentState
= BinaryFunction::State::CFG
;
2352 std::pair
<size_t, size_t>
2353 BinaryContext::calculateEmittedSize(BinaryFunction
&BF
, bool FixBranches
) {
2354 // Adjust branch instruction to match the current layout.
2358 // Create local MC context to isolate the effect of ephemeral code emission.
2359 IndependentCodeEmitter MCEInstance
= createIndependentMCCodeEmitter();
2360 MCContext
*LocalCtx
= MCEInstance
.LocalCtx
.get();
2362 TheTarget
->createMCAsmBackend(*STI
, *MRI
, MCTargetOptions());
2364 SmallString
<256> Code
;
2365 raw_svector_ostream
VecOS(Code
);
2367 std::unique_ptr
<MCObjectWriter
> OW
= MAB
->createObjectWriter(VecOS
);
2368 std::unique_ptr
<MCStreamer
> Streamer(TheTarget
->createMCObjectStreamer(
2369 *TheTriple
, *LocalCtx
, std::unique_ptr
<MCAsmBackend
>(MAB
), std::move(OW
),
2370 std::unique_ptr
<MCCodeEmitter
>(MCEInstance
.MCE
.release()), *STI
));
2372 Streamer
->initSections(false, *STI
);
2374 MCSection
*Section
= MCEInstance
.LocalMOFI
->getTextSection();
2375 Section
->setHasInstructions(true);
2377 // Create symbols in the LocalCtx so that they get destroyed with it.
2378 MCSymbol
*StartLabel
= LocalCtx
->createTempSymbol();
2379 MCSymbol
*EndLabel
= LocalCtx
->createTempSymbol();
2381 Streamer
->switchSection(Section
);
2382 Streamer
->emitLabel(StartLabel
);
2383 emitFunctionBody(*Streamer
, BF
, BF
.getLayout().getMainFragment(),
2384 /*EmitCodeOnly=*/true);
2385 Streamer
->emitLabel(EndLabel
);
2387 using LabelRange
= std::pair
<const MCSymbol
*, const MCSymbol
*>;
2388 SmallVector
<LabelRange
> SplitLabels
;
2389 for (FunctionFragment
&FF
: BF
.getLayout().getSplitFragments()) {
2390 MCSymbol
*const SplitStartLabel
= LocalCtx
->createTempSymbol();
2391 MCSymbol
*const SplitEndLabel
= LocalCtx
->createTempSymbol();
2392 SplitLabels
.emplace_back(SplitStartLabel
, SplitEndLabel
);
2394 MCSectionELF
*const SplitSection
= LocalCtx
->getELFSection(
2395 BF
.getCodeSectionName(FF
.getFragmentNum()), ELF::SHT_PROGBITS
,
2396 ELF::SHF_EXECINSTR
| ELF::SHF_ALLOC
);
2397 SplitSection
->setHasInstructions(true);
2398 Streamer
->switchSection(SplitSection
);
2400 Streamer
->emitLabel(SplitStartLabel
);
2401 emitFunctionBody(*Streamer
, BF
, FF
, /*EmitCodeOnly=*/true);
2402 Streamer
->emitLabel(SplitEndLabel
);
2405 MCAssembler
&Assembler
=
2406 static_cast<MCObjectStreamer
*>(Streamer
.get())->getAssembler();
2409 // Obtain fragment sizes.
2410 std::vector
<uint64_t> FragmentSizes
;
2411 // Main fragment size.
2412 const uint64_t HotSize
= Assembler
.getSymbolOffset(*EndLabel
) -
2413 Assembler
.getSymbolOffset(*StartLabel
);
2414 FragmentSizes
.push_back(HotSize
);
2415 // Split fragment sizes.
2416 uint64_t ColdSize
= 0;
2417 for (const auto &Labels
: SplitLabels
) {
2418 uint64_t Size
= Assembler
.getSymbolOffset(*Labels
.second
) -
2419 Assembler
.getSymbolOffset(*Labels
.first
);
2420 FragmentSizes
.push_back(Size
);
2424 // Populate new start and end offsets of each basic block.
2425 uint64_t FragmentIndex
= 0;
2426 for (FunctionFragment
&FF
: BF
.getLayout().fragments()) {
2427 BinaryBasicBlock
*PrevBB
= nullptr;
2428 for (BinaryBasicBlock
*BB
: FF
) {
2429 const uint64_t BBStartOffset
=
2430 Assembler
.getSymbolOffset(*(BB
->getLabel()));
2431 BB
->setOutputStartAddress(BBStartOffset
);
2433 PrevBB
->setOutputEndAddress(BBStartOffset
);
2437 PrevBB
->setOutputEndAddress(FragmentSizes
[FragmentIndex
]);
2441 // Clean-up the effect of the code emission.
2442 for (const MCSymbol
&Symbol
: Assembler
.symbols()) {
2443 MCSymbol
*MutableSymbol
= const_cast<MCSymbol
*>(&Symbol
);
2444 MutableSymbol
->setUndefined();
2445 MutableSymbol
->setIsRegistered(false);
2448 return std::make_pair(HotSize
, ColdSize
);
2451 bool BinaryContext::validateInstructionEncoding(
2452 ArrayRef
<uint8_t> InputSequence
) const {
2455 DisAsm
->getInstruction(Inst
, InstSize
, InputSequence
, 0, nulls());
2456 assert(InstSize
== InputSequence
.size() &&
2457 "Disassembled instruction size does not match the sequence.");
2459 SmallString
<256> Code
;
2460 SmallVector
<MCFixup
, 4> Fixups
;
2462 MCE
->encodeInstruction(Inst
, Code
, Fixups
, *STI
);
2463 auto OutputSequence
= ArrayRef
<uint8_t>((uint8_t *)Code
.data(), Code
.size());
2464 if (InputSequence
!= OutputSequence
) {
2465 if (opts::Verbosity
> 1) {
2466 this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2467 << " input: " << InputSequence
<< '\n'
2468 << " output: " << OutputSequence
<< '\n';
2476 uint64_t BinaryContext::getHotThreshold() const {
2477 static uint64_t Threshold
= 0;
2478 if (Threshold
== 0) {
2479 Threshold
= std::max(
2480 (uint64_t)opts::ExecutionCountThreshold
,
2481 NumProfiledFuncs
? SumExecutionCount
/ (2 * NumProfiledFuncs
) : 1);
2486 BinaryFunction
*BinaryContext::getBinaryFunctionContainingAddress(
2487 uint64_t Address
, bool CheckPastEnd
, bool UseMaxSize
) {
2488 auto FI
= BinaryFunctions
.upper_bound(Address
);
2489 if (FI
== BinaryFunctions
.begin())
2493 const uint64_t UsedSize
=
2494 UseMaxSize
? FI
->second
.getMaxSize() : FI
->second
.getSize();
2496 if (Address
>= FI
->first
+ UsedSize
+ (CheckPastEnd
? 1 : 0))
2502 BinaryFunction
*BinaryContext::getBinaryFunctionAtAddress(uint64_t Address
) {
2503 // First, try to find a function starting at the given address. If the
2504 // function was folded, this will get us the original folded function if it
2505 // wasn't removed from the list, e.g. in non-relocation mode.
2506 auto BFI
= BinaryFunctions
.find(Address
);
2507 if (BFI
!= BinaryFunctions
.end())
2508 return &BFI
->second
;
2510 // We might have folded the function matching the object at the given
2511 // address. In such case, we look for a function matching the symbol
2512 // registered at the original address. The new function (the one that the
2513 // original was folded into) will hold the symbol.
2514 if (const BinaryData
*BD
= getBinaryDataAtAddress(Address
)) {
2515 uint64_t EntryID
= 0;
2516 BinaryFunction
*BF
= getFunctionForSymbol(BD
->getSymbol(), &EntryID
);
2517 if (BF
&& EntryID
== 0)
2523 /// Deregister JumpTable registered at a given \p Address and delete it.
2524 void BinaryContext::deleteJumpTable(uint64_t Address
) {
2525 assert(JumpTables
.count(Address
) && "Must have a jump table at address");
2526 JumpTable
*JT
= JumpTables
.at(Address
);
2527 for (BinaryFunction
*Parent
: JT
->Parents
)
2528 Parent
->JumpTables
.erase(Address
);
2529 JumpTables
.erase(Address
);
2533 DebugAddressRangesVector
BinaryContext::translateModuleAddressRanges(
2534 const DWARFAddressRangesVector
&InputRanges
) const {
2535 DebugAddressRangesVector OutputRanges
;
2537 for (const DWARFAddressRange Range
: InputRanges
) {
2538 auto BFI
= BinaryFunctions
.lower_bound(Range
.LowPC
);
2539 while (BFI
!= BinaryFunctions
.end()) {
2540 const BinaryFunction
&Function
= BFI
->second
;
2541 if (Function
.getAddress() >= Range
.HighPC
)
2543 const DebugAddressRangesVector FunctionRanges
=
2544 Function
.getOutputAddressRanges();
2545 llvm::move(FunctionRanges
, std::back_inserter(OutputRanges
));
2546 std::advance(BFI
, 1);
2550 return OutputRanges
;