1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the BinaryContext class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAssembler.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
26 #include "llvm/MC/MCInstPrinter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionELF.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/Regex.h"
40 #include <unordered_set>
45 #define DEBUG_TYPE "bolt"
// Command-line options defined in this file. Every option visible here is
// placed in BoltCategory; the help text of each is its cl::desc string.
// NOTE(review): the embedded listing numbers skip lines (e.g. the declaration
// heads of PrintDebugInfo and PrintMemData), so parts of these definitions are
// not visible in this view.
49 cl::opt
<bool> NoHugePages("no-huge-pages",
50 cl::desc("use regular size pages for code alignment"),
51 cl::Hidden
, cl::cat(BoltCategory
));
54 PrintDebugInfo("print-debug-info",
55 cl::desc("print debug info when printing functions"),
58 cl::cat(BoltCategory
));
60 cl::opt
<bool> PrintRelocations(
62 cl::desc("print relocations when printing functions/objects"), cl::Hidden
,
63 cl::cat(BoltCategory
));
66 PrintMemData("print-mem-data",
67 cl::desc("print memory data annotations when printing functions"),
70 cl::cat(BoltCategory
));
// String-valued option; its default is the empty string (cl::init("")).
72 cl::opt
<std::string
> CompDirOverride(
74 cl::desc("overrides DW_AT_comp_dir, and provides an alternative base "
75 "location, which is used with DW_AT_dwo_name to construct a path "
77 cl::Hidden
, cl::init(""), cl::cat(BoltCategory
));
// Definition of the static member BOLTError::ID, initialized to 0.
// NOTE(review): presumably the type-identification anchor used by LLVM's
// ErrorInfo machinery -- confirm against the class declaration.
83 char BOLTError::ID
= 0;
// Constructs a BOLTError.
//   IsFatal - stored as-is in the IsFatal member.
//   S       - message; rendered to a string via Twine::str() and stored in Msg.
85 BOLTError::BOLTError(bool IsFatal
, const Twine
&S
)
86 : IsFatal(IsFatal
), Msg(S
.str()) {}
// Logs this error to the stream OS.
// The visible part wraps Msg in a StringRef and tests whether it already
// starts with "BOLT-ERROR".
// NOTE(review): the statements that actually write to OS are not visible in
// this view (listing numbers skip); do not infer the output format from here.
88 void BOLTError::log(raw_ostream
&OS
) const {
91 StringRef ErrMsg
= StringRef(Msg
);
92 // Prepend our error prefix if it is missing
96 if (!ErrMsg
.starts_with("BOLT-ERROR"))
// BOLTError has no std::error_code equivalent: this always returns
// inconvertibleErrorCode().
102 std::error_code
BOLTError::convertToErrorCode() const {
103 return inconvertibleErrorCode();
// Returns an Error wrapping a BOLTError with IsFatal == false and message S.
106 Error
createNonFatalBOLTError(const Twine
&S
) {
107 return make_error
<BOLTError
>(/*IsFatal*/ false, S
);
// Returns an Error wrapping a BOLTError with IsFatal == true and message S.
110 Error
createFatalBOLTError(const Twine
&S
) {
111 return make_error
<BOLTError
>(/*IsFatal*/ true, S
);
// Consumes the error E by passing it to handleAllErrors with a handler that
// accepts BOLTError instances. The handler checks whether the error carries a
// non-empty message.
// NOTE(review): the remainder of the handler (logging and the fatal-exit path
// suggested by the function name) is not visible in this view -- confirm
// against the full source.
114 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E
) {
115 handleAllErrors(Error(std::move(E
)), [&](const BOLTError
&E
) {
116 if (!E
.getMessage().empty())
// Constructs a BinaryContext from pre-built MC/DWARF components.
// Every smart-pointer parameter is moved into the member of the same name,
// so the context takes over (or, for SSP, shares) ownership. TheTarget is a
// raw pointer stored as-is; TripleName and Logger are copied into their
// members. InitialDynoStats is constructed from isAArch64().
123 BinaryContext::BinaryContext(std::unique_ptr
<MCContext
> Ctx
,
124 std::unique_ptr
<DWARFContext
> DwCtx
,
125 std::unique_ptr
<Triple
> TheTriple
,
126 std::shared_ptr
<orc::SymbolStringPool
> SSP
,
127 const Target
*TheTarget
, std::string TripleName
,
128 std::unique_ptr
<MCCodeEmitter
> MCE
,
129 std::unique_ptr
<MCObjectFileInfo
> MOFI
,
130 std::unique_ptr
<const MCAsmInfo
> AsmInfo
,
131 std::unique_ptr
<const MCInstrInfo
> MII
,
132 std::unique_ptr
<const MCSubtargetInfo
> STI
,
133 std::unique_ptr
<MCInstPrinter
> InstPrinter
,
134 std::unique_ptr
<const MCInstrAnalysis
> MIA
,
135 std::unique_ptr
<MCPlusBuilder
> MIB
,
136 std::unique_ptr
<const MCRegisterInfo
> MRI
,
137 std::unique_ptr
<MCDisassembler
> DisAsm
,
138 JournalingStreams Logger
)
139 : Ctx(std::move(Ctx
)), DwCtx(std::move(DwCtx
)),
140 TheTriple(std::move(TheTriple
)), SSP(std::move(SSP
)),
141 TheTarget(TheTarget
), TripleName(TripleName
), MCE(std::move(MCE
)),
142 MOFI(std::move(MOFI
)), AsmInfo(std::move(AsmInfo
)), MII(std::move(MII
)),
143 STI(std::move(STI
)), InstPrinter(std::move(InstPrinter
)),
144 MIA(std::move(MIA
)), MIB(std::move(MIB
)), MRI(std::move(MRI
)),
145 DisAsm(std::move(DisAsm
)), Logger(Logger
), InitialDynoStats(isAArch64()) {
// The regular page size depends on the architecture (AArch64 vs. X86 value).
146 RegularPageSize
= isAArch64() ? RegularPageSizeAArch64
: RegularPageSizeX86
;
// Code is aligned to huge pages unless -no-huge-pages was given.
147 PageAlign
= opts::NoHugePages
? RegularPageSize
: HugePageSize
;
// Destructor. Walks Sections, InjectedBinaryFunctions and JumpTables, which
// hold raw pointers. Each injected function is deleted explicitly.
// NOTE(review): the loop bodies for Sections and JumpTables are not visible
// in this view; presumably they delete the pointed-to objects too -- confirm.
150 BinaryContext::~BinaryContext() {
151 for (BinarySection
*Section
: Sections
)
153 for (BinaryFunction
*InjectedFunction
: InjectedBinaryFunctions
)
154 delete InjectedFunction
;
155 for (std::pair
<const uint64_t, JumpTable
*> JTI
: JumpTables
)
160 /// Create BinaryContext for a given architecture \p ArchName and
161 /// triple \p TripleName.
///
/// Builds each MC component for \p TheTriple in turn (target lookup, register
/// info, asm info, subtarget info, instruction info, MCContext, object file
/// info, disassembler, instruction analysis, instruction printer, code
/// emitter) and hands them to a new BinaryContext. A failure at any step
/// returns an error whose message starts with "BOLT-ERROR" or a fatal
/// BOLTError. On success the new context is returned.
///
/// NOTE(review): the embedded listing numbers skip lines throughout this
/// function (most null checks and several declarations are not visible), so
/// the exact conditions guarding each early return must be confirmed against
/// the full source.
162 Expected
<std::unique_ptr
<BinaryContext
>> BinaryContext::createBinaryContext(
163 Triple TheTriple
, std::shared_ptr
<orc::SymbolStringPool
> SSP
,
164 StringRef InputFileName
, SubtargetFeatures
*Features
, bool IsPIC
,
165 std::unique_ptr
<DWARFContext
> DwCtx
, JournalingStreams Logger
) {
166 StringRef ArchName
= "";
167 std::string FeaturesStr
= "";
// Pick the architecture name and the feature string per target.
168 switch (TheTriple
.getArch()) {
169 case llvm::Triple::x86_64
:
171 return createFatalBOLTError(
172 "x86_64 target does not use SubtargetFeatures");
174 FeaturesStr
= "+nopl";
176 case llvm::Triple::aarch64
:
178 return createFatalBOLTError(
179 "AArch64 target does not use SubtargetFeatures");
180 ArchName
= "aarch64";
181 FeaturesStr
= "+all";
183 case llvm::Triple::riscv64
: {
184 ArchName
= "riscv64";
186 return createFatalBOLTError("RISCV target needs SubtargetFeatures");
187 // We rely on relaxation for some transformations (e.g., promoting all calls
188 // to PseudoCALL and then making JITLink relax them). Since the relax
189 // feature is not stored in the object file, we manually enable it.
190 Features
->AddFeature("relax");
191 FeaturesStr
= Features
->getString();
195 return createStringError(std::errc::not_supported
,
196 "BOLT-ERROR: Unrecognized machine in ELF file");
199 const std::string TripleName
= TheTriple
.str();
// Look up the LLVM target; the lookup reports failure text through `Error`.
202 const Target
*TheTarget
=
203 TargetRegistry::lookupTarget(std::string(ArchName
), TheTriple
, Error
);
205 return createStringError(make_error_code(std::errc::not_supported
),
206 Twine("BOLT-ERROR: ", Error
));
208 std::unique_ptr
<const MCRegisterInfo
> MRI(
209 TheTarget
->createMCRegInfo(TripleName
));
211 return createStringError(
212 make_error_code(std::errc::not_supported
),
213 Twine("BOLT-ERROR: no register info for target ", TripleName
));
215 // Set up disassembler.
216 std::unique_ptr
<MCAsmInfo
> AsmInfo(
217 TheTarget
->createMCAsmInfo(*MRI
, TripleName
, MCTargetOptions()));
219 return createStringError(
220 make_error_code(std::errc::not_supported
),
221 Twine("BOLT-ERROR: no assembly info for target ", TripleName
));
222 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
223 // we want to emit such names as using @PLT without double quotes to convey
224 // variant kind to the assembler. BOLT doesn't rely on the linker so we can
225 // override the default AsmInfo behavior to emit names the way we want.
226 AsmInfo
->setAllowAtInName(true);
// The subtarget is created with an empty CPU string and FeaturesStr.
228 std::unique_ptr
<const MCSubtargetInfo
> STI(
229 TheTarget
->createMCSubtargetInfo(TripleName
, "", FeaturesStr
));
231 return createStringError(
232 make_error_code(std::errc::not_supported
),
233 Twine("BOLT-ERROR: no subtarget info for target ", TripleName
));
235 std::unique_ptr
<const MCInstrInfo
> MII(TheTarget
->createMCInstrInfo());
237 return createStringError(
238 make_error_code(std::errc::not_supported
),
239 Twine("BOLT-ERROR: no instruction info for target ", TripleName
));
// The MCContext borrows AsmInfo, MRI and STI through raw pointers.
241 std::unique_ptr
<MCContext
> Ctx(
242 new MCContext(TheTriple
, AsmInfo
.get(), MRI
.get(), STI
.get()));
243 std::unique_ptr
<MCObjectFileInfo
> MOFI(
244 TheTarget
->createMCObjectFileInfo(*Ctx
, IsPIC
));
245 Ctx
->setObjectFileInfo(MOFI
.get());
246 // We do not support X86 Large code model. Change this in the future.
248 if (TheTriple
.getArch() == llvm::Triple::aarch64
)
// Choose the LSDA pointer encoding; `Large` selects the wider variants.
// NOTE(review): the declaration of `Large` and the condition guarding the
// pcrel re-assignment below are not visible in this view.
250 unsigned LSDAEncoding
=
251 Large
? dwarf::DW_EH_PE_absptr
: dwarf::DW_EH_PE_udata4
;
253 LSDAEncoding
= dwarf::DW_EH_PE_pcrel
|
254 (Large
? dwarf::DW_EH_PE_sdata8
: dwarf::DW_EH_PE_sdata4
);
257 std::unique_ptr
<MCDisassembler
> DisAsm(
258 TheTarget
->createMCDisassembler(*STI
, *Ctx
));
261 return createStringError(
262 make_error_code(std::errc::not_supported
),
263 Twine("BOLT-ERROR: no disassembler info for target ", TripleName
));
265 std::unique_ptr
<const MCInstrAnalysis
> MIA(
266 TheTarget
->createMCInstrAnalysis(MII
.get()));
268 return createStringError(
269 make_error_code(std::errc::not_supported
),
270 Twine("BOLT-ERROR: failed to create instruction analysis for target ",
273 int AsmPrinterVariant
= AsmInfo
->getAssemblerDialect();
274 std::unique_ptr
<MCInstPrinter
> InstructionPrinter(
275 TheTarget
->createMCInstPrinter(TheTriple
, AsmPrinterVariant
, *AsmInfo
,
277 if (!InstructionPrinter
)
278 return createStringError(
279 make_error_code(std::errc::not_supported
),
280 Twine("BOLT-ERROR: no instruction printer for target ", TripleName
));
// Immediates are printed in hexadecimal.
281 InstructionPrinter
->setPrintImmHex(true);
283 std::unique_ptr
<MCCodeEmitter
> MCE(
284 TheTarget
->createMCCodeEmitter(*MII
, *Ctx
));
// Assemble the context. The MCPlusBuilder slot (MIB) is passed as nullptr
// here, so it is not set by this function's constructor call.
286 auto BC
= std::make_unique
<BinaryContext
>(
287 std::move(Ctx
), std::move(DwCtx
), std::make_unique
<Triple
>(TheTriple
),
288 std::move(SSP
), TheTarget
, std::string(TripleName
), std::move(MCE
),
289 std::move(MOFI
), std::move(AsmInfo
), std::move(MII
), std::move(STI
),
290 std::move(InstructionPrinter
), std::move(MIA
), nullptr, std::move(MRI
),
291 std::move(DisAsm
), Logger
);
293 BC
->LSDAEncoding
= LSDAEncoding
;
295 BC
->MAB
= std::unique_ptr
<MCAsmBackend
>(
296 BC
->TheTarget
->createMCAsmBackend(*BC
->STI
, *BC
->MRI
, MCTargetOptions()));
298 BC
->setFilename(InputFileName
);
// A non-PIC input is treated as having a fixed load address.
300 BC
->HasFixedLoadAddress
= !IsPIC
;
// A second disassembler instance is created and stored as SymbolicDisAsm.
302 BC
->SymbolicDisAsm
= std::unique_ptr
<MCDisassembler
>(
303 BC
->TheTarget
->createMCDisassembler(*BC
->STI
, *BC
->Ctx
));
305 if (!BC
->SymbolicDisAsm
)
306 return createStringError(
307 make_error_code(std::errc::not_supported
),
308 Twine("BOLT-ERROR: no disassembler info for target ", TripleName
));
310 return std::move(BC
);
// Decides, by name, whether relocations against SymbolName must be forced.
// The names examined are "__hot_start"/"__hot_end",
// "__hot_data_start"/"__hot_data_end" and "_end".
// NOTE(review): the conditions combined with the first two name checks and
// all return statements are not visible in this view.
313 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName
) const {
315 (SymbolName
== "__hot_start" || SymbolName
== "__hot_end"))
319 (SymbolName
== "__hot_data_start" || SymbolName
== "__hot_data_end"))
322 if (SymbolName
== "_end")
// Returns an object writer for the stream OS, created by the assembler
// backend (MAB).
328 std::unique_ptr
<MCObjectWriter
>
329 BinaryContext::createObjectWriter(raw_pwrite_stream
&OS
) {
330 return MAB
->createObjectWriter(OS
);
// Checks the Parent links of objects in BinaryDataMap.
// For an entry Itr, each following entry Next that is in the same section and
// whose [address, address + size) range is contained in Itr's object is
// expected to have Itr's object as Parent; a mismatch is reported as a
// "BOLT-WARNING" on the error stream.
// NOTE(review): the outer loop, the iterator advancement and the returned
// value are not visible in this view.
333 bool BinaryContext::validateObjectNesting() const {
334 auto Itr
= BinaryDataMap
.begin();
335 auto End
= BinaryDataMap
.end();
338 auto Next
= std::next(Itr
);
339 while (Next
!= End
&&
340 Itr
->second
->getSection() == Next
->second
->getSection() &&
341 Itr
->second
->containsRange(Next
->second
->getAddress(),
342 Next
->second
->getSize())) {
343 if (Next
->second
->Parent
!= Itr
->second
) {
344 this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
345 << "BOLT-WARNING: " << *Itr
->second
<< "\n"
346 << "BOLT-WARNING: " << *Next
->second
<< "\n";
// Checks that every relocation in every section lands inside a known data
// object. For each relocation the absolute address (section address + offset)
// is looked up with getBinaryDataContainingAddress; a warning is emitted when
// no BinaryData is found, or when the found object has no atomic root.
// NOTE(review): the stream the warnings are written to and the returned value
// are not visible in this view.
356 bool BinaryContext::validateHoles() const {
358 for (BinarySection
&Section
: sections()) {
359 for (const Relocation
&Rel
: Section
.relocations()) {
// Rel.Offset is section-relative; convert it to an absolute address.
360 uint64_t RelAddr
= Rel
.Offset
+ Section
.getAddress();
361 const BinaryData
*BD
= getBinaryDataContainingAddress(RelAddr
);
364 << "BOLT-WARNING: no BinaryData found for relocation at address"
365 << " 0x" << Twine::utohexstr(RelAddr
) << " in " << Section
.getName()
368 } else if (!BD
->getAtomicRoot()) {
370 << "BOLT-WARNING: no atomic BinaryData found for relocation at "
371 << "address 0x" << Twine::utohexstr(RelAddr
) << " in "
372 << Section
.getName() << "\n";
// Updates Parent links around the BinaryDataMap entry GAI.
// Step 1: if the preceding entry is in the same section and contains GAI's
//         address range, it becomes GAI's parent; a fixParents(GAI, nullptr)
//         call is also visible for the other case.
// Step 2: following entries whose range is contained in BD (GAI's parent if
//         it has one, otherwise GAI's own object) get BD as their Parent.
// NOTE(review): listing numbers skip lines here (loop headers, else branches,
// iterator increments), so the exact control flow must be confirmed against
// the full source.
380 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI
) {
381 const uint64_t Address
= GAI
->second
->getAddress();
382 const uint64_t Size
= GAI
->second
->getSize();
// Helper: sets Itr's Parent to NewParent, then re-parents subsequent entries
// that still point at the old parent.
384 auto fixParents
= [&](BinaryDataMapType::iterator Itr
,
385 BinaryData
*NewParent
) {
386 BinaryData
*OldParent
= Itr
->second
->Parent
;
387 Itr
->second
->Parent
= NewParent
;
389 while (Itr
!= BinaryDataMap
.end() && OldParent
&&
390 Itr
->second
->Parent
== OldParent
) {
391 Itr
->second
->Parent
= NewParent
;
396 // Check if the previous symbol contains the newly added symbol.
397 if (GAI
!= BinaryDataMap
.begin()) {
398 BinaryData
*Prev
= std::prev(GAI
)->second
;
400 if (Prev
->getSection() == GAI
->second
->getSection() &&
401 Prev
->containsRange(Address
, Size
)) {
402 fixParents(GAI
, Prev
);
404 fixParents(GAI
, nullptr);
410 // Check if the newly added symbol contains any subsequent symbols.
412 BinaryData
*BD
= GAI
->second
->Parent
? GAI
->second
->Parent
: GAI
->second
;
413 auto Itr
= std::next(GAI
);
415 Itr
!= BinaryDataMap
.end() &&
416 BD
->containsRange(Itr
->second
->getAddress(), Itr
->second
->getSize())) {
417 Itr
->second
->Parent
= BD
;
// Returns the range of BinaryDataMap entries nested under BD.
// The range starts at the entry following BD's own address and extends while
// BD is an ancestor of the entry at End.
// NOTE(review): the declaration of End and the loop body that advances it
// are not visible in this view.
423 iterator_range
<BinaryContext::binary_data_iterator
>
424 BinaryContext::getSubBinaryData(BinaryData
*BD
) {
425 auto Start
= std::next(BinaryDataMap
.find(BD
->getAddress()));
427 while (End
!= BinaryDataMap
.end() && BD
->isAncestorOf(End
->second
))
429 return make_range(Start
, End
);
// Resolves a reference from function BF to Address into a (symbol, addend)
// pair. The visible cases, in order:
//   1. Address is a constant-island access of BF -> island symbol, addend 0.
//   2. Address belongs to a constant island recorded for another function in
//      AddressToConstantIslandMap -> that island's symbol or a proxy symbol,
//      addend 0.
//   3. Address is inside BF in a text section but is not BF's start -> a new
//      entry point at that offset, addend 0.
//   4. With relocations, Address looks like a PIC jump table and the
//      reference is PC-relative -> the jump table symbol, addend 0.
//   5. Address falls inside a known BinaryData -> its symbol plus the offset
//      of Address within it.
//   6. Otherwise a new global symbol with prefix "DATAat" is created.
// NOTE(review): the remaining parameter(s) (including IsPCRel) and the
// conditions guarding several of these steps are not visible in this view
// (listing numbers skip) -- confirm against the full source.
432 std::pair
<const MCSymbol
*, uint64_t>
433 BinaryContext::handleAddressRef(uint64_t Address
, BinaryFunction
&BF
,
436 // Check if this is an access to a constant island and create bookkeeping
437 // to keep track of it and emit it later as part of this function.
438 if (MCSymbol
*IslandSym
= BF
.getOrCreateIslandAccess(Address
))
439 return std::make_pair(IslandSym
, 0);
441 // Detect custom code written in assembly that refers to arbitrary
442 // constant islands from other functions. Write this reference so we
443 // can pull this constant island and emit it as part of this function
445 auto IslandIter
= AddressToConstantIslandMap
.lower_bound(Address
);
// lower_bound yields the first key >= Address; when that key is past Address
// (or there is none) and an earlier entry exists, the earlier entry is the
// candidate. NOTE(review): the statement under this condition is not visible
// here; presumably it steps the iterator back -- confirm.
447 if (IslandIter
!= AddressToConstantIslandMap
.begin() &&
448 (IslandIter
== AddressToConstantIslandMap
.end() ||
449 IslandIter
->first
> Address
))
452 if (IslandIter
!= AddressToConstantIslandMap
.end()) {
453 // Fall-back to referencing the original constant island in the presence
454 // of dynamic relocs, as we currently do not support cloning them.
455 // Notice: we might fail to link because of this, if the original constant
456 // island we are referring would be emitted too far away.
457 if (IslandIter
->second
->hasDynamicRelocationAtIsland()) {
458 MCSymbol
*IslandSym
=
459 IslandIter
->second
->getOrCreateIslandAccess(Address
);
461 return std::make_pair(IslandSym
, 0);
462 } else if (MCSymbol
*IslandSym
=
463 IslandIter
->second
->getOrCreateProxyIslandAccess(Address
,
465 BF
.createIslandDependency(IslandSym
, IslandIter
->second
);
466 return std::make_pair(IslandSym
, 0);
471 // Note that the address does not necessarily have to reside inside
472 // a section, it could be an absolute address too.
473 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
474 if (Section
&& Section
->isText()) {
475 if (BF
.containsAddress(Address
, /*UseMaxSize=*/isAArch64())) {
476 if (Address
!= BF
.getAddress()) {
477 // The address could potentially escape. Mark it as another entry
478 // point into the function.
479 if (opts::Verbosity
>= 1) {
480 this->outs() << "BOLT-INFO: potentially escaped address 0x"
481 << Twine::utohexstr(Address
) << " in function " << BF
484 BF
.HasInternalLabelReference
= true;
485 return std::make_pair(
486 BF
.addEntryPointAtOffset(Address
- BF
.getAddress()), 0);
489 addInterproceduralReference(&BF
, Address
);
493 // With relocations, catch jump table references outside of the basic block
494 // containing the indirect jump.
495 if (HasRelocations
) {
496 const MemoryContentsType MemType
= analyzeMemoryAt(Address
, BF
);
497 if (MemType
== MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE
&& IsPCRel
) {
498 const MCSymbol
*Symbol
=
499 getOrCreateJumpTable(BF
, Address
, JumpTable::JTT_PIC
);
501 return std::make_pair(Symbol
, 0);
// Known data object: return its symbol and the offset of Address inside it.
505 if (BinaryData
*BD
= getBinaryDataContainingAddress(Address
))
506 return std::make_pair(BD
->getSymbol(), Address
- BD
->getAddress());
508 // TODO: use DWARF info to get size/alignment here?
509 MCSymbol
*TargetSymbol
= getOrCreateGlobalSymbol(Address
, "DATAat");
510 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol
->getName() << '\n');
511 return std::make_pair(TargetSymbol
, 0);
// Classifies the memory at Address as seen from function BF.
// Returns POSSIBLE_PIC_JUMP_TABLE or POSSIBLE_JUMP_TABLE when
// analyzeJumpTable accepts the location as the respective table type (PIC is
// tried first), and UNKNOWN otherwise -- including for virtual sections and
// text sections.
// NOTE(review): the conditions guarding the first UNKNOWN return and the
// "no section" branch are not visible in this view.
514 MemoryContentsType
BinaryContext::analyzeMemoryAt(uint64_t Address
,
515 BinaryFunction
&BF
) {
517 return MemoryContentsType::UNKNOWN
;
519 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
521 // No section - possibly an absolute address. Since we don't allow
522 // internal function addresses to escape the function scope - we
523 // consider it a tail call.
524 if (opts::Verbosity
> 1) {
525 this->errs() << "BOLT-WARNING: no section for address 0x"
526 << Twine::utohexstr(Address
) << " referenced from function "
529 return MemoryContentsType::UNKNOWN
;
532 if (Section
->isVirtual()) {
533 // The contents are filled at runtime.
534 return MemoryContentsType::UNKNOWN
;
537 // No support for jump tables in code yet.
538 if (Section
->isText())
539 return MemoryContentsType::UNKNOWN
;
541 // Start with checking for PIC jump table. We expect non-PIC jump tables
542 // to have high 32 bits set to 0.
543 if (analyzeJumpTable(Address
, JumpTable::JTT_PIC
, BF
))
544 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE
;
546 if (analyzeJumpTable(Address
, JumpTable::JTT_NORMAL
, BF
))
547 return MemoryContentsType::POSSIBLE_JUMP_TABLE
;
549 return MemoryContentsType::UNKNOWN
;
// Heuristically decides whether the memory at Address holds a jump table of
// the given Type belonging to function BF.
//   Address            - start of the candidate table.
//   Type               - JTT_PIC (entries are offsets added to Address) or
//                        JTT_NORMAL (entries are read as pointers).
//   BF                 - function the table is attributed to.
//   NextJTAddress      - upper bound from the next known table; combined
//                        with the section/object end via std::min.
//   EntriesAsAddress   - optional out-list receiving each accepted target.
//   HasEntryInFragment - optional out-flag set when an accepted target lies
//                        in a function other than BF.
// Entries are scanned from Address up to the upper bound, in EntrySize steps.
// Returns true when the final count check at the end of the function passes.
// NOTE(review): listing numbers skip lines in this function (early returns,
// loop `continue`/`break` statements, NumRealEntries updates), so exactly when
// scanning stops must be confirmed against the full source.
552 bool BinaryContext::analyzeJumpTable(const uint64_t Address
,
553 const JumpTable::JumpTableType Type
,
554 const BinaryFunction
&BF
,
555 const uint64_t NextJTAddress
,
556 JumpTable::AddressesType
*EntriesAsAddress
,
557 bool *HasEntryInFragment
) const {
558 // Target address of __builtin_unreachable.
559 const uint64_t UnreachableAddress
= BF
.getAddress() + BF
.getSize();
561 // Is one of the targets __builtin_unreachable?
562 bool HasUnreachable
= false;
564 // Does one of the entries match function start address?
565 bool HasStartAsEntry
= false;
567 // Number of targets other than __builtin_unreachable.
568 uint64_t NumRealEntries
= 0;
570 // Size of the jump table without trailing __builtin_unreachable entries.
571 size_t TrimmedSize
= 0;
// Helper: appends EntryAddress to EntriesAsAddress when an output list was
// supplied, and records the list size in TrimmedSize.
// NOTE(review): the use of `Unreachable` is not visible here; presumably
// TrimmedSize is only updated for reachable entries -- confirm.
573 auto addEntryAddress
= [&](uint64_t EntryAddress
, bool Unreachable
= false) {
574 if (!EntriesAsAddress
)
576 EntriesAsAddress
->emplace_back(EntryAddress
);
578 TrimmedSize
= EntriesAsAddress
->size();
581 ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
585 // The upper bound is defined by containing object, section limits, and
586 // the next jump table in memory.
587 uint64_t UpperBound
= Section
->getEndAddress();
588 const BinaryData
*JumpTableBD
= getBinaryDataAtAddress(Address
);
589 if (JumpTableBD
&& JumpTableBD
->getSize()) {
590 assert(JumpTableBD
->getEndAddress() <= UpperBound
&&
591 "data object cannot cross a section boundary");
592 UpperBound
= JumpTableBD
->getEndAddress();
595 UpperBound
= std::min(NextJTAddress
, UpperBound
);
598 using JTT
= JumpTable::JumpTableType
;
599 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
600 Address
, BF
.getPrintName(),
601 Type
== JTT::JTT_PIC
? "PIC" : "Normal");
603 const uint64_t EntrySize
= getJumpTableEntrySize(Type
);
// Visit every whole entry that fits below UpperBound.
604 for (uint64_t EntryAddress
= Address
; EntryAddress
<= UpperBound
- EntrySize
;
605 EntryAddress
+= EntrySize
) {
606 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress
)
608 // Check if there's a proper relocation against the jump table entry.
609 if (HasRelocations
) {
610 if (Type
== JumpTable::JTT_PIC
&&
611 !DataPCRelocations
.count(EntryAddress
)) {
613 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
616 if (Type
== JumpTable::JTT_NORMAL
&& !getRelocationAt(EntryAddress
)) {
619 << "FAIL: JTT_NORMAL table, no relocation for this address\n");
// Decode the entry: PIC entries are signed offsets relative to the table
// start (Address); normal entries are read as pointers.
624 const uint64_t Value
=
625 (Type
== JumpTable::JTT_PIC
)
626 ? Address
+ *getSignedValueAtAddress(EntryAddress
, EntrySize
)
627 : *getPointerAtAddress(EntryAddress
);
629 // __builtin_unreachable() case.
630 if (Value
== UnreachableAddress
) {
631 addEntryAddress(Value
, /*Unreachable*/ true);
632 HasUnreachable
= true;
633 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value
));
637 // Function start is another special case. It is allowed in the jump table,
638 // but we need at least one other regular entry to distinguish the table
639 // from, e.g. a function pointer array.
640 if (Value
== BF
.getAddress()) {
641 HasStartAsEntry
= true;
642 addEntryAddress(Value
);
646 // Function or one of its fragments.
647 const BinaryFunction
*TargetBF
= getBinaryFunctionContainingAddress(Value
);
648 const bool DoesBelongToFunction
=
649 BF
.containsAddress(Value
) ||
650 (TargetBF
&& areRelatedFragments(TargetBF
, &BF
));
651 if (!DoesBelongToFunction
) {
653 if (!BF
.containsAddress(Value
)) {
654 dbgs() << "FAIL: function doesn't contain this address\n";
656 dbgs() << " ! function containing this address: "
657 << TargetBF
->getPrintName() << '\n';
658 if (TargetBF
->isFragment()) {
659 dbgs() << " ! is a fragment";
660 for (BinaryFunction
*Parent
: TargetBF
->ParentFragments
)
661 dbgs() << ", parent: " << Parent
->getPrintName();
670 // Check there's an instruction at this offset.
671 if (TargetBF
->getState() == BinaryFunction::State::Disassembled
&&
672 !TargetBF
->getInstructionAtOffset(Value
- TargetBF
->getAddress())) {
673 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value
));
678 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value
));
// Report a target that lies in a function other than BF.
680 if (TargetBF
!= &BF
&& HasEntryInFragment
)
681 *HasEntryInFragment
= true;
682 addEntryAddress(Value
);
685 // Trim direct/normal jump table to exclude trailing unreachable entries that
686 // can collide with a function address.
687 if (Type
== JumpTable::JTT_NORMAL
&& EntriesAsAddress
&&
688 TrimmedSize
!= EntriesAsAddress
->size() &&
689 getBinaryFunctionAtAddress(UnreachableAddress
))
690 EntriesAsAddress
->resize(TrimmedSize
);
692 // It's a jump table if the number of real entries is more than 1, or there's
693 // one real entry and one or more special targets. If there are only multiple
694 // special targets, then it's not a jump table.
695 return NumRealEntries
+ (HasUnreachable
|| HasStartAsEntry
) >= 2;
// Fills in the entries of every jump table registered in JumpTables.
// For each table: analyzeJumpTable is re-run with the next table's address as
// an upper bound, writing targets into JT->EntriesAsAddress and JT->IsSplit;
// each parent fragment then registers the targets that fall inside it (a
// target equal to the fragment's end address is recorded in IgnoredBranches);
// in strict mode the PC-relative relocation records covered by a PIC table
// are erased; fragments with indirect targets to split fragments are queued
// via addFragmentsToSkip. Finally, in strict mode any relocation records left
// in DataPCRelocations are dumped and asserted on, and the set is cleared.
// NOTE(review): listing numbers skip lines throughout (loop increment, the
// NonSimpleParent handling, the conditions around the failure path and the
// setHasIndirectTargetToSplitFragment call) -- confirm against the full source.
698 void BinaryContext::populateJumpTables() {
699 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations
.size()
701 for (auto JTI
= JumpTables
.begin(), JTE
= JumpTables
.end(); JTI
!= JTE
;
703 JumpTable
*JT
= JTI
->second
;
// True when at least one parent function of the table is not simple.
705 bool NonSimpleParent
= false;
706 for (BinaryFunction
*BF
: JT
->Parents
)
707 NonSimpleParent
|= !BF
->isSimple();
// Address of the following jump table, used to bound this one.
711 uint64_t NextJTAddress
= 0;
712 auto NextJTI
= std::next(JTI
);
714 NextJTAddress
= NextJTI
->second
->getAddress();
717 analyzeJumpTable(JT
->getAddress(), JT
->Type
, *(JT
->Parents
[0]),
718 NextJTAddress
, &JT
->EntriesAsAddress
, &JT
->IsSplit
);
721 dbgs() << "failed to analyze ";
723 if (NextJTI
!= JTE
) {
725 NextJTI
->second
->print(dbgs());
728 llvm_unreachable("jump table heuristic failure");
730 for (BinaryFunction
*Frag
: JT
->Parents
) {
732 Frag
->setHasIndirectTargetToSplitFragment(true);
733 for (uint64_t EntryAddress
: JT
->EntriesAsAddress
)
734 // if target is builtin_unreachable
735 if (EntryAddress
== Frag
->getAddress() + Frag
->getSize()) {
736 Frag
->IgnoredBranches
.emplace_back(EntryAddress
- Frag
->getAddress(),
738 } else if (EntryAddress
>= Frag
->getAddress() &&
739 EntryAddress
< Frag
->getAddress() + Frag
->getSize()) {
740 Frag
->registerReferencedOffset(EntryAddress
- Frag
->getAddress());
744 // In strict mode, erase PC-relative relocation record. Later we check that
745 // all such records are erased and thus have been accounted for.
746 if (opts::StrictMode
&& JT
->Type
== JumpTable::JTT_PIC
) {
747 for (uint64_t Address
= JT
->getAddress();
748 Address
< JT
->getAddress() + JT
->getSize();
749 Address
+= JT
->EntrySize
) {
750 DataPCRelocations
.erase(DataPCRelocations
.find(Address
));
754 // Mark to skip the function and all its fragments.
755 for (BinaryFunction
*Frag
: JT
->Parents
)
756 if (Frag
->hasIndirectTargetToSplitFragment())
757 addFragmentsToSkip(Frag
);
760 if (opts::StrictMode
&& DataPCRelocations
.size()) {
762 dbgs() << DataPCRelocations
.size()
763 << " unclaimed PC-relative relocations left in data:\n";
764 for (uint64_t Reloc
: DataPCRelocations
)
765 dbgs() << Twine::utohexstr(Reloc
) << '\n';
767 assert(0 && "unclaimed PC-relative relocations left in data\n");
769 clearList(DataPCRelocations
);
// Marks every function in FragmentsToSkip, plus all functions reachable from
// them through Fragments / ParentFragments links, as non-simple.
// Uses FragmentQueue as a worklist that grows while it is being traversed, so
// the loop indexes it rather than iterating. Emits a summary warning when any
// function was skipped.
772 void BinaryContext::skipMarkedFragments() {
773 std::vector
<BinaryFunction
*> FragmentQueue
;
774 // Copy the functions to FragmentQueue.
775 FragmentQueue
.assign(FragmentsToSkip
.begin(), FragmentsToSkip
.end());
// Helper: enqueue Function and add it to FragmentsToSkip.
// NOTE(review): the statement under the count() check is not visible here;
// presumably it returns early for already-queued functions -- confirm.
776 auto addToWorklist
= [&](BinaryFunction
*Function
) -> void {
777 if (FragmentsToSkip
.count(Function
))
779 FragmentQueue
.push_back(Function
);
780 addFragmentsToSkip(Function
);
782 // Functions containing split jump tables need to be skipped with all
783 // fragments (transitively).
784 for (size_t I
= 0; I
!= FragmentQueue
.size(); I
++) {
785 BinaryFunction
*BF
= FragmentQueue
[I
];
786 assert(FragmentsToSkip
.count(BF
) &&
787 "internal error in traversing function fragments");
788 if (opts::Verbosity
>= 1)
789 this->errs() << "BOLT-WARNING: Ignoring " << BF
->getPrintName() << '\n';
790 BF
->setSimple(false);
791 BF
->setHasIndirectTargetToSplitFragment(true);
793 llvm::for_each(BF
->Fragments
, addToWorklist
);
794 llvm::for_each(BF
->ParentFragments
, addToWorklist
);
796 if (!FragmentsToSkip
.empty())
797 this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip
.size()
798 << " function" << (FragmentsToSkip
.size() == 1 ? "" : "s")
799 << " due to cold fragments\n";
// Returns the symbol of the BinaryData registered exactly at Address, or, if
// there is none, registers a new name "<Prefix>0x<hex Address>" at that
// address and returns its symbol.
// For an existing object, a non-zero Size must match the object's size
// (checked with an assert).
// NOTE(review): the rest of the parameter list (Size, Alignment, Flags) is
// not visible in this view.
802 MCSymbol
*BinaryContext::getOrCreateGlobalSymbol(uint64_t Address
, Twine Prefix
,
806 auto Itr
= BinaryDataMap
.find(Address
);
807 if (Itr
!= BinaryDataMap
.end()) {
808 assert(Itr
->second
->getSize() == Size
|| !Size
);
809 return Itr
->second
->getSymbol();
812 std::string Name
= (Prefix
+ "0x" + Twine::utohexstr(Address
)).str();
813 assert(!GlobalSymbols
.count(Name
) && "created name is not unique");
814 return registerNameAtAddress(Name
, Address
, Size
, Alignment
, Flags
);
// Returns the MCContext symbol named Name, creating it if it does not exist.
// No address is registered for it here.
817 MCSymbol
*BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name
) {
818 return Ctx
->getOrCreateSymbol(Name
);
// Creates a BinaryFunction named Name at Address in Section and inserts it
// into BinaryFunctions keyed by Address; a duplicate address trips an assert.
// The name is also registered at the address, using SymbolSize when it is
// non-zero and Size otherwise, and the function's symbol is mapped back to
// the function.
// NOTE(review): the remaining registerNameAtAddress arguments and the return
// statement are not visible in this view.
821 BinaryFunction
*BinaryContext::createBinaryFunction(
822 const std::string
&Name
, BinarySection
&Section
, uint64_t Address
,
823 uint64_t Size
, uint64_t SymbolSize
, uint16_t Alignment
) {
824 auto Result
= BinaryFunctions
.emplace(
825 Address
, BinaryFunction(Name
, Section
, Address
, Size
, *this));
826 assert(Result
.second
== true && "unexpected duplicate function");
827 BinaryFunction
*BF
= &Result
.first
->second
;
828 registerNameAtAddress(Name
, Address
, SymbolSize
? SymbolSize
: Size
,
830 setSymbolToFunctionMap(BF
->getSymbol(), BF
);
// Returns the label of the jump table at Address for Function, creating the
// table if none exists yet.
// Existing table: its type must match Type and it must start exactly at
// Address (asserted). If Function is not yet one of its parents, it is added,
// the table is also recorded in Function.JumpTables, and every parent is
// flagged as having an indirect target to a split fragment. The table's first
// label is returned.
// New table: an existing non-internal symbol at Address is reused as the
// label when available, otherwise a generated name is registered. The new
// JumpTable is stored in both JumpTables and Function.JumpTables.
// NOTE(review): the return type line, the condition guarding the generated
// name, and the final return are not visible in this view.
835 BinaryContext::getOrCreateJumpTable(BinaryFunction
&Function
, uint64_t Address
,
836 JumpTable::JumpTableType Type
) {
837 // Two fragments of same function access same jump table
838 if (JumpTable
*JT
= getJumpTableContainingAddress(Address
)) {
839 assert(JT
->Type
== Type
&& "jump table types have to match");
840 assert(Address
== JT
->getAddress() && "unexpected non-empty jump table");
842 // Prevent associating a jump table to a specific fragment twice.
843 if (!llvm::is_contained(JT
->Parents
, &Function
)) {
844 assert(llvm::all_of(JT
->Parents
,
845 [&](const BinaryFunction
*BF
) {
846 return areRelatedFragments(&Function
, BF
);
848 "cannot re-use jump table of a different function");
849 // Duplicate the entry for the parent function for easy access
850 JT
->Parents
.push_back(&Function
);
851 if (opts::Verbosity
> 2) {
852 this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
853 << JT
->Parents
[0]->getPrintName() << "; "
854 << Function
.getPrintName() << "\n";
855 JT
->print(this->outs());
857 Function
.JumpTables
.emplace(Address
, JT
);
858 for (BinaryFunction
*Parent
: JT
->Parents
)
859 Parent
->setHasIndirectTargetToSplitFragment(true);
// Sanity check (assert only): Function must now be among the parents.
862 bool IsJumpTableParent
= false;
863 (void)IsJumpTableParent
;
864 for (BinaryFunction
*Frag
: JT
->Parents
)
865 if (Frag
== &Function
)
866 IsJumpTableParent
= true;
867 assert(IsJumpTableParent
&&
868 "cannot re-use jump table of a different function");
869 return JT
->getFirstLabel();
872 // Re-use the existing symbol if possible.
873 MCSymbol
*JTLabel
= nullptr;
874 if (BinaryData
*Object
= getBinaryDataAtAddress(Address
)) {
875 if (!isInternalSymbolName(Object
->getSymbol()->getName()))
876 JTLabel
= Object
->getSymbol();
879 const uint64_t EntrySize
= getJumpTableEntrySize(Type
);
881 const std::string JumpTableName
= generateJumpTableName(Function
, Address
);
882 JTLabel
= registerNameAtAddress(JumpTableName
, Address
, 0, EntrySize
);
885 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel
->getName()
886 << " in function " << Function
<< '\n');
// The table is allocated with `new` and stored as a raw pointer in JumpTables.
888 JumpTable
*JT
= new JumpTable(*JTLabel
, Address
, EntrySize
, Type
,
889 JumpTable::LabelMapType
{{0, JTLabel
}},
890 *getSectionForAddress(Address
));
891 JT
->Parents
.push_back(&Function
);
892 if (opts::Verbosity
> 2)
893 JT
->print(this->outs());
894 JumpTables
.emplace(Address
, JT
);
896 // Duplicate the entry for the parent function for easy access.
897 Function
.JumpTables
.emplace(Address
, JT
);
// Creates a copy of jump table JT for Function and returns the pair
// (new jump table ID, new label).
// The copy gets a fresh temporary label ("duplicatedJT") at the offset where
// OldLabel was found in JT->Labels, and inherits JT's parents, entries and
// counts. Its ID is the bitwise complement of an incrementing counter so that
// it cannot collide with address-based IDs of regular tables. A scope lock is
// held for the duration of the call.
// NOTE(review): the declarations of Found, Offset and NewJT and the body of
// the label search loop are not visible in this view.
901 std::pair
<uint64_t, const MCSymbol
*>
902 BinaryContext::duplicateJumpTable(BinaryFunction
&Function
, JumpTable
*JT
,
903 const MCSymbol
*OldLabel
) {
904 auto L
= scopeLock();
907 for (std::pair
<const unsigned, MCSymbol
*> Elmt
: JT
->Labels
) {
908 if (Elmt
.second
!= OldLabel
)
914 assert(Found
&& "Label not found");
916 MCSymbol
*NewLabel
= Ctx
->createNamedTempSymbol("duplicatedJT");
918 new JumpTable(*NewLabel
, JT
->getAddress(), JT
->EntrySize
, JT
->Type
,
919 JumpTable::LabelMapType
{{Offset
, NewLabel
}},
920 *getSectionForAddress(JT
->getAddress()));
921 NewJT
->Parents
= JT
->Parents
;
922 NewJT
->Entries
= JT
->Entries
;
923 NewJT
->Counts
= JT
->Counts
;
924 uint64_t JumpTableID
= ++DuplicatedJumpTables
;
925 // Invert it to differentiate from regular jump tables whose IDs are their
926 // addresses in the input binary memory space
927 JumpTableID
= ~JumpTableID
;
928 JumpTables
.emplace(JumpTableID
, NewJT
);
929 Function
.JumpTables
.emplace(JumpTableID
, NewJT
);
930 return std::make_pair(JumpTableID
, NewLabel
);
// Produces the name for a jump table label at Address in function BF.
// If Address lies inside a jump table already known to BF and a label exists
// at that offset, the existing label's name is returned. Otherwise the name
// has the form "JUMP_TABLE/<function name>.<Id>" with ".<Offset>" appended
// when Offset is non-zero. For an address inside a known table the Id is
// looked up in JumpTableIds; a new Id is BF.JumpTables.size() and is recorded
// in JumpTableIds[Address].
// NOTE(review): the second parameter line and the declarations of Id and
// Offset are not visible in this view.
933 std::string
BinaryContext::generateJumpTableName(const BinaryFunction
&BF
,
937 if (const JumpTable
*JT
= BF
.getJumpTableContainingAddress(Address
)) {
938 Offset
= Address
- JT
->getAddress();
939 auto JTLabelsIt
= JT
->Labels
.find(Offset
);
940 if (JTLabelsIt
!= JT
->Labels
.end())
941 return std::string(JTLabelsIt
->second
->getName());
943 auto JTIdsIt
= JumpTableIds
.find(JT
->getAddress());
944 assert(JTIdsIt
!= JumpTableIds
.end());
945 Id
= JTIdsIt
->second
;
947 Id
= JumpTableIds
[Address
] = BF
.JumpTables
.size();
949 return ("JUMP_TABLE/" + BF
.getOneName().str() + "." + std::to_string(Id
) +
950 (Offset
? ("." + std::to_string(Offset
)) : ""));
// Checks whether the bytes between the end of BF's body (getSize()) and its
// maximum size (getMaxSize()) look like padding.
// Accepted padding, per the visible code: an area filled entirely with
// breakpoint instructions, or any interleaving of no-ops, forward
// unconditional jumps that stay within the function's maximum extent, and
// zero bytes. When the scan stops before the end and verbosity is >= 1, a
// "bad padding" warning with the offending bytes is printed.
// NOTE(review): listing numbers skip lines throughout (early returns, the
// declaration of Instr, loop exits, the tail of the while condition and the
// final return values) -- confirm the exact result for each path against the
// full source.
953 bool BinaryContext::hasValidCodePadding(const BinaryFunction
&BF
) {
954 // FIXME: aarch64 support is missing.
958 if (BF
.getSize() == BF
.getMaxSize())
961 ErrorOr
<ArrayRef
<unsigned char>> FunctionData
= BF
.getData();
962 assert(FunctionData
&& "cannot get function as data");
// Scanning state shared by the helpers below: Offset is relative to the
// function start, InstrAddress is the matching absolute address.
964 uint64_t Offset
= BF
.getSize();
966 uint64_t InstrSize
= 0;
967 uint64_t InstrAddress
= BF
.getAddress() + Offset
;
968 using std::placeholders::_1
;
970 // Skip instructions that satisfy the predicate condition.
// Returns the number of bytes skipped.
971 auto skipInstructions
= [&](std::function
<bool(const MCInst
&)> Predicate
) {
972 const uint64_t StartOffset
= Offset
;
973 for (; Offset
< BF
.getMaxSize();
974 Offset
+= InstrSize
, InstrAddress
+= InstrSize
) {
975 if (!DisAsm
->getInstruction(Instr
, InstrSize
, FunctionData
->slice(Offset
),
976 InstrAddress
, nulls()))
978 if (!Predicate(Instr
))
982 return Offset
- StartOffset
;
985 // Skip a sequence of zero bytes.
// Returns the number of bytes skipped.
986 auto skipZeros
= [&]() {
987 const uint64_t StartOffset
= Offset
;
988 for (; Offset
< BF
.getMaxSize(); ++Offset
)
989 if ((*FunctionData
)[Offset
] != 0)
992 return Offset
- StartOffset
;
995 // Accept the whole padding area filled with breakpoints.
996 auto isBreakpoint
= std::bind(&MCPlusBuilder::isBreakpoint
, MIB
.get(), _1
);
997 if (skipInstructions(isBreakpoint
) && Offset
== BF
.getMaxSize())
1000 auto isNoop
= std::bind(&MCPlusBuilder::isNoop
, MIB
.get(), _1
);
1002 // Some functions have a jump to the next function or to the padding area
1003 // inserted after the body.
1004 auto isSkipJump
= [&](const MCInst
&Instr
) {
1005 uint64_t TargetAddress
= 0;
1006 if (MIB
->isUnconditionalBranch(Instr
) &&
1007 MIB
->evaluateBranch(Instr
, InstrAddress
, InstrSize
, TargetAddress
)) {
// The jump must go forward and land no further than the end of the
// function's maximum extent.
1008 if (TargetAddress
>= InstrAddress
+ InstrSize
&&
1009 TargetAddress
<= BF
.getAddress() + BF
.getMaxSize()) {
1016 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
1017 while (skipInstructions(isNoop
) || skipInstructions(isSkipJump
) ||
1021 if (Offset
== BF
.getMaxSize())
1024 if (opts::Verbosity
>= 1) {
1025 this->errs() << "BOLT-WARNING: bad padding at address 0x"
1026 << Twine::utohexstr(BF
.getAddress() + BF
.getSize())
1027 << " starting at offset " << (Offset
- BF
.getSize())
1028 << " in function " << BF
<< '\n'
1029 << FunctionData
->slice(BF
.getSize(),
1030 BF
.getMaxSize() - BF
.getSize())
1037 void BinaryContext::adjustCodePadding() {
1038 for (auto &BFI
: BinaryFunctions
) {
1039 BinaryFunction
&BF
= BFI
.second
;
1040 if (!shouldEmit(BF
))
1043 if (!hasValidCodePadding(BF
)) {
1044 if (HasRelocations
) {
1045 if (opts::Verbosity
>= 1) {
1046 this->outs() << "BOLT-INFO: function " << BF
1047 << " has invalid padding. Ignoring the function.\n";
1051 BF
.setMaxSize(BF
.getSize());
1057 MCSymbol
*BinaryContext::registerNameAtAddress(StringRef Name
, uint64_t Address
,
1061 // Register the name with MCContext.
1062 MCSymbol
*Symbol
= Ctx
->getOrCreateSymbol(Name
);
1064 auto GAI
= BinaryDataMap
.find(Address
);
1066 if (GAI
== BinaryDataMap
.end()) {
1067 ErrorOr
<BinarySection
&> SectionOrErr
= getSectionForAddress(Address
);
1068 BinarySection
&Section
=
1069 SectionOrErr
? SectionOrErr
.get() : absoluteSection();
1070 BD
= new BinaryData(*Symbol
, Address
, Size
, Alignment
? Alignment
: 1,
1072 GAI
= BinaryDataMap
.emplace(Address
, BD
).first
;
1073 GlobalSymbols
[Name
] = BD
;
1074 updateObjectNesting(GAI
);
1077 if (!BD
->hasName(Name
)) {
1078 GlobalSymbols
[Name
] = BD
;
1079 BD
->updateSize(Size
);
1080 BD
->Symbols
.push_back(Symbol
);
1088 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address
) const {
1089 auto NI
= BinaryDataMap
.lower_bound(Address
);
1090 auto End
= BinaryDataMap
.end();
1091 if ((NI
!= End
&& Address
== NI
->first
) ||
1092 ((NI
!= BinaryDataMap
.begin()) && (NI
-- != BinaryDataMap
.begin()))) {
1093 if (NI
->second
->containsAddress(Address
))
1096 // If this is a sub-symbol, see if a parent data contains the address.
1097 const BinaryData
*BD
= NI
->second
->getParent();
1099 if (BD
->containsAddress(Address
))
1101 BD
= BD
->getParent();
1107 BinaryData
*BinaryContext::getGOTSymbol() {
1108 // First tries to find a global symbol with that name
1109 BinaryData
*GOTSymBD
= getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1113 // This symbol might be hidden from run-time link, so fetch the local
1114 // definition if available.
1115 GOTSymBD
= getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1119 // If the local symbol is not unique, fail
1121 SmallString
<30> Storage
;
1122 while (const BinaryData
*BD
=
1123 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1124 .concat(Twine(Index
++))
1125 .toStringRef(Storage
)))
1126 if (BD
->getAddress() != GOTSymBD
->getAddress())
1132 bool BinaryContext::setBinaryDataSize(uint64_t Address
, uint64_t Size
) {
1133 auto NI
= BinaryDataMap
.find(Address
);
1134 assert(NI
!= BinaryDataMap
.end());
1135 if (NI
== BinaryDataMap
.end())
1137 // TODO: it's possible that a jump table starts at the same address
1138 // as a larger blob of private data. When we set the size of the
1139 // jump table, it might be smaller than the total blob size. In this
1140 // case we just leave the original size since (currently) it won't really
1142 assert((!NI
->second
->Size
|| NI
->second
->Size
== Size
||
1143 (NI
->second
->isJumpTable() && NI
->second
->Size
> Size
)) &&
1144 "can't change the size of a symbol that has already had its "
1146 if (!NI
->second
->Size
) {
1147 NI
->second
->Size
= Size
;
1148 updateObjectNesting(NI
);
1154 void BinaryContext::generateSymbolHashes() {
1155 auto isPadding
= [](const BinaryData
&BD
) {
1156 StringRef Contents
= BD
.getSection().getContents();
1157 StringRef SymData
= Contents
.substr(BD
.getOffset(), BD
.getSize());
1158 return (BD
.getName().starts_with("HOLEat") ||
1159 SymData
.find_first_not_of(0) == StringRef::npos
);
1162 uint64_t NumCollisions
= 0;
1163 for (auto &Entry
: BinaryDataMap
) {
1164 BinaryData
&BD
= *Entry
.second
;
1165 StringRef Name
= BD
.getName();
1167 if (!isInternalSymbolName(Name
))
1170 // First check if a non-anonymous alias exists and move it to the front.
1171 if (BD
.getSymbols().size() > 1) {
1172 auto Itr
= llvm::find_if(BD
.getSymbols(), [&](const MCSymbol
*Symbol
) {
1173 return !isInternalSymbolName(Symbol
->getName());
1175 if (Itr
!= BD
.getSymbols().end()) {
1176 size_t Idx
= std::distance(BD
.getSymbols().begin(), Itr
);
1177 std::swap(BD
.getSymbols()[0], BD
.getSymbols()[Idx
]);
1182 // We have to skip 0 size symbols since they will all collide.
1183 if (BD
.getSize() == 0) {
1187 const uint64_t Hash
= BD
.getSection().hash(BD
);
1188 const size_t Idx
= Name
.find("0x");
1189 std::string NewName
=
1190 (Twine(Name
.substr(0, Idx
)) + "_" + Twine::utohexstr(Hash
)).str();
1191 if (getBinaryDataByName(NewName
)) {
1192 // Ignore collisions for symbols that appear to be padding
1193 // (i.e. all zeros or a "hole")
1194 if (!isPadding(BD
)) {
1195 if (opts::Verbosity
) {
1196 this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1197 << " with new name (" << NewName
<< "), skipping.\n";
1203 BD
.Symbols
.insert(BD
.Symbols
.begin(), Ctx
->getOrCreateSymbol(NewName
));
1204 GlobalSymbols
[NewName
] = &BD
;
1206 if (NumCollisions
) {
1207 this->errs() << "BOLT-WARNING: " << NumCollisions
1208 << " collisions detected while hashing binary objects";
1209 if (!opts::Verbosity
)
1210 this->errs() << ". Use -v=1 to see the list.";
1211 this->errs() << '\n';
1215 bool BinaryContext::registerFragment(BinaryFunction
&TargetFunction
,
1216 BinaryFunction
&Function
) {
1217 assert(TargetFunction
.isFragment() && "TargetFunction must be a fragment");
1218 if (TargetFunction
.isChildOf(Function
))
1220 TargetFunction
.addParentFragment(Function
);
1221 Function
.addFragment(TargetFunction
);
1222 FragmentClasses
.unionSets(&TargetFunction
, &Function
);
1223 if (!HasRelocations
) {
1224 TargetFunction
.setSimple(false);
1225 Function
.setSimple(false);
1227 if (opts::Verbosity
>= 1) {
1228 this->outs() << "BOLT-INFO: marking " << TargetFunction
1229 << " as a fragment of " << Function
<< '\n';
1234 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction
&BF
,
1235 MCInst
&LoadLowBits
,
1238 const MCSymbol
*TargetSymbol
;
1239 uint64_t Addend
= 0;
1240 std::tie(TargetSymbol
, Addend
) = handleAddressRef(Target
, BF
,
1243 MIB
->replaceImmWithSymbolRef(LoadHiBits
, TargetSymbol
, Addend
, Ctx
.get(), Val
,
1244 ELF::R_AARCH64_ADR_PREL_PG_HI21
);
1245 MIB
->replaceImmWithSymbolRef(LoadLowBits
, TargetSymbol
, Addend
, Ctx
.get(),
1246 Val
, ELF::R_AARCH64_ADD_ABS_LO12_NC
);
1249 bool BinaryContext::handleAArch64Veneer(uint64_t Address
, bool MatchOnly
) {
1250 BinaryFunction
*TargetFunction
= getBinaryFunctionContainingAddress(Address
);
1254 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
1255 assert(Section
&& "cannot get section for referenced address");
1256 if (!Section
->isText())
1260 StringRef SectionContents
= Section
->getContents();
1261 uint64_t Offset
= Address
- Section
->getAddress();
1262 const uint64_t MaxSize
= SectionContents
.size() - Offset
;
1263 const uint8_t *Bytes
=
1264 reinterpret_cast<const uint8_t *>(SectionContents
.data());
1265 ArrayRef
<uint8_t> Data(Bytes
+ Offset
, MaxSize
);
1267 auto matchVeneer
= [&](BinaryFunction::InstrMapType
&Instructions
,
1268 MCInst
&Instruction
, uint64_t Offset
,
1269 uint64_t AbsoluteInstrAddr
,
1270 uint64_t TotalSize
) -> bool {
1271 MCInst
*TargetHiBits
, *TargetLowBits
;
1272 uint64_t TargetAddress
, Count
;
1273 Count
= MIB
->matchLinkerVeneer(Instructions
.begin(), Instructions
.end(),
1274 AbsoluteInstrAddr
, Instruction
, TargetHiBits
,
1275 TargetLowBits
, TargetAddress
);
1282 // NOTE The target symbol was created during disassemble's
1283 // handleExternalReference
1284 const MCSymbol
*VeneerSymbol
= getOrCreateGlobalSymbol(Address
, "FUNCat");
1285 BinaryFunction
*Veneer
= createBinaryFunction(VeneerSymbol
->getName().str(),
1286 *Section
, Address
, TotalSize
);
1287 addAdrpAddRelocAArch64(*Veneer
, *TargetLowBits
, *TargetHiBits
,
1289 MIB
->addAnnotation(Instruction
, "AArch64Veneer", true);
1290 Veneer
->addInstruction(Offset
, std::move(Instruction
));
1292 for (auto It
= Instructions
.rbegin(); Count
!= 0; ++It
, --Count
) {
1293 MIB
->addAnnotation(It
->second
, "AArch64Veneer", true);
1294 Veneer
->addInstruction(It
->first
, std::move(It
->second
));
1297 Veneer
->getOrCreateLocalLabel(Address
);
1298 Veneer
->setMaxSize(TotalSize
);
1299 Veneer
->updateState(BinaryFunction::State::Disassembled
);
1300 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x"
1301 << Twine::utohexstr(Address
) << "\n");
1305 uint64_t Size
= 0, TotalSize
= 0;
1306 BinaryFunction::InstrMapType VeneerInstructions
;
1307 for (Offset
= 0; Offset
< MaxSize
; Offset
+= Size
) {
1309 const uint64_t AbsoluteInstrAddr
= Address
+ Offset
;
1310 if (!SymbolicDisAsm
->getInstruction(Instruction
, Size
, Data
.slice(Offset
),
1311 AbsoluteInstrAddr
, nulls()))
1315 if (MIB
->isBranch(Instruction
)) {
1316 Ret
= matchVeneer(VeneerInstructions
, Instruction
, Offset
,
1317 AbsoluteInstrAddr
, TotalSize
);
1321 VeneerInstructions
.emplace(Offset
, std::move(Instruction
));
1327 void BinaryContext::processInterproceduralReferences() {
1328 for (const std::pair
<BinaryFunction
*, uint64_t> &It
:
1329 InterproceduralReferences
) {
1330 BinaryFunction
&Function
= *It
.first
;
1331 uint64_t Address
= It
.second
;
1332 // Process interprocedural references from ignored functions in BAT mode
1333 // (non-simple in non-relocation mode) to properly register entry points
1334 if (!Address
|| (Function
.isIgnored() && !HasBATSection
))
1337 BinaryFunction
*TargetFunction
=
1338 getBinaryFunctionContainingAddress(Address
);
1339 if (&Function
== TargetFunction
)
1342 if (TargetFunction
) {
1343 if (TargetFunction
->isFragment() &&
1344 !areRelatedFragments(TargetFunction
, &Function
)) {
1346 << "BOLT-WARNING: interprocedural reference between unrelated "
1348 << Function
.getPrintName() << " and "
1349 << TargetFunction
->getPrintName() << '\n';
1351 if (uint64_t Offset
= Address
- TargetFunction
->getAddress())
1352 TargetFunction
->addEntryPointAtOffset(Offset
);
1357 // Check if address falls in function padding space - this could be
1358 // unmarked data in code. In this case adjust the padding space size.
1359 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
1360 assert(Section
&& "cannot get section for referenced address");
1362 if (!Section
->isText())
1365 // PLT requires special handling and could be ignored in this context.
1366 StringRef SectionName
= Section
->getName();
1367 if (SectionName
== ".plt" || SectionName
== ".plt.got")
1370 // Check if it is aarch64 veneer written at Address
1371 if (isAArch64() && handleAArch64Veneer(Address
))
1374 if (opts::processAllFunctions()) {
1375 this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1376 << "object in code at address 0x"
1377 << Twine::utohexstr(Address
) << " belonging to section "
1378 << SectionName
<< " in current mode\n";
1382 TargetFunction
= getBinaryFunctionContainingAddress(Address
,
1383 /*CheckPastEnd=*/false,
1384 /*UseMaxSize=*/true);
1385 // We are not going to overwrite non-simple functions, but for simple
1386 // ones - adjust the padding size.
1387 if (TargetFunction
&& TargetFunction
->isSimple()) {
1389 << "BOLT-WARNING: function " << *TargetFunction
1390 << " has an object detected in a padding region at address 0x"
1391 << Twine::utohexstr(Address
) << '\n';
1392 TargetFunction
->setMaxSize(TargetFunction
->getSize());
1396 InterproceduralReferences
.clear();
1399 void BinaryContext::postProcessSymbolTable() {
1400 fixBinaryDataHoles();
1402 for (auto &Entry
: BinaryDataMap
) {
1403 BinaryData
*BD
= Entry
.second
;
1404 if ((BD
->getName().starts_with("SYMBOLat") ||
1405 BD
->getName().starts_with("DATAat")) &&
1406 !BD
->getParent() && !BD
->getSize() && !BD
->isAbsolute() &&
1408 this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
1415 generateSymbolHashes();
1418 void BinaryContext::foldFunction(BinaryFunction
&ChildBF
,
1419 BinaryFunction
&ParentBF
) {
1420 assert(!ChildBF
.isMultiEntry() && !ParentBF
.isMultiEntry() &&
1421 "cannot merge functions with multiple entry points");
1423 std::unique_lock
<llvm::sys::RWMutex
> WriteCtxLock(CtxMutex
, std::defer_lock
);
1424 std::unique_lock
<llvm::sys::RWMutex
> WriteSymbolMapLock(
1425 SymbolToFunctionMapMutex
, std::defer_lock
);
1427 const StringRef ChildName
= ChildBF
.getOneName();
1429 // Move symbols over and update bookkeeping info.
1430 for (MCSymbol
*Symbol
: ChildBF
.getSymbols()) {
1431 ParentBF
.getSymbols().push_back(Symbol
);
1432 WriteSymbolMapLock
.lock();
1433 SymbolToFunctionMap
[Symbol
] = &ParentBF
;
1434 WriteSymbolMapLock
.unlock();
1435 // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1437 ChildBF
.getSymbols().clear();
1439 // Move other names the child function is known under.
1440 llvm::move(ChildBF
.Aliases
, std::back_inserter(ParentBF
.Aliases
));
1441 ChildBF
.Aliases
.clear();
1443 if (HasRelocations
) {
1444 // Merge execution counts of ChildBF into those of ParentBF.
1445 // Without relocations, we cannot reliably merge profiles as both functions
1446 // continue to exist and either one can be executed.
1447 ChildBF
.mergeProfileDataInto(ParentBF
);
1449 std::shared_lock
<llvm::sys::RWMutex
> ReadBfsLock(BinaryFunctionsMutex
,
1451 std::unique_lock
<llvm::sys::RWMutex
> WriteBfsLock(BinaryFunctionsMutex
,
1453 // Remove ChildBF from the global set of functions in relocs mode.
1455 auto FI
= BinaryFunctions
.find(ChildBF
.getAddress());
1456 ReadBfsLock
.unlock();
1458 assert(FI
!= BinaryFunctions
.end() && "function not found");
1459 assert(&ChildBF
== &FI
->second
&& "function mismatch");
1461 WriteBfsLock
.lock();
1462 ChildBF
.clearDisasmState();
1463 FI
= BinaryFunctions
.erase(FI
);
1464 WriteBfsLock
.unlock();
1467 // In non-relocation mode we keep the function, but rename it.
1468 std::string NewName
= "__ICF_" + ChildName
.str();
1470 WriteCtxLock
.lock();
1471 ChildBF
.getSymbols().push_back(Ctx
->getOrCreateSymbol(NewName
));
1472 WriteCtxLock
.unlock();
1474 ChildBF
.setFolded(&ParentBF
);
1477 ParentBF
.setHasFunctionsFoldedInto();
1480 void BinaryContext::fixBinaryDataHoles() {
1481 assert(validateObjectNesting() && "object nesting inconsistency detected");
1483 for (BinarySection
&Section
: allocatableSections()) {
1484 std::vector
<std::pair
<uint64_t, uint64_t>> Holes
;
1486 auto isNotHole
= [&Section
](const binary_data_iterator
&Itr
) {
1487 BinaryData
*BD
= Itr
->second
;
1488 bool isHole
= (!BD
->getParent() && !BD
->getSize() && BD
->isObject() &&
1489 (BD
->getName().starts_with("SYMBOLat0x") ||
1490 BD
->getName().starts_with("DATAat0x") ||
1491 BD
->getName().starts_with("ANONYMOUS")));
1492 return !isHole
&& BD
->getSection() == Section
&& !BD
->getParent();
1495 auto BDStart
= BinaryDataMap
.begin();
1496 auto BDEnd
= BinaryDataMap
.end();
1497 auto Itr
= FilteredBinaryDataIterator(isNotHole
, BDStart
, BDEnd
);
1498 auto End
= FilteredBinaryDataIterator(isNotHole
, BDEnd
, BDEnd
);
1500 uint64_t EndAddress
= Section
.getAddress();
1502 while (Itr
!= End
) {
1503 if (Itr
->second
->getAddress() > EndAddress
) {
1504 uint64_t Gap
= Itr
->second
->getAddress() - EndAddress
;
1505 Holes
.emplace_back(EndAddress
, Gap
);
1507 EndAddress
= Itr
->second
->getEndAddress();
1511 if (EndAddress
< Section
.getEndAddress())
1512 Holes
.emplace_back(EndAddress
, Section
.getEndAddress() - EndAddress
);
1514 // If there is already a symbol at the start of the hole, grow that symbol
1515 // to cover the rest. Otherwise, create a new symbol to cover the hole.
1516 for (std::pair
<uint64_t, uint64_t> &Hole
: Holes
) {
1517 BinaryData
*BD
= getBinaryDataAtAddress(Hole
.first
);
1519 // BD->getSection() can be != Section if there are sections that
1520 // overlap. In this case it is probably safe to just skip the holes
1521 // since the overlapping section will not(?) have any symbols in it.
1522 if (BD
->getSection() == Section
)
1523 setBinaryDataSize(Hole
.first
, Hole
.second
);
1525 getOrCreateGlobalSymbol(Hole
.first
, "HOLEat", Hole
.second
, 1);
1530 assert(validateObjectNesting() && "object nesting inconsistency detected");
1531 assert(validateHoles() && "top level hole detected in object map");
1534 void BinaryContext::printGlobalSymbols(raw_ostream
&OS
) const {
1535 const BinarySection
*CurrentSection
= nullptr;
1536 bool FirstSection
= true;
1538 for (auto &Entry
: BinaryDataMap
) {
1539 const BinaryData
*BD
= Entry
.second
;
1540 const BinarySection
&Section
= BD
->getSection();
1541 if (FirstSection
|| Section
!= *CurrentSection
) {
1542 uint64_t Address
, Size
;
1543 StringRef Name
= Section
.getName();
1545 Address
= Section
.getAddress();
1546 Size
= Section
.getSize();
1548 Address
= BD
->getAddress();
1549 Size
= BD
->getSize();
1551 OS
<< "BOLT-INFO: Section " << Name
<< ", "
1552 << "0x" + Twine::utohexstr(Address
) << ":"
1553 << "0x" + Twine::utohexstr(Address
+ Size
) << "/" << Size
<< "\n";
1554 CurrentSection
= &Section
;
1555 FirstSection
= false;
1558 OS
<< "BOLT-INFO: ";
1559 const BinaryData
*P
= BD
->getParent();
1568 Expected
<unsigned> BinaryContext::getDwarfFile(
1569 StringRef Directory
, StringRef FileName
, unsigned FileNumber
,
1570 std::optional
<MD5::MD5Result
> Checksum
, std::optional
<StringRef
> Source
,
1571 unsigned CUID
, unsigned DWARFVersion
) {
1572 DwarfLineTable
&Table
= DwarfLineTablesCUMap
[CUID
];
1573 return Table
.tryGetFile(Directory
, FileName
, Checksum
, Source
, DWARFVersion
,
1577 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID
,
1578 const uint32_t SrcCUID
,
1579 unsigned FileIndex
) {
1580 DWARFCompileUnit
*SrcUnit
= DwCtx
->getCompileUnitForOffset(SrcCUID
);
1581 const DWARFDebugLine::LineTable
*LineTable
=
1582 DwCtx
->getLineTableForUnit(SrcUnit
);
1583 const std::vector
<DWARFDebugLine::FileNameEntry
> &FileNames
=
1584 LineTable
->Prologue
.FileNames
;
1585 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1587 assert(FileIndex
> 0 && FileIndex
<= FileNames
.size() &&
1588 "FileIndex out of range for the compilation unit.");
1590 if (FileNames
[FileIndex
- 1].DirIdx
!= 0) {
1591 if (std::optional
<const char *> DirName
= dwarf::toString(
1593 .IncludeDirectories
[FileNames
[FileIndex
- 1].DirIdx
- 1])) {
1597 StringRef FileName
= "";
1598 if (std::optional
<const char *> FName
=
1599 dwarf::toString(FileNames
[FileIndex
- 1].Name
))
1601 assert(FileName
!= "");
1602 DWARFCompileUnit
*DstUnit
= DwCtx
->getCompileUnitForOffset(DestCUID
);
1603 return cantFail(getDwarfFile(Dir
, FileName
, 0, std::nullopt
, std::nullopt
,
1604 DestCUID
, DstUnit
->getVersion()));
1607 std::vector
<BinaryFunction
*> BinaryContext::getSortedFunctions() {
1608 std::vector
<BinaryFunction
*> SortedFunctions(BinaryFunctions
.size());
1609 llvm::transform(llvm::make_second_range(BinaryFunctions
),
1610 SortedFunctions
.begin(),
1611 [](BinaryFunction
&BF
) { return &BF
; });
1613 llvm::stable_sort(SortedFunctions
, compareBinaryFunctionByIndex
);
1614 return SortedFunctions
;
1617 std::vector
<BinaryFunction
*> BinaryContext::getAllBinaryFunctions() {
1618 std::vector
<BinaryFunction
*> AllFunctions
;
1619 AllFunctions
.reserve(BinaryFunctions
.size() + InjectedBinaryFunctions
.size());
1620 llvm::transform(llvm::make_second_range(BinaryFunctions
),
1621 std::back_inserter(AllFunctions
),
1622 [](BinaryFunction
&BF
) { return &BF
; });
1623 llvm::copy(InjectedBinaryFunctions
, std::back_inserter(AllFunctions
));
1625 return AllFunctions
;
1628 std::optional
<DWARFUnit
*> BinaryContext::getDWOCU(uint64_t DWOId
) {
1629 auto Iter
= DWOCUs
.find(DWOId
);
1630 if (Iter
== DWOCUs
.end())
1631 return std::nullopt
;
1633 return Iter
->second
;
1636 DWARFContext
*BinaryContext::getDWOContext() const {
1639 return &DWOCUs
.begin()->second
->getContext();
1642 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1643 void BinaryContext::preprocessDWODebugInfo() {
1644 for (const std::unique_ptr
<DWARFUnit
> &CU
: DwCtx
->compile_units()) {
1645 DWARFUnit
*const DwarfUnit
= CU
.get();
1646 if (std::optional
<uint64_t> DWOId
= DwarfUnit
->getDWOId()) {
1647 std::string DWOName
= dwarf::toString(
1648 DwarfUnit
->getUnitDIE().find(
1649 {dwarf::DW_AT_dwo_name
, dwarf::DW_AT_GNU_dwo_name
}),
1651 SmallString
<16> AbsolutePath
;
1652 if (!opts::CompDirOverride
.empty()) {
1653 sys::path::append(AbsolutePath
, opts::CompDirOverride
);
1654 sys::path::append(AbsolutePath
, DWOName
);
1657 DwarfUnit
->getNonSkeletonUnitDIE(false, AbsolutePath
).getDwarfUnit();
1658 if (!DWOCU
->isDWOUnit()) {
1660 << "BOLT-WARNING: Debug Fission: DWO debug information for "
1662 << " was not retrieved and won't be updated. Please check "
1666 DWOCUs
[*DWOId
] = DWOCU
;
1669 if (!DWOCUs
.empty())
1670 this->outs() << "BOLT-INFO: processing split DWARF\n";
1673 void BinaryContext::preprocessDebugInfo() {
1679 bool operator<(const CURange
&Other
) const { return LowPC
< Other
.LowPC
; }
1682 // Building a map of address ranges to CUs similar to .debug_aranges and use
1683 // it to assign CU to functions.
1684 std::vector
<CURange
> AllRanges
;
1685 AllRanges
.reserve(DwCtx
->getNumCompileUnits());
1686 for (const std::unique_ptr
<DWARFUnit
> &CU
: DwCtx
->compile_units()) {
1687 Expected
<DWARFAddressRangesVector
> RangesOrError
=
1688 CU
->getUnitDIE().getAddressRanges();
1689 if (!RangesOrError
) {
1690 consumeError(RangesOrError
.takeError());
1693 for (DWARFAddressRange
&Range
: *RangesOrError
) {
1694 // Parts of the debug info could be invalidated due to corresponding code
1695 // being removed from the binary by the linker. Hence we check if the
1696 // address is a valid one.
1697 if (containsAddress(Range
.LowPC
))
1698 AllRanges
.emplace_back(CURange
{Range
.LowPC
, Range
.HighPC
, CU
.get()});
1701 ContainsDwarf5
|= CU
->getVersion() >= 5;
1702 ContainsDwarfLegacy
|= CU
->getVersion() < 5;
1705 llvm::sort(AllRanges
);
1706 for (auto &KV
: BinaryFunctions
) {
1707 const uint64_t FunctionAddress
= KV
.first
;
1708 BinaryFunction
&Function
= KV
.second
;
1710 auto It
= llvm::partition_point(
1711 AllRanges
, [=](CURange R
) { return R
.HighPC
<= FunctionAddress
; });
1712 if (It
!= AllRanges
.end() && It
->LowPC
<= FunctionAddress
)
1713 Function
.setDWARFUnit(It
->Unit
);
1716 // Discover units with debug info that needs to be updated.
1717 for (const auto &KV
: BinaryFunctions
) {
1718 const BinaryFunction
&BF
= KV
.second
;
1719 if (shouldEmit(BF
) && BF
.getDWARFUnit())
1720 ProcessedCUs
.insert(BF
.getDWARFUnit());
1723 // Clear debug info for functions from units that we are not going to process.
1724 for (auto &KV
: BinaryFunctions
) {
1725 BinaryFunction
&BF
= KV
.second
;
1726 if (BF
.getDWARFUnit() && !ProcessedCUs
.count(BF
.getDWARFUnit()))
1727 BF
.setDWARFUnit(nullptr);
1730 if (opts::Verbosity
>= 1) {
1731 this->outs() << "BOLT-INFO: " << ProcessedCUs
.size() << " out of "
1732 << DwCtx
->getNumCompileUnits() << " CUs will be updated\n";
1735 preprocessDWODebugInfo();
1737 // Populate MCContext with DWARF files from all units.
1738 StringRef GlobalPrefix
= AsmInfo
->getPrivateGlobalPrefix();
1739 for (const std::unique_ptr
<DWARFUnit
> &CU
: DwCtx
->compile_units()) {
1740 const uint64_t CUID
= CU
->getOffset();
1741 DwarfLineTable
&BinaryLineTable
= getDwarfLineTable(CUID
);
1742 BinaryLineTable
.setLabel(Ctx
->getOrCreateSymbol(
1743 GlobalPrefix
+ "line_table_start" + Twine(CUID
)));
1745 if (!ProcessedCUs
.count(CU
.get()))
1748 const DWARFDebugLine::LineTable
*LineTable
=
1749 DwCtx
->getLineTableForUnit(CU
.get());
1750 const std::vector
<DWARFDebugLine::FileNameEntry
> &FileNames
=
1751 LineTable
->Prologue
.FileNames
;
1753 uint16_t DwarfVersion
= LineTable
->Prologue
.getVersion();
1754 if (DwarfVersion
>= 5) {
1755 std::optional
<MD5::MD5Result
> Checksum
;
1756 if (LineTable
->Prologue
.ContentTypes
.HasMD5
)
1757 Checksum
= LineTable
->Prologue
.FileNames
[0].Checksum
;
1758 std::optional
<const char *> Name
=
1759 dwarf::toString(CU
->getUnitDIE().find(dwarf::DW_AT_name
), nullptr);
1760 if (std::optional
<uint64_t> DWOID
= CU
->getDWOId()) {
1761 auto Iter
= DWOCUs
.find(*DWOID
);
1762 assert(Iter
!= DWOCUs
.end() && "DWO CU was not found.");
1763 Name
= dwarf::toString(
1764 Iter
->second
->getUnitDIE().find(dwarf::DW_AT_name
), nullptr);
1766 BinaryLineTable
.setRootFile(CU
->getCompilationDir(), *Name
, Checksum
,
1770 BinaryLineTable
.setDwarfVersion(DwarfVersion
);
1772 // Assign a unique label to every line table, one per CU.
1773 // Make sure empty debug line tables are registered too.
1774 if (FileNames
.empty()) {
1775 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt
, std::nullopt
,
1776 CUID
, DwarfVersion
));
1779 const uint32_t Offset
= DwarfVersion
< 5 ? 1 : 0;
1780 for (size_t I
= 0, Size
= FileNames
.size(); I
!= Size
; ++I
) {
1781 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1784 if (FileNames
[I
].DirIdx
!= 0 || DwarfVersion
>= 5)
1785 if (std::optional
<const char *> DirName
= dwarf::toString(
1787 .IncludeDirectories
[FileNames
[I
].DirIdx
- Offset
]))
1789 StringRef FileName
= "";
1790 if (std::optional
<const char *> FName
=
1791 dwarf::toString(FileNames
[I
].Name
))
1793 assert(FileName
!= "");
1794 std::optional
<MD5::MD5Result
> Checksum
;
1795 if (DwarfVersion
>= 5 && LineTable
->Prologue
.ContentTypes
.HasMD5
)
1796 Checksum
= LineTable
->Prologue
.FileNames
[I
].Checksum
;
1797 cantFail(getDwarfFile(Dir
, FileName
, 0, Checksum
, std::nullopt
, CUID
,
1803 bool BinaryContext::shouldEmit(const BinaryFunction
&Function
) const {
1804 if (Function
.isPseudo())
1807 if (opts::processAllFunctions())
1810 if (Function
.isIgnored())
1813 // In relocation mode we will emit non-simple functions with CFG.
1814 // If the function does not have a CFG it should be marked as ignored.
1815 return HasRelocations
|| Function
.isSimple();
1818 void BinaryContext::dump(const MCInst
&Inst
) const {
1819 if (LLVM_UNLIKELY(!InstPrinter
)) {
1820 dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1823 InstPrinter
->printInst(&Inst
, 0, "", *STI
, dbgs());
1827 void BinaryContext::printCFI(raw_ostream
&OS
, const MCCFIInstruction
&Inst
) {
1828 uint32_t Operation
= Inst
.getOperation();
1829 switch (Operation
) {
1830 case MCCFIInstruction::OpSameValue
:
1831 OS
<< "OpSameValue Reg" << Inst
.getRegister();
1833 case MCCFIInstruction::OpRememberState
:
1834 OS
<< "OpRememberState";
1836 case MCCFIInstruction::OpRestoreState
:
1837 OS
<< "OpRestoreState";
1839 case MCCFIInstruction::OpOffset
:
1840 OS
<< "OpOffset Reg" << Inst
.getRegister() << " " << Inst
.getOffset();
1842 case MCCFIInstruction::OpDefCfaRegister
:
1843 OS
<< "OpDefCfaRegister Reg" << Inst
.getRegister();
1845 case MCCFIInstruction::OpDefCfaOffset
:
1846 OS
<< "OpDefCfaOffset " << Inst
.getOffset();
1848 case MCCFIInstruction::OpDefCfa
:
1849 OS
<< "OpDefCfa Reg" << Inst
.getRegister() << " " << Inst
.getOffset();
1851 case MCCFIInstruction::OpRelOffset
:
1852 OS
<< "OpRelOffset Reg" << Inst
.getRegister() << " " << Inst
.getOffset();
1854 case MCCFIInstruction::OpAdjustCfaOffset
:
1855 OS
<< "OfAdjustCfaOffset " << Inst
.getOffset();
1857 case MCCFIInstruction::OpEscape
:
1860 case MCCFIInstruction::OpRestore
:
1861 OS
<< "OpRestore Reg" << Inst
.getRegister();
1863 case MCCFIInstruction::OpUndefined
:
1864 OS
<< "OpUndefined Reg" << Inst
.getRegister();
1866 case MCCFIInstruction::OpRegister
:
1867 OS
<< "OpRegister Reg" << Inst
.getRegister() << " Reg"
1868 << Inst
.getRegister2();
1870 case MCCFIInstruction::OpWindowSave
:
1871 OS
<< "OpWindowSave";
1873 case MCCFIInstruction::OpGnuArgsSize
:
1874 OS
<< "OpGnuArgsSize";
1877 OS
<< "Op#" << Operation
;
1882 MarkerSymType
BinaryContext::getMarkerType(const SymbolRef
&Symbol
) const {
1883 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1884 // in the code section (see IHI0056B). $x identifies a symbol starting code or
1885 // the end of a data chunk inside code, $d identifies start of data.
1886 if (isX86() || ELFSymbolRef(Symbol
).getSize())
1887 return MarkerSymType::NONE
;
1889 Expected
<StringRef
> NameOrError
= Symbol
.getName();
1890 Expected
<object::SymbolRef::Type
> TypeOrError
= Symbol
.getType();
1892 if (!TypeOrError
|| !NameOrError
)
1893 return MarkerSymType::NONE
;
1895 if (*TypeOrError
!= SymbolRef::ST_Unknown
)
1896 return MarkerSymType::NONE
;
1898 if (*NameOrError
== "$x" || NameOrError
->starts_with("$x."))
1899 return MarkerSymType::CODE
;
1902 if (isRISCV() && NameOrError
->starts_with("$x"))
1903 return MarkerSymType::CODE
;
1905 if (*NameOrError
== "$d" || NameOrError
->starts_with("$d."))
1906 return MarkerSymType::DATA
;
1908 return MarkerSymType::NONE
;
1911 bool BinaryContext::isMarker(const SymbolRef
&Symbol
) const {
1912 return getMarkerType(Symbol
) != MarkerSymType::NONE
;
1915 static void printDebugInfo(raw_ostream
&OS
, const MCInst
&Instruction
,
1916 const BinaryFunction
*Function
,
1917 DWARFContext
*DwCtx
) {
1918 DebugLineTableRowRef RowRef
=
1919 DebugLineTableRowRef::fromSMLoc(Instruction
.getLoc());
1920 if (RowRef
== DebugLineTableRowRef::NULL_ROW
)
1923 const DWARFDebugLine::LineTable
*LineTable
;
1924 if (Function
&& Function
->getDWARFUnit() &&
1925 Function
->getDWARFUnit()->getOffset() == RowRef
.DwCompileUnitIndex
) {
1926 LineTable
= Function
->getDWARFLineTable();
1928 LineTable
= DwCtx
->getLineTableForUnit(
1929 DwCtx
->getCompileUnitForOffset(RowRef
.DwCompileUnitIndex
));
1931 assert(LineTable
&& "line table expected for instruction with debug info");
1933 const DWARFDebugLine::Row
&Row
= LineTable
->Rows
[RowRef
.RowIndex
- 1];
1934 StringRef FileName
= "";
1935 if (std::optional
<const char *> FName
=
1936 dwarf::toString(LineTable
->Prologue
.FileNames
[Row
.File
- 1].Name
))
1938 OS
<< " # debug line " << FileName
<< ":" << Row
.Line
;
1940 OS
<< ":" << Row
.Column
;
1941 if (Row
.Discriminator
)
1942 OS
<< " discriminator:" << Row
.Discriminator
;
1945 void BinaryContext::printInstruction(raw_ostream
&OS
, const MCInst
&Instruction
,
1947 const BinaryFunction
*Function
,
1948 bool PrintMCInst
, bool PrintMemData
,
1949 bool PrintRelocations
,
1950 StringRef Endl
) const {
1951 OS
<< format(" %08" PRIx64
": ", Offset
);
1952 if (MIB
->isCFI(Instruction
)) {
1953 uint32_t Offset
= Instruction
.getOperand(0).getImm();
1954 OS
<< "\t!CFI\t$" << Offset
<< "\t; ";
1956 printCFI(OS
, *Function
->getCFIFor(Instruction
));
1960 if (std::optional
<uint32_t> DynamicID
=
1961 MIB
->getDynamicBranchID(Instruction
)) {
1962 OS
<< "\tjit\t" << MIB
->getTargetSymbol(Instruction
)->getName()
1963 << " # ID: " << DynamicID
;
1965 // If there are annotations on the instruction, the MCInstPrinter will fail
1966 // to print the preferred alias as it only does so when the number of
1967 // operands is as expected. See
1968 // https://github.com/llvm/llvm-project/blob/782f1a0d895646c364a53f9dcdd6d4ec1f3e5ea0/llvm/lib/MC/MCInstPrinter.cpp#L142
1969 // Therefore, create a temporary copy of the Inst from which the annotations
1970 // are removed, and print that Inst.
1971 MCInst InstNoAnnot
= Instruction
;
1972 MIB
->stripAnnotations(InstNoAnnot
);
1973 InstPrinter
->printInst(&InstNoAnnot
, 0, "", *STI
, OS
);
1975 if (MIB
->isCall(Instruction
)) {
1976 if (MIB
->isTailCall(Instruction
))
1977 OS
<< " # TAILCALL ";
1978 if (MIB
->isInvoke(Instruction
)) {
1979 const std::optional
<MCPlus::MCLandingPad
> EHInfo
=
1980 MIB
->getEHInfo(Instruction
);
1981 OS
<< " # handler: ";
1983 OS
<< *EHInfo
->first
;
1986 OS
<< "; action: " << EHInfo
->second
;
1987 const int64_t GnuArgsSize
= MIB
->getGnuArgsSize(Instruction
);
1988 if (GnuArgsSize
>= 0)
1989 OS
<< "; GNU_args_size = " << GnuArgsSize
;
1991 } else if (MIB
->isIndirectBranch(Instruction
)) {
1992 if (uint64_t JTAddress
= MIB
->getJumpTable(Instruction
)) {
1993 OS
<< " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress
);
1995 OS
<< " # UNKNOWN CONTROL FLOW";
1998 if (std::optional
<uint32_t> Offset
= MIB
->getOffset(Instruction
))
1999 OS
<< " # Offset: " << *Offset
;
2000 if (std::optional
<uint32_t> Size
= MIB
->getSize(Instruction
))
2001 OS
<< " # Size: " << *Size
;
2002 if (MCSymbol
*Label
= MIB
->getInstLabel(Instruction
))
2003 OS
<< " # Label: " << *Label
;
2005 MIB
->printAnnotations(Instruction
, OS
);
2007 if (opts::PrintDebugInfo
)
2008 printDebugInfo(OS
, Instruction
, Function
, DwCtx
.get());
2010 if ((opts::PrintRelocations
|| PrintRelocations
) && Function
) {
2011 const uint64_t Size
= computeCodeSize(&Instruction
, &Instruction
+ 1);
2012 Function
->printRelocations(OS
, Offset
, Size
);
2018 Instruction
.dump_pretty(OS
, InstPrinter
.get());
2023 std::optional
<uint64_t>
2024 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress
,
2025 uint64_t FileOffset
) const {
2026 // Find a segment with a matching file offset.
2027 for (auto &KV
: SegmentMapInfo
) {
2028 const SegmentInfo
&SegInfo
= KV
.second
;
2029 // Only consider executable segments.
2030 if (!SegInfo
.IsExecutable
)
2032 // FileOffset is got from perf event,
2033 // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
2034 // If the pagesize is not equal to SegInfo.Alignment.
2035 // FileOffset and SegInfo.FileOffset should be aligned first,
2036 // and then judge whether they are equal.
2037 if (alignDown(SegInfo
.FileOffset
, SegInfo
.Alignment
) ==
2038 alignDown(FileOffset
, SegInfo
.Alignment
)) {
2039 // The function's offset from base address in VAS is aligned by pagesize
2040 // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2041 // However, The ELF document says that SegInfo.FileOffset should equal
2042 // to SegInfo.Address, modulo the pagesize.
2043 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2045 // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2046 // alignDown(SegInfo.Address, pagesize)
2047 // = SegInfo.Address - (SegInfo.Address % pagesize)
2048 // = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2049 // = SegInfo.Address - SegInfo.FileOffset +
2050 // alignDown(SegInfo.FileOffset, pagesize)
2051 // = SegInfo.Address - SegInfo.FileOffset + FileOffset
2052 return MMapAddress
- (SegInfo
.Address
- SegInfo
.FileOffset
+ FileOffset
);
2056 return std::nullopt
;
2059 ErrorOr
<BinarySection
&> BinaryContext::getSectionForAddress(uint64_t Address
) {
2060 auto SI
= AddressToSection
.upper_bound(Address
);
2061 if (SI
!= AddressToSection
.begin()) {
2063 uint64_t UpperBound
= SI
->first
+ SI
->second
->getSize();
2064 if (!SI
->second
->getSize())
2066 if (UpperBound
> Address
)
2069 return std::make_error_code(std::errc::bad_address
);
2073 BinaryContext::getSectionNameForAddress(uint64_t Address
) const {
2074 if (ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
))
2075 return Section
->getName();
2076 return std::make_error_code(std::errc::bad_address
);
2079 BinarySection
&BinaryContext::registerSection(BinarySection
*Section
) {
2080 auto Res
= Sections
.insert(Section
);
2082 assert(Res
.second
&& "can't register the same section twice.");
2084 // Only register allocatable sections in the AddressToSection map.
2085 if (Section
->isAllocatable() && Section
->getAddress())
2086 AddressToSection
.insert(std::make_pair(Section
->getAddress(), Section
));
2087 NameToSection
.insert(
2088 std::make_pair(std::string(Section
->getName()), Section
));
2089 if (Section
->hasSectionRef())
2090 SectionRefToBinarySection
.insert(
2091 std::make_pair(Section
->getSectionRef(), Section
));
2093 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section
<< "\n");
2097 BinarySection
&BinaryContext::registerSection(SectionRef Section
) {
2098 return registerSection(new BinarySection(*this, Section
));
2102 BinaryContext::registerSection(const Twine
&SectionName
,
2103 const BinarySection
&OriginalSection
) {
2104 return registerSection(
2105 new BinarySection(*this, SectionName
, OriginalSection
));
2109 BinaryContext::registerOrUpdateSection(const Twine
&Name
, unsigned ELFType
,
2110 unsigned ELFFlags
, uint8_t *Data
,
2111 uint64_t Size
, unsigned Alignment
) {
2112 auto NamedSections
= getSectionByName(Name
);
2113 if (NamedSections
.begin() != NamedSections
.end()) {
2114 assert(std::next(NamedSections
.begin()) == NamedSections
.end() &&
2115 "can only update unique sections");
2116 BinarySection
*Section
= NamedSections
.begin()->second
;
2118 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section
<< " -> ");
2119 const bool Flag
= Section
->isAllocatable();
2121 Section
->update(Data
, Size
, Alignment
, ELFType
, ELFFlags
);
2122 LLVM_DEBUG(dbgs() << *Section
<< "\n");
2123 // FIXME: Fix section flags/attributes for MachO.
2125 assert(Flag
== Section
->isAllocatable() &&
2126 "can't change section allocation status");
2130 return registerSection(
2131 new BinarySection(*this, Name
, Data
, Size
, Alignment
, ELFType
, ELFFlags
));
2134 void BinaryContext::deregisterSectionName(const BinarySection
&Section
) {
2135 auto NameRange
= NameToSection
.equal_range(Section
.getName().str());
2136 while (NameRange
.first
!= NameRange
.second
) {
2137 if (NameRange
.first
->second
== &Section
) {
2138 NameToSection
.erase(NameRange
.first
);
2145 void BinaryContext::deregisterUnusedSections() {
2146 ErrorOr
<BinarySection
&> AbsSection
= getUniqueSectionByName("<absolute>");
2147 for (auto SI
= Sections
.begin(); SI
!= Sections
.end();) {
2148 BinarySection
*Section
= *SI
;
2149 // We check getOutputData() instead of getOutputSize() because sometimes
2150 // zero-sized .text.cold sections are allocated.
2151 if (Section
->hasSectionRef() || Section
->getOutputData() ||
2152 (AbsSection
&& Section
== &AbsSection
.get())) {
2157 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section
->getName()
2159 deregisterSectionName(*Section
);
2160 SI
= Sections
.erase(SI
);
2165 bool BinaryContext::deregisterSection(BinarySection
&Section
) {
2166 BinarySection
*SectionPtr
= &Section
;
2167 auto Itr
= Sections
.find(SectionPtr
);
2168 if (Itr
!= Sections
.end()) {
2169 auto Range
= AddressToSection
.equal_range(SectionPtr
->getAddress());
2170 while (Range
.first
!= Range
.second
) {
2171 if (Range
.first
->second
== SectionPtr
) {
2172 AddressToSection
.erase(Range
.first
);
2178 deregisterSectionName(*SectionPtr
);
2179 Sections
.erase(Itr
);
2186 void BinaryContext::renameSection(BinarySection
&Section
,
2187 const Twine
&NewName
) {
2188 auto Itr
= Sections
.find(&Section
);
2189 assert(Itr
!= Sections
.end() && "Section must exist to be renamed.");
2190 Sections
.erase(Itr
);
2192 deregisterSectionName(Section
);
2194 Section
.Name
= NewName
.str();
2195 Section
.setOutputName(Section
.Name
);
2197 NameToSection
.insert(std::make_pair(Section
.Name
, &Section
));
2199 // Reinsert with the new name.
2200 Sections
.insert(&Section
);
2203 void BinaryContext::printSections(raw_ostream
&OS
) const {
2204 for (BinarySection
*const &Section
: Sections
)
2205 OS
<< "BOLT-INFO: " << *Section
<< "\n";
2208 BinarySection
&BinaryContext::absoluteSection() {
2209 if (ErrorOr
<BinarySection
&> Section
= getUniqueSectionByName("<absolute>"))
2211 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL
, 0u);
2214 ErrorOr
<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address
,
2215 size_t Size
) const {
2216 const ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2218 return std::make_error_code(std::errc::bad_address
);
2220 if (Section
->isVirtual())
2223 DataExtractor
DE(Section
->getContents(), AsmInfo
->isLittleEndian(),
2224 AsmInfo
->getCodePointerSize());
2225 auto ValueOffset
= static_cast<uint64_t>(Address
- Section
->getAddress());
2226 return DE
.getUnsigned(&ValueOffset
, Size
);
2229 ErrorOr
<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address
,
2230 size_t Size
) const {
2231 const ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2233 return std::make_error_code(std::errc::bad_address
);
2235 if (Section
->isVirtual())
2238 DataExtractor
DE(Section
->getContents(), AsmInfo
->isLittleEndian(),
2239 AsmInfo
->getCodePointerSize());
2240 auto ValueOffset
= static_cast<uint64_t>(Address
- Section
->getAddress());
2241 return DE
.getSigned(&ValueOffset
, Size
);
2244 void BinaryContext::addRelocation(uint64_t Address
, MCSymbol
*Symbol
,
2245 uint64_t Type
, uint64_t Addend
,
2247 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
2248 assert(Section
&& "cannot find section for address");
2249 Section
->addRelocation(Address
- Section
->getAddress(), Symbol
, Type
, Addend
,
2253 void BinaryContext::addDynamicRelocation(uint64_t Address
, MCSymbol
*Symbol
,
2254 uint64_t Type
, uint64_t Addend
,
2256 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
2257 assert(Section
&& "cannot find section for address");
2258 Section
->addDynamicRelocation(Address
- Section
->getAddress(), Symbol
, Type
,
2262 bool BinaryContext::removeRelocationAt(uint64_t Address
) {
2263 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
2264 assert(Section
&& "cannot find section for address");
2265 return Section
->removeRelocationAt(Address
- Section
->getAddress());
2268 const Relocation
*BinaryContext::getRelocationAt(uint64_t Address
) const {
2269 ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2273 return Section
->getRelocationAt(Address
- Section
->getAddress());
2277 BinaryContext::getDynamicRelocationAt(uint64_t Address
) const {
2278 ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2282 return Section
->getDynamicRelocationAt(Address
- Section
->getAddress());
2285 void BinaryContext::markAmbiguousRelocations(BinaryData
&BD
,
2286 const uint64_t Address
) {
2287 auto setImmovable
= [&](BinaryData
&BD
) {
2288 BinaryData
*Root
= BD
.getAtomicRoot();
2289 LLVM_DEBUG(if (Root
->isMoveable()) {
2290 dbgs() << "BOLT-DEBUG: setting " << *Root
<< " as immovable "
2291 << "due to ambiguous relocation referencing 0x"
2292 << Twine::utohexstr(Address
) << '\n';
2294 Root
->setIsMoveable(false);
2297 if (Address
== BD
.getAddress()) {
2300 // Set previous symbol as immovable
2301 BinaryData
*Prev
= getBinaryDataContainingAddress(Address
- 1);
2302 if (Prev
&& Prev
->getEndAddress() == BD
.getAddress())
2303 setImmovable(*Prev
);
2306 if (Address
== BD
.getEndAddress()) {
2309 // Set next symbol as immovable
2310 BinaryData
*Next
= getBinaryDataContainingAddress(BD
.getEndAddress());
2311 if (Next
&& Next
->getAddress() == BD
.getEndAddress())
2312 setImmovable(*Next
);
2316 BinaryFunction
*BinaryContext::getFunctionForSymbol(const MCSymbol
*Symbol
,
2317 uint64_t *EntryDesc
) {
2318 std::shared_lock
<llvm::sys::RWMutex
> Lock(SymbolToFunctionMapMutex
);
2319 auto BFI
= SymbolToFunctionMap
.find(Symbol
);
2320 if (BFI
== SymbolToFunctionMap
.end())
2323 BinaryFunction
*BF
= BFI
->second
;
2325 *EntryDesc
= BF
->getEntryIDForSymbol(Symbol
);
2331 BinaryContext::generateBugReportMessage(StringRef Message
,
2332 const BinaryFunction
&Function
) const {
2334 raw_string_ostream
SS(Msg
);
2335 SS
<< "=======================================\n";
2336 SS
<< "BOLT is unable to proceed because it couldn't properly understand "
2338 SS
<< "If you are running the most recent version of BOLT, you may "
2340 "report this and paste this dump.\nPlease check that there is no "
2341 "sensitive contents being shared in this dump.\n";
2342 SS
<< "\nOffending function: " << Function
.getPrintName() << "\n\n";
2343 ScopedPrinter
SP(SS
);
2344 SP
.printBinaryBlock("Function contents", *Function
.getData());
2346 const_cast<BinaryFunction
&>(Function
).print(SS
, "");
2347 SS
<< "ERROR: " << Message
;
2348 SS
<< "\n=======================================\n";
2353 BinaryContext::createInjectedBinaryFunction(const std::string
&Name
,
2355 InjectedBinaryFunctions
.push_back(new BinaryFunction(Name
, *this, IsSimple
));
2356 BinaryFunction
*BF
= InjectedBinaryFunctions
.back();
2357 setSymbolToFunctionMap(BF
->getSymbol(), BF
);
2358 BF
->CurrentState
= BinaryFunction::State::CFG
;
2362 std::pair
<size_t, size_t>
2363 BinaryContext::calculateEmittedSize(BinaryFunction
&BF
, bool FixBranches
) {
2364 // Adjust branch instruction to match the current layout.
2368 // Create local MC context to isolate the effect of ephemeral code emission.
2369 IndependentCodeEmitter MCEInstance
= createIndependentMCCodeEmitter();
2370 MCContext
*LocalCtx
= MCEInstance
.LocalCtx
.get();
2372 TheTarget
->createMCAsmBackend(*STI
, *MRI
, MCTargetOptions());
2374 SmallString
<256> Code
;
2375 raw_svector_ostream
VecOS(Code
);
2377 std::unique_ptr
<MCObjectWriter
> OW
= MAB
->createObjectWriter(VecOS
);
2378 std::unique_ptr
<MCStreamer
> Streamer(TheTarget
->createMCObjectStreamer(
2379 *TheTriple
, *LocalCtx
, std::unique_ptr
<MCAsmBackend
>(MAB
), std::move(OW
),
2380 std::unique_ptr
<MCCodeEmitter
>(MCEInstance
.MCE
.release()), *STI
));
2382 Streamer
->initSections(false, *STI
);
2384 MCSection
*Section
= MCEInstance
.LocalMOFI
->getTextSection();
2385 Section
->setHasInstructions(true);
2387 // Create symbols in the LocalCtx so that they get destroyed with it.
2388 MCSymbol
*StartLabel
= LocalCtx
->createTempSymbol();
2389 MCSymbol
*EndLabel
= LocalCtx
->createTempSymbol();
2391 Streamer
->switchSection(Section
);
2392 Streamer
->emitLabel(StartLabel
);
2393 emitFunctionBody(*Streamer
, BF
, BF
.getLayout().getMainFragment(),
2394 /*EmitCodeOnly=*/true);
2395 Streamer
->emitLabel(EndLabel
);
2397 using LabelRange
= std::pair
<const MCSymbol
*, const MCSymbol
*>;
2398 SmallVector
<LabelRange
> SplitLabels
;
2399 for (FunctionFragment
&FF
: BF
.getLayout().getSplitFragments()) {
2400 MCSymbol
*const SplitStartLabel
= LocalCtx
->createTempSymbol();
2401 MCSymbol
*const SplitEndLabel
= LocalCtx
->createTempSymbol();
2402 SplitLabels
.emplace_back(SplitStartLabel
, SplitEndLabel
);
2404 MCSectionELF
*const SplitSection
= LocalCtx
->getELFSection(
2405 BF
.getCodeSectionName(FF
.getFragmentNum()), ELF::SHT_PROGBITS
,
2406 ELF::SHF_EXECINSTR
| ELF::SHF_ALLOC
);
2407 SplitSection
->setHasInstructions(true);
2408 Streamer
->switchSection(SplitSection
);
2410 Streamer
->emitLabel(SplitStartLabel
);
2411 emitFunctionBody(*Streamer
, BF
, FF
, /*EmitCodeOnly=*/true);
2412 Streamer
->emitLabel(SplitEndLabel
);
2415 MCAssembler
&Assembler
=
2416 static_cast<MCObjectStreamer
*>(Streamer
.get())->getAssembler();
2419 // Obtain fragment sizes.
2420 std::vector
<uint64_t> FragmentSizes
;
2421 // Main fragment size.
2422 const uint64_t HotSize
= Assembler
.getSymbolOffset(*EndLabel
) -
2423 Assembler
.getSymbolOffset(*StartLabel
);
2424 FragmentSizes
.push_back(HotSize
);
2425 // Split fragment sizes.
2426 uint64_t ColdSize
= 0;
2427 for (const auto &Labels
: SplitLabels
) {
2428 uint64_t Size
= Assembler
.getSymbolOffset(*Labels
.second
) -
2429 Assembler
.getSymbolOffset(*Labels
.first
);
2430 FragmentSizes
.push_back(Size
);
2434 // Populate new start and end offsets of each basic block.
2435 uint64_t FragmentIndex
= 0;
2436 for (FunctionFragment
&FF
: BF
.getLayout().fragments()) {
2437 BinaryBasicBlock
*PrevBB
= nullptr;
2438 for (BinaryBasicBlock
*BB
: FF
) {
2439 const uint64_t BBStartOffset
=
2440 Assembler
.getSymbolOffset(*(BB
->getLabel()));
2441 BB
->setOutputStartAddress(BBStartOffset
);
2443 PrevBB
->setOutputEndAddress(BBStartOffset
);
2447 PrevBB
->setOutputEndAddress(FragmentSizes
[FragmentIndex
]);
2451 // Clean-up the effect of the code emission.
2452 for (const MCSymbol
&Symbol
: Assembler
.symbols()) {
2453 MCSymbol
*MutableSymbol
= const_cast<MCSymbol
*>(&Symbol
);
2454 MutableSymbol
->setUndefined();
2455 MutableSymbol
->setIsRegistered(false);
2458 return std::make_pair(HotSize
, ColdSize
);
2461 bool BinaryContext::validateInstructionEncoding(
2462 ArrayRef
<uint8_t> InputSequence
) const {
2465 DisAsm
->getInstruction(Inst
, InstSize
, InputSequence
, 0, nulls());
2466 assert(InstSize
== InputSequence
.size() &&
2467 "Disassembled instruction size does not match the sequence.");
2469 SmallString
<256> Code
;
2470 SmallVector
<MCFixup
, 4> Fixups
;
2472 MCE
->encodeInstruction(Inst
, Code
, Fixups
, *STI
);
2473 auto OutputSequence
= ArrayRef
<uint8_t>((uint8_t *)Code
.data(), Code
.size());
2474 if (InputSequence
!= OutputSequence
) {
2475 if (opts::Verbosity
> 1) {
2476 this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2477 << " input: " << InputSequence
<< '\n'
2478 << " output: " << OutputSequence
<< '\n';
2486 uint64_t BinaryContext::getHotThreshold() const {
2487 static uint64_t Threshold
= 0;
2488 if (Threshold
== 0) {
2489 Threshold
= std::max(
2490 (uint64_t)opts::ExecutionCountThreshold
,
2491 NumProfiledFuncs
? SumExecutionCount
/ (2 * NumProfiledFuncs
) : 1);
2496 BinaryFunction
*BinaryContext::getBinaryFunctionContainingAddress(
2497 uint64_t Address
, bool CheckPastEnd
, bool UseMaxSize
) {
2498 auto FI
= BinaryFunctions
.upper_bound(Address
);
2499 if (FI
== BinaryFunctions
.begin())
2503 const uint64_t UsedSize
=
2504 UseMaxSize
? FI
->second
.getMaxSize() : FI
->second
.getSize();
2506 if (Address
>= FI
->first
+ UsedSize
+ (CheckPastEnd
? 1 : 0))
2512 BinaryFunction
*BinaryContext::getBinaryFunctionAtAddress(uint64_t Address
) {
2513 // First, try to find a function starting at the given address. If the
2514 // function was folded, this will get us the original folded function if it
2515 // wasn't removed from the list, e.g. in non-relocation mode.
2516 auto BFI
= BinaryFunctions
.find(Address
);
2517 if (BFI
!= BinaryFunctions
.end())
2518 return &BFI
->second
;
2520 // We might have folded the function matching the object at the given
2521 // address. In such case, we look for a function matching the symbol
2522 // registered at the original address. The new function (the one that the
2523 // original was folded into) will hold the symbol.
2524 if (const BinaryData
*BD
= getBinaryDataAtAddress(Address
)) {
2525 uint64_t EntryID
= 0;
2526 BinaryFunction
*BF
= getFunctionForSymbol(BD
->getSymbol(), &EntryID
);
2527 if (BF
&& EntryID
== 0)
2533 /// Deregister JumpTable registered at a given \p Address and delete it.
2534 void BinaryContext::deleteJumpTable(uint64_t Address
) {
2535 assert(JumpTables
.count(Address
) && "Must have a jump table at address");
2536 JumpTable
*JT
= JumpTables
.at(Address
);
2537 for (BinaryFunction
*Parent
: JT
->Parents
)
2538 Parent
->JumpTables
.erase(Address
);
2539 JumpTables
.erase(Address
);
2543 DebugAddressRangesVector
BinaryContext::translateModuleAddressRanges(
2544 const DWARFAddressRangesVector
&InputRanges
) const {
2545 DebugAddressRangesVector OutputRanges
;
2547 for (const DWARFAddressRange Range
: InputRanges
) {
2548 auto BFI
= BinaryFunctions
.lower_bound(Range
.LowPC
);
2549 while (BFI
!= BinaryFunctions
.end()) {
2550 const BinaryFunction
&Function
= BFI
->second
;
2551 if (Function
.getAddress() >= Range
.HighPC
)
2553 const DebugAddressRangesVector FunctionRanges
=
2554 Function
.getOutputAddressRanges();
2555 llvm::move(FunctionRanges
, std::back_inserter(OutputRanges
));
2556 std::advance(BFI
, 1);
2560 return OutputRanges
;