1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the BinaryContext class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
24 #include "llvm/MC/MCAsmLayout.h"
25 #include "llvm/MC/MCAssembler.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
28 #include "llvm/MC/MCInstPrinter.h"
29 #include "llvm/MC/MCObjectStreamer.h"
30 #include "llvm/MC/MCObjectWriter.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCSectionELF.h"
33 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSubtargetInfo.h"
35 #include "llvm/MC/MCSymbol.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/Regex.h"
43 #include <unordered_set>
48 #define DEBUG_TYPE "bolt"
// Command-line options defined by this file. Each one is registered in
// BoltCategory.
// NOTE(review): the embedded line numbers skip inside this group (54 -> 57,
// 58 -> 61, 63 -> 65, 66 -> 69, 70 -> 73), so some declarator/attribute lines
// are missing from this listing; compare against the upstream file.

// --no-huge-pages: hidden boolean flag; use regular size pages for code
// alignment.
52 cl::opt
<bool> NoHugePages("no-huge-pages",
53 cl::desc("use regular size pages for code alignment"),
54 cl::Hidden
, cl::cat(BoltCategory
));
// --print-debug-info: print debug info when printing functions.
57 PrintDebugInfo("print-debug-info",
58 cl::desc("print debug info when printing functions"),
61 cl::cat(BoltCategory
));
// Hidden boolean flag: print relocations when printing functions/objects.
// NOTE(review): the option-name string line is missing from this listing.
63 cl::opt
<bool> PrintRelocations(
65 cl::desc("print relocations when printing functions/objects"), cl::Hidden
,
66 cl::cat(BoltCategory
));
// --print-mem-data: print memory data annotations when printing functions.
69 PrintMemData("print-mem-data",
70 cl::desc("print memory data annotations when printing functions"),
73 cl::cat(BoltCategory
));
80 BinaryContext::BinaryContext(std::unique_ptr
<MCContext
> Ctx
,
81 std::unique_ptr
<DWARFContext
> DwCtx
,
82 std::unique_ptr
<Triple
> TheTriple
,
83 const Target
*TheTarget
, std::string TripleName
,
84 std::unique_ptr
<MCCodeEmitter
> MCE
,
85 std::unique_ptr
<MCObjectFileInfo
> MOFI
,
86 std::unique_ptr
<const MCAsmInfo
> AsmInfo
,
87 std::unique_ptr
<const MCInstrInfo
> MII
,
88 std::unique_ptr
<const MCSubtargetInfo
> STI
,
89 std::unique_ptr
<MCInstPrinter
> InstPrinter
,
90 std::unique_ptr
<const MCInstrAnalysis
> MIA
,
91 std::unique_ptr
<MCPlusBuilder
> MIB
,
92 std::unique_ptr
<const MCRegisterInfo
> MRI
,
93 std::unique_ptr
<MCDisassembler
> DisAsm
)
94 : Ctx(std::move(Ctx
)), DwCtx(std::move(DwCtx
)),
95 TheTriple(std::move(TheTriple
)), TheTarget(TheTarget
),
96 TripleName(TripleName
), MCE(std::move(MCE
)), MOFI(std::move(MOFI
)),
97 AsmInfo(std::move(AsmInfo
)), MII(std::move(MII
)), STI(std::move(STI
)),
98 InstPrinter(std::move(InstPrinter
)), MIA(std::move(MIA
)),
99 MIB(std::move(MIB
)), MRI(std::move(MRI
)), DisAsm(std::move(DisAsm
)) {
100 Relocation::Arch
= this->TheTriple
->getArch();
101 RegularPageSize
= isAArch64() ? RegularPageSizeAArch64
: RegularPageSizeX86
;
102 PageAlign
= opts::NoHugePages
? RegularPageSize
: HugePageSize
;
// Destructor. Iterates over Sections, InjectedBinaryFunctions and JumpTables;
// each injected function is deleted explicitly.
// NOTE(review): the embedded line numbers skip 107 and everything after 110,
// so the bodies of the Sections and JumpTables loops (presumably the matching
// deletes) and the closing brace are missing from this listing - confirm
// against the upstream file.
105 BinaryContext::~BinaryContext() {
106 for (BinarySection
*Section
: Sections
)
108 for (BinaryFunction
*InjectedFunction
: InjectedBinaryFunctions
)
109 delete InjectedFunction
;
// JumpTables is keyed by uint64_t and maps to raw JumpTable pointers.
110 for (std::pair
<const uint64_t, JumpTable
*> JTI
: JumpTables
)
115 /// Create a BinaryContext for the object file \p File. The architecture name,
116 /// feature string and target triple are all derived from \p File.
///
/// \p IsPIC is forwarded to createMCObjectFileInfo(), influences the LSDA
/// encoding, and determines HasFixedLoadAddress (set to !IsPIC). \p DwCtx is
/// moved into the new context.
///
/// \returns the new context, or a string error (std::errc::not_supported)
/// when the machine is unrecognized or one of the target components cannot be
/// created.
///
/// NOTE(review): the embedded line numbers skip in many places (e.g. 123 ->
/// 125, 151 -> 154, 155 -> 157), so several statements - mostly the guards in
/// front of the error returns - are missing from this listing.
117 Expected
<std::unique_ptr
<BinaryContext
>>
118 BinaryContext::createBinaryContext(const ObjectFile
*File
, bool IsPIC
,
119 std::unique_ptr
<DWARFContext
> DwCtx
) {
120 StringRef ArchName
= "";
121 std::string FeaturesStr
= "";
// Select the architecture name and subtarget feature string from the
// architecture reported by the object file.
122 switch (File
->getArch()) {
123 case llvm::Triple::x86_64
:
125 FeaturesStr
= "+nopl";
127 case llvm::Triple::aarch64
:
128 ArchName
= "aarch64";
129 FeaturesStr
= "+all";
131 case llvm::Triple::riscv64
: {
132 ArchName
= "riscv64";
// For RISC-V the feature set is read from the object file itself.
133 Expected
<SubtargetFeatures
> Features
= File
->getFeatures();
135 if (auto E
= Features
.takeError())
138 // We rely on relaxation for some transformations (e.g., promoting all calls
139 // to PseudoCALL and then making JITLink relax them). Since the relax
140 // feature is not stored in the object file, we manually enable it.
141 Features
->AddFeature("relax");
142 FeaturesStr
= Features
->getString();
146 return createStringError(std::errc::not_supported
,
147 "BOLT-ERROR: Unrecognized machine in ELF file");
150 auto TheTriple
= std::make_unique
<Triple
>(File
->makeTriple());
151 const std::string TripleName
= TheTriple
->str();
// Look up the target for the triple; Error carries the message that is
// reported when the lookup fails.
154 const Target
*TheTarget
=
155 TargetRegistry::lookupTarget(std::string(ArchName
), *TheTriple
, Error
);
157 return createStringError(make_error_code(std::errc::not_supported
),
158 Twine("BOLT-ERROR: ", Error
));
160 std::unique_ptr
<const MCRegisterInfo
> MRI(
161 TheTarget
->createMCRegInfo(TripleName
));
163 return createStringError(
164 make_error_code(std::errc::not_supported
),
165 Twine("BOLT-ERROR: no register info for target ", TripleName
));
167 // Set up disassembler.
168 std::unique_ptr
<MCAsmInfo
> AsmInfo(
169 TheTarget
->createMCAsmInfo(*MRI
, TripleName
, MCTargetOptions()));
171 return createStringError(
172 make_error_code(std::errc::not_supported
),
173 Twine("BOLT-ERROR: no assembly info for target ", TripleName
));
174 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
175 // we want to emit such names as using @PLT without double quotes to convey
176 // variant kind to the assembler. BOLT doesn't rely on the linker so we can
177 // override the default AsmInfo behavior to emit names the way we want.
178 AsmInfo
->setAllowAtInName(true);
// The CPU string is left empty; only the feature string chosen above is used.
180 std::unique_ptr
<const MCSubtargetInfo
> STI(
181 TheTarget
->createMCSubtargetInfo(TripleName
, "", FeaturesStr
));
183 return createStringError(
184 make_error_code(std::errc::not_supported
),
185 Twine("BOLT-ERROR: no subtarget info for target ", TripleName
));
187 std::unique_ptr
<const MCInstrInfo
> MII(TheTarget
->createMCInstrInfo());
189 return createStringError(
190 make_error_code(std::errc::not_supported
),
191 Twine("BOLT-ERROR: no instruction info for target ", TripleName
));
// The MCContext is built from raw pointers to the objects created above, and
// the object file info is attached to it right after creation.
193 std::unique_ptr
<MCContext
> Ctx(
194 new MCContext(*TheTriple
, AsmInfo
.get(), MRI
.get(), STI
.get()));
195 std::unique_ptr
<MCObjectFileInfo
> MOFI(
196 TheTarget
->createMCObjectFileInfo(*Ctx
, IsPIC
));
197 Ctx
->setObjectFileInfo(MOFI
.get());
198 // We do not support X86 Large code model. Change this in the future.
200 if (TheTriple
->getArch() == llvm::Triple::aarch64
)
// Default LSDA encoding: absolute pointer when Large, 4-byte unsigned data
// otherwise.
202 unsigned LSDAEncoding
=
203 Large
? dwarf::DW_EH_PE_absptr
: dwarf::DW_EH_PE_udata4
;
// Alternative PC-relative encoding, 8- or 4-byte signed depending on Large.
// NOTE(review): the condition selecting it (embedded line 204) is missing
// from this listing.
205 LSDAEncoding
= dwarf::DW_EH_PE_pcrel
|
206 (Large
? dwarf::DW_EH_PE_sdata8
: dwarf::DW_EH_PE_sdata4
);
209 std::unique_ptr
<MCDisassembler
> DisAsm(
210 TheTarget
->createMCDisassembler(*STI
, *Ctx
));
213 return createStringError(
214 make_error_code(std::errc::not_supported
),
215 Twine("BOLT-ERROR: no disassembler info for target ", TripleName
));
217 std::unique_ptr
<const MCInstrAnalysis
> MIA(
218 TheTarget
->createMCInstrAnalysis(MII
.get()));
220 return createStringError(
221 make_error_code(std::errc::not_supported
),
222 Twine("BOLT-ERROR: failed to create instruction analysis for target ",
// The instruction printer uses the assembler dialect reported by AsmInfo and
// is configured to print immediates in hexadecimal.
225 int AsmPrinterVariant
= AsmInfo
->getAssemblerDialect();
226 std::unique_ptr
<MCInstPrinter
> InstructionPrinter(
227 TheTarget
->createMCInstPrinter(*TheTriple
, AsmPrinterVariant
, *AsmInfo
,
229 if (!InstructionPrinter
)
230 return createStringError(
231 make_error_code(std::errc::not_supported
),
232 Twine("BOLT-ERROR: no instruction printer for target ", TripleName
));
233 InstructionPrinter
->setPrintImmHex(true);
235 std::unique_ptr
<MCCodeEmitter
> MCE(
236 TheTarget
->createMCCodeEmitter(*MII
, *Ctx
));
238 // Make sure we don't miss any output on core dumps.
239 outs().SetUnbuffered();
240 errs().SetUnbuffered();
241 dbgs().SetUnbuffered();
// Hand all components over to the new context. The MCPlusBuilder argument is
// passed as nullptr at this point.
243 auto BC
= std::make_unique
<BinaryContext
>(
244 std::move(Ctx
), std::move(DwCtx
), std::move(TheTriple
), TheTarget
,
245 std::string(TripleName
), std::move(MCE
), std::move(MOFI
),
246 std::move(AsmInfo
), std::move(MII
), std::move(STI
),
247 std::move(InstructionPrinter
), std::move(MIA
), nullptr, std::move(MRI
),
250 BC
->LSDAEncoding
= LSDAEncoding
;
// The assembler backend is created from components now owned by BC.
252 BC
->MAB
= std::unique_ptr
<MCAsmBackend
>(
253 BC
->TheTarget
->createMCAsmBackend(*BC
->STI
, *BC
->MRI
, MCTargetOptions()));
255 BC
->setFilename(File
->getFileName());
// Non-PIC input is treated as having a fixed load address.
257 BC
->HasFixedLoadAddress
= !IsPIC
;
// A second disassembler instance is created for symbolic disassembly.
259 BC
->SymbolicDisAsm
= std::unique_ptr
<MCDisassembler
>(
260 BC
->TheTarget
->createMCDisassembler(*BC
->STI
, *BC
->Ctx
));
262 if (!BC
->SymbolicDisAsm
)
263 return createStringError(
264 make_error_code(std::errc::not_supported
),
265 Twine("BOLT-ERROR: no disassembler info for target ", TripleName
));
// BC is a unique_ptr while the return type is Expected<unique_ptr<...>>, so
// the value is moved into the converting return.
267 return std::move(BC
);
// Decide by name whether a symbol gets special treatment. The names checked
// are "__hot_start"/"__hot_end", "__hot_data_start"/"__hot_data_end" and
// "_end".
// NOTE(review): the first half of each condition and every return statement
// (embedded lines 271, 273-275, 277-278, 280+) are missing from this listing,
// so the exact conditions and results cannot be confirmed from here.
270 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName
) const {
272 (SymbolName
== "__hot_start" || SymbolName
== "__hot_end"))
276 (SymbolName
== "__hot_data_start" || SymbolName
== "__hot_data_end"))
279 if (SymbolName
== "_end")
285 std::unique_ptr
<MCObjectWriter
>
286 BinaryContext::createObjectWriter(raw_pwrite_stream
&OS
) {
287 return MAB
->createObjectWriter(OS
);
// Consistency check over BinaryDataMap: for an entry Itr, every following
// entry in the same section whose [address, address + size) range is contained
// in Itr's object is expected to have Itr's object as its Parent. A warning
// naming both objects is printed for each violation.
// NOTE(review): the outer loop, the iterator advances and the return statement
// (embedded lines 293-294, 304-311) are missing from this listing.
290 bool BinaryContext::validateObjectNesting() const {
291 auto Itr
= BinaryDataMap
.begin();
292 auto End
= BinaryDataMap
.end();
295 auto Next
= std::next(Itr
);
// Walk forward while Next is still in the same section and nested in Itr.
296 while (Next
!= End
&&
297 Itr
->second
->getSection() == Next
->second
->getSection() &&
298 Itr
->second
->containsRange(Next
->second
->getAddress(),
299 Next
->second
->getSize())) {
300 if (Next
->second
->Parent
!= Itr
->second
) {
301 errs() << "BOLT-WARNING: object nesting incorrect for:\n"
302 << "BOLT-WARNING: " << *Itr
->second
<< "\n"
303 << "BOLT-WARNING: " << *Next
->second
<< "\n";
// Consistency check over relocations: for every relocation in every section,
// compute its absolute address (relocation offset + section address) and look
// up the BinaryData containing that address. One warning is printed when the
// lookup result is rejected and a different one when the object has no atomic
// root.
// NOTE(review): the condition of the first branch (embedded line 319) and the
// rest of the function after line 327 are missing from this listing.
313 bool BinaryContext::validateHoles() const {
315 for (BinarySection
&Section
: sections()) {
316 for (const Relocation
&Rel
: Section
.relocations()) {
// Rel.Offset is section-relative; convert it to an absolute address.
317 uint64_t RelAddr
= Rel
.Offset
+ Section
.getAddress();
318 const BinaryData
*BD
= getBinaryDataContainingAddress(RelAddr
);
320 errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
321 << " 0x" << Twine::utohexstr(RelAddr
) << " in "
322 << Section
.getName() << "\n";
324 } else if (!BD
->getAtomicRoot()) {
325 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
326 << "address 0x" << Twine::utohexstr(RelAddr
) << " in "
327 << Section
.getName() << "\n";
// Re-establish Parent links in BinaryDataMap around the entry \p GAI.
// Two steps are visible:
//  1. If the preceding entry is in the same section and its range contains
//     GAI's [Address, Address + Size), it becomes the parent of GAI (via
//     fixParents); there is also a path that calls fixParents with nullptr.
//  2. Entries following GAI whose range is contained in BD (GAI's parent if it
//     has one, otherwise GAI's own object) get BD as their Parent.
// NOTE(review): several lines (embedded 343, 347-350, 354, 358, 360-364, 366,
// 369, 373+) are missing from this listing, including the iterator advances
// and the else-branch structure.
335 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI
) {
336 const uint64_t Address
= GAI
->second
->getAddress();
337 const uint64_t Size
= GAI
->second
->getSize();
// Helper: set the parent of *Itr to NewParent, then keep rewriting entries
// whose Parent still equals the old (non-null) parent.
339 auto fixParents
= [&](BinaryDataMapType::iterator Itr
,
340 BinaryData
*NewParent
) {
341 BinaryData
*OldParent
= Itr
->second
->Parent
;
342 Itr
->second
->Parent
= NewParent
;
344 while (Itr
!= BinaryDataMap
.end() && OldParent
&&
345 Itr
->second
->Parent
== OldParent
) {
346 Itr
->second
->Parent
= NewParent
;
351 // Check if the previous symbol contains the newly added symbol.
352 if (GAI
!= BinaryDataMap
.begin()) {
353 BinaryData
*Prev
= std::prev(GAI
)->second
;
355 if (Prev
->getSection() == GAI
->second
->getSection() &&
356 Prev
->containsRange(Address
, Size
)) {
357 fixParents(GAI
, Prev
);
359 fixParents(GAI
, nullptr);
365 // Check if the newly added symbol contains any subsequent symbols.
// BD is the containing object used for the following entries: GAI's parent
// when it has one, otherwise GAI's own object.
367 BinaryData
*BD
= GAI
->second
->Parent
? GAI
->second
->Parent
: GAI
->second
;
368 auto Itr
= std::next(GAI
);
370 Itr
!= BinaryDataMap
.end() &&
371 BD
->containsRange(Itr
->second
->getAddress(), Itr
->second
->getSize())) {
372 Itr
->second
->Parent
= BD
;
// Return the range of BinaryDataMap entries that follow \p BD and that BD is
// an ancestor of. Start is the entry right after the one found at BD's
// address; End is advanced while BD->isAncestorOf() holds.
// NOTE(review): the initialization of End and the loop body (embedded lines
// 381 and 383) are missing from this listing.
378 iterator_range
<BinaryContext::binary_data_iterator
>
379 BinaryContext::getSubBinaryData(BinaryData
*BD
) {
380 auto Start
= std::next(BinaryDataMap
.find(BD
->getAddress()));
382 while (End
!= BinaryDataMap
.end() && BD
->isAncestorOf(End
->second
))
384 return make_range(Start
, End
);
// Resolve a reference from function \p BF to \p Address into a
// (symbol, addend) pair. The visible cases, in order:
//  * a constant island of BF itself, or of another function found through
//    AddressToConstantIslandMap (addend 0);
//  * an address in a text section: inside BF but not at its start it becomes
//    a new entry point of BF; the visible call to
//    addInterproceduralReference() follows that branch;
//  * with relocations, a possible PIC jump table (only when IsPCRel);
//  * an existing BinaryData containing the address (addend = offset into it);
//  * otherwise a newly created "DATAat" global symbol (addend 0).
// NOTE(review): the last parameter line and a number of statements (embedded
// lines 389-390, 399, 401, 405-406, 415, 419, 422-425, ...) are missing from
// this listing.
387 std::pair
<const MCSymbol
*, uint64_t>
388 BinaryContext::handleAddressRef(uint64_t Address
, BinaryFunction
&BF
,
391 // Check if this is an access to a constant island and create bookkeeping
392 // to keep track of it and emit it later as part of this function.
393 if (MCSymbol
*IslandSym
= BF
.getOrCreateIslandAccess(Address
))
394 return std::make_pair(IslandSym
, 0);
396 // Detect custom code written in assembly that refers to arbitrary
397 // constant islands from other functions. Write this reference so we
398 // can pull this constant island and emit it as part of this function
// lower_bound() yields the first entry whose key is >= Address.
400 auto IslandIter
= AddressToConstantIslandMap
.lower_bound(Address
);
// True when that entry starts past Address (or there is none) and a preceding
// entry exists. NOTE(review): the statement guarded by this condition
// (embedded line 405) is missing here - presumably it steps back to the
// preceding entry; confirm.
402 if (IslandIter
!= AddressToConstantIslandMap
.begin() &&
403 (IslandIter
== AddressToConstantIslandMap
.end() ||
404 IslandIter
->first
> Address
))
407 if (IslandIter
!= AddressToConstantIslandMap
.end()) {
408 // Fall-back to referencing the original constant island in the presence
409 // of dynamic relocs, as we currently do not support cloning them.
410 // Notice: we might fail to link because of this, if the original constant
411 // island we are referring would be emitted too far away.
412 if (IslandIter
->second
->hasDynamicRelocationAtIsland()) {
413 MCSymbol
*IslandSym
=
414 IslandIter
->second
->getOrCreateIslandAccess(Address
);
416 return std::make_pair(IslandSym
, 0);
// Otherwise reference the island through a proxy symbol and record the
// dependency on the owning function in BF.
417 } else if (MCSymbol
*IslandSym
=
418 IslandIter
->second
->getOrCreateProxyIslandAccess(Address
,
420 BF
.createIslandDependency(IslandSym
, IslandIter
->second
);
421 return std::make_pair(IslandSym
, 0);
426 // Note that the address does not necessarily have to reside inside
427 // a section, it could be an absolute address too.
428 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
429 if (Section
&& Section
->isText()) {
// On AArch64 the containment check uses the function's maximum size.
430 if (BF
.containsAddress(Address
, /*UseMaxSize=*/isAArch64())) {
431 if (Address
!= BF
.getAddress()) {
432 // The address could potentially escape. Mark it as another entry
433 // point into the function.
434 if (opts::Verbosity
>= 1) {
435 outs() << "BOLT-INFO: potentially escaped address 0x"
436 << Twine::utohexstr(Address
) << " in function " << BF
<< '\n';
438 BF
.HasInternalLabelReference
= true;
439 return std::make_pair(
440 BF
.addEntryPointAtOffset(Address
- BF
.getAddress()), 0);
443 addInterproceduralReference(&BF
, Address
);
447 // With relocations, catch jump table references outside of the basic block
448 // containing the indirect jump.
449 if (HasRelocations
) {
450 const MemoryContentsType MemType
= analyzeMemoryAt(Address
, BF
);
451 if (MemType
== MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE
&& IsPCRel
) {
452 const MCSymbol
*Symbol
=
453 getOrCreateJumpTable(BF
, Address
, JumpTable::JTT_PIC
);
455 return std::make_pair(Symbol
, 0);
// Reference an existing data object; the addend is the offset into it.
459 if (BinaryData
*BD
= getBinaryDataContainingAddress(Address
))
460 return std::make_pair(BD
->getSymbol(), Address
- BD
->getAddress());
462 // TODO: use DWARF info to get size/alignment here?
463 MCSymbol
*TargetSymbol
= getOrCreateGlobalSymbol(Address
, "DATAat");
464 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol
->getName() << '\n');
465 return std::make_pair(TargetSymbol
, 0);
// Classify the memory at \p Address as seen from function \p BF.
// Returns POSSIBLE_PIC_JUMP_TABLE or POSSIBLE_JUMP_TABLE when
// analyzeJumpTable() accepts the address as a PIC or normal table (PIC is
// tried first). UNKNOWN is returned in every other visible case, including
// virtual sections and text sections.
// NOTE(review): the guards in front of the first UNKNOWN return and of the
// "no section" branch (embedded lines 470, 474) are missing from this listing.
468 MemoryContentsType
BinaryContext::analyzeMemoryAt(uint64_t Address
,
469 BinaryFunction
&BF
) {
471 return MemoryContentsType::UNKNOWN
;
473 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
475 // No section - possibly an absolute address. Since we don't allow
476 // internal function addresses to escape the function scope - we
477 // consider it a tail call.
478 if (opts::Verbosity
> 1) {
479 errs() << "BOLT-WARNING: no section for address 0x"
480 << Twine::utohexstr(Address
) << " referenced from function " << BF
483 return MemoryContentsType::UNKNOWN
;
486 if (Section
->isVirtual()) {
487 // The contents are filled at runtime.
488 return MemoryContentsType::UNKNOWN
;
491 // No support for jump tables in code yet.
492 if (Section
->isText())
493 return MemoryContentsType::UNKNOWN
;
495 // Start with checking for PIC jump table. We expect non-PIC jump tables
496 // to have high 32 bits set to 0.
497 if (analyzeJumpTable(Address
, JumpTable::JTT_PIC
, BF
))
498 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE
;
500 if (analyzeJumpTable(Address
, JumpTable::JTT_NORMAL
, BF
))
501 return MemoryContentsType::POSSIBLE_JUMP_TABLE
;
503 return MemoryContentsType::UNKNOWN
;
// Heuristically decide whether the memory at \p Address is a jump table of
// kind \p Type belonging to function \p BF.
//
// Entries are read one by one from Address up to an upper bound given by the
// section end, the end of the data object at Address (if it has a size) and
// \p NextJTAddress. For each entry the target value is computed and
// classified as __builtin_unreachable (end of BF), the function start, or a
// regular target inside BF or one of its related fragments.
//
// \p EntriesAsAddress, when non-null, receives every accepted target.
// \p HasEntryInFragment, when non-null, is set to true if a real entry lands
// in a function other than BF.
//
// Returns true when NumRealEntries plus one (if any special entry was seen)
// is at least 2.
// NOTE(review): many lines (embedded 524-525, 527-529, 538-539, 541-542, 547,
// 552, 557, 559-560, ...) are missing from this listing, including the loop
// exits and the NumRealEntries increment.
506 bool BinaryContext::analyzeJumpTable(const uint64_t Address
,
507 const JumpTable::JumpTableType Type
,
508 const BinaryFunction
&BF
,
509 const uint64_t NextJTAddress
,
510 JumpTable::AddressesType
*EntriesAsAddress
,
511 bool *HasEntryInFragment
) const {
512 // Is one of the targets __builtin_unreachable?
513 bool HasUnreachable
= false;
515 // Does one of the entries match function start address?
516 bool HasStartAsEntry
= false;
518 // Number of targets other than __builtin_unreachable.
519 uint64_t NumRealEntries
= 0;
// Helper: record an accepted target if the caller asked for the list.
521 auto addEntryAddress
= [&](uint64_t EntryAddress
) {
522 if (EntriesAsAddress
)
523 EntriesAsAddress
->emplace_back(EntryAddress
);
526 ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
530 // The upper bound is defined by containing object, section limits, and
531 // the next jump table in memory.
532 uint64_t UpperBound
= Section
->getEndAddress();
533 const BinaryData
*JumpTableBD
= getBinaryDataAtAddress(Address
);
534 if (JumpTableBD
&& JumpTableBD
->getSize()) {
535 assert(JumpTableBD
->getEndAddress() <= UpperBound
&&
536 "data object cannot cross a section boundary");
537 UpperBound
= JumpTableBD
->getEndAddress();
540 UpperBound
= std::min(NextJTAddress
, UpperBound
);
543 using JTT
= JumpTable::JumpTableType
;
544 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
545 Address
, BF
.getPrintName(),
546 Type
== JTT::JTT_PIC
? "PIC" : "Normal");
548 const uint64_t EntrySize
= getJumpTableEntrySize(Type
);
// NOTE(review): UpperBound and EntrySize are both uint64_t, so
// "UpperBound - EntrySize" wraps around if UpperBound < EntrySize - confirm
// that this cannot happen for the inputs reaching this loop.
549 for (uint64_t EntryAddress
= Address
; EntryAddress
<= UpperBound
- EntrySize
;
550 EntryAddress
+= EntrySize
) {
551 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress
)
553 // Check if there's a proper relocation against the jump table entry.
554 if (HasRelocations
) {
555 if (Type
== JumpTable::JTT_PIC
&&
556 !DataPCRelocations
.count(EntryAddress
)) {
558 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
561 if (Type
== JumpTable::JTT_NORMAL
&& !getRelocationAt(EntryAddress
)) {
564 << "FAIL: JTT_NORMAL table, no relocation for this address\n");
// PIC entries hold a signed offset that is added to the table's base address;
// normal entries hold the target pointer itself.
569 const uint64_t Value
=
570 (Type
== JumpTable::JTT_PIC
)
571 ? Address
+ *getSignedValueAtAddress(EntryAddress
, EntrySize
)
572 : *getPointerAtAddress(EntryAddress
);
574 // __builtin_unreachable() case.
575 if (Value
== BF
.getAddress() + BF
.getSize()) {
576 addEntryAddress(Value
);
577 HasUnreachable
= true;
578 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value
));
582 // Function start is another special case. It is allowed in the jump table,
583 // but we need at least one other regular entry to distinguish the table
584 // from, e.g., a function pointer array.
585 if (Value
== BF
.getAddress()) {
586 HasStartAsEntry
= true;
587 addEntryAddress(Value
);
591 // Function or one of its fragments.
592 const BinaryFunction
*TargetBF
= getBinaryFunctionContainingAddress(Value
);
593 const bool DoesBelongToFunction
=
594 BF
.containsAddress(Value
) ||
595 (TargetBF
&& TargetBF
->isParentOrChildOf(BF
));
596 if (!DoesBelongToFunction
) {
598 if (!BF
.containsAddress(Value
)) {
599 dbgs() << "FAIL: function doesn't contain this address\n";
601 dbgs() << " ! function containing this address: "
602 << TargetBF
->getPrintName() << '\n';
603 if (TargetBF
->isFragment()) {
604 dbgs() << " ! is a fragment";
605 for (BinaryFunction
*Parent
: TargetBF
->ParentFragments
)
606 dbgs() << ", parent: " << Parent
->getPrintName();
615 // Check there's an instruction at this offset.
616 if (TargetBF
->getState() == BinaryFunction::State::Disassembled
&&
617 !TargetBF
->getInstructionAtOffset(Value
- TargetBF
->getAddress())) {
618 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value
));
623 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value
));
// A real entry that lands in a function other than BF is reported to the
// caller through the optional out-parameter.
625 if (TargetBF
!= &BF
&& HasEntryInFragment
)
626 *HasEntryInFragment
= true;
627 addEntryAddress(Value
);
630 // It's a jump table if the number of real entries is more than 1, or there's
631 // one real entry and one or more special targets. If there are only multiple
632 // special targets, then it's not a jump table.
633 return NumRealEntries
+ (HasUnreachable
|| HasStartAsEntry
) >= 2;
// Fill in the entries of every registered jump table.
// For each table in JumpTables the visible code:
//  * computes whether any parent function is non-simple,
//  * takes the address of the next table as the upper bound,
//  * runs analyzeJumpTable() with the first parent, filling
//    JT->EntriesAsAddress and JT->IsSplit,
//  * registers the entries with each parent, either as an ignored branch (for
//    the address right past the function) or as a referenced offset,
//  * in strict mode, erases the PC-relative relocation records covered by a
//    PIC table,
//  * schedules parents with an indirect target to a split fragment for
//    skipping.
// Finally, in strict mode, leftover PC-relative relocations are reported, and
// the DataPCRelocations set is cleared.
// NOTE(review): numerous lines (embedded 638, 640, 642, 646-648, 651, 653-654,
// 657-658, ...) are missing from this listing, including the loop increment
// and the condition guarding the failure path.
636 void BinaryContext::populateJumpTables() {
637 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations
.size()
639 for (auto JTI
= JumpTables
.begin(), JTE
= JumpTables
.end(); JTI
!= JTE
;
641 JumpTable
*JT
= JTI
->second
;
643 bool NonSimpleParent
= false;
644 for (BinaryFunction
*BF
: JT
->Parents
)
645 NonSimpleParent
|= !BF
->isSimple();
// 0 is the initial value when there is no following table.
649 uint64_t NextJTAddress
= 0;
650 auto NextJTI
= std::next(JTI
);
652 NextJTAddress
= NextJTI
->second
->getAddress();
655 analyzeJumpTable(JT
->getAddress(), JT
->Type
, *(JT
->Parents
[0]),
656 NextJTAddress
, &JT
->EntriesAsAddress
, &JT
->IsSplit
);
659 dbgs() << "failed to analyze ";
661 if (NextJTI
!= JTE
) {
663 NextJTI
->second
->print(dbgs());
666 llvm_unreachable("jump table heuristic failure");
668 for (BinaryFunction
*Frag
: JT
->Parents
) {
670 Frag
->setHasIndirectTargetToSplitFragment(true);
671 for (uint64_t EntryAddress
: JT
->EntriesAsAddress
)
672 // if target is builtin_unreachable
673 if (EntryAddress
== Frag
->getAddress() + Frag
->getSize()) {
674 Frag
->IgnoredBranches
.emplace_back(EntryAddress
- Frag
->getAddress(),
// Entries strictly inside the fragment become referenced offsets.
676 } else if (EntryAddress
>= Frag
->getAddress() &&
677 EntryAddress
< Frag
->getAddress() + Frag
->getSize()) {
678 Frag
->registerReferencedOffset(EntryAddress
- Frag
->getAddress());
682 // In strict mode, erase PC-relative relocation record. Later we check that
683 // all such records are erased and thus have been accounted for.
684 if (opts::StrictMode
&& JT
->Type
== JumpTable::JTT_PIC
) {
685 for (uint64_t Address
= JT
->getAddress();
686 Address
< JT
->getAddress() + JT
->getSize();
687 Address
+= JT
->EntrySize
) {
// NOTE(review): erase(find(Address)) relies on every entry address being
// present in DataPCRelocations; for standard containers erasing end() is
// undefined - confirm the invariant or the container's behavior.
688 DataPCRelocations
.erase(DataPCRelocations
.find(Address
));
692 // Mark to skip the function and all its fragments.
693 for (BinaryFunction
*Frag
: JT
->Parents
)
694 if (Frag
->hasIndirectTargetToSplitFragment())
695 addFragmentsToSkip(Frag
);
698 if (opts::StrictMode
&& DataPCRelocations
.size()) {
700 dbgs() << DataPCRelocations
.size()
701 << " unclaimed PC-relative relocations left in data:\n";
702 for (uint64_t Reloc
: DataPCRelocations
)
703 dbgs() << Twine::utohexstr(Reloc
) << '\n';
705 assert(0 && "unclaimed PC-relative relocations left in data\n");
707 clearList(DataPCRelocations
);
// Mark every function in FragmentsToSkip, and transitively all of its
// fragments and parent fragments, as non-simple with an indirect target to a
// split fragment. A worklist (FragmentQueue) seeded from FragmentsToSkip is
// processed by index so that functions appended during the traversal are
// visited too. A summary warning is printed if anything was skipped.
// NOTE(review): a few lines (embedded 716, 719, 730, 733, 738) are missing
// from this listing, including the statement guarded by the count() check in
// addToWorklist - presumably an early return; confirm.
710 void BinaryContext::skipMarkedFragments() {
711 std::vector
<BinaryFunction
*> FragmentQueue
;
712 // Copy the functions to FragmentQueue.
713 FragmentQueue
.assign(FragmentsToSkip
.begin(), FragmentsToSkip
.end());
// Helper: enqueue a function and add it to the skip set.
714 auto addToWorklist
= [&](BinaryFunction
*Function
) -> void {
715 if (FragmentsToSkip
.count(Function
))
717 FragmentQueue
.push_back(Function
);
718 addFragmentsToSkip(Function
);
720 // Functions containing split jump tables need to be skipped with all
721 // fragments (transitively).
// The queue may grow while iterating, hence the index-based loop that
// re-reads size() on every iteration.
722 for (size_t I
= 0; I
!= FragmentQueue
.size(); I
++) {
723 BinaryFunction
*BF
= FragmentQueue
[I
];
724 assert(FragmentsToSkip
.count(BF
) &&
725 "internal error in traversing function fragments");
726 if (opts::Verbosity
>= 1)
727 errs() << "BOLT-WARNING: Ignoring " << BF
->getPrintName() << '\n';
728 BF
->setSimple(false);
729 BF
->setHasIndirectTargetToSplitFragment(true);
731 llvm::for_each(BF
->Fragments
, addToWorklist
);
732 llvm::for_each(BF
->ParentFragments
, addToWorklist
);
734 if (!FragmentsToSkip
.empty())
735 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip
.size() << " function"
736 << (FragmentsToSkip
.size() == 1 ? "" : "s")
737 << " due to cold fragments\n";
// Return the symbol of the data object registered at exactly \p Address, or
// register a new one named "<Prefix>0x<hex address>" if none exists.
// For an existing object the requested Size must match its size or be zero
// (checked with an assert only).
// NOTE(review): the remaining parameter lines (embedded 741-743) are missing
// from this listing; Size, Alignment and Flags are used below and presumably
// are declared there.
740 MCSymbol
*BinaryContext::getOrCreateGlobalSymbol(uint64_t Address
, Twine Prefix
,
744 auto Itr
= BinaryDataMap
.find(Address
);
745 if (Itr
!= BinaryDataMap
.end()) {
746 assert(Itr
->second
->getSize() == Size
|| !Size
);
747 return Itr
->second
->getSymbol();
// No object at this address yet: synthesize a name from the prefix and the
// hexadecimal address and register it.
750 std::string Name
= (Prefix
+ "0x" + Twine::utohexstr(Address
)).str();
751 assert(!GlobalSymbols
.count(Name
) && "created name is not unique");
752 return registerNameAtAddress(Name
, Address
, Size
, Alignment
, Flags
);
755 MCSymbol
*BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name
) {
756 return Ctx
->getOrCreateSymbol(Name
);
// Create a BinaryFunction named \p Name at \p Address in \p Section and
// insert it into BinaryFunctions keyed by its address. A duplicate address is
// treated as a programming error (assert). The name is also registered at the
// address, using \p SymbolSize when non-zero and \p Size otherwise, and the
// function's symbol is mapped back to the function.
// NOTE(review): the tail of the registerNameAtAddress() call and the return
// statement (embedded lines 767, 769+) are missing from this listing.
759 BinaryFunction
*BinaryContext::createBinaryFunction(
760 const std::string
&Name
, BinarySection
&Section
, uint64_t Address
,
761 uint64_t Size
, uint64_t SymbolSize
, uint16_t Alignment
) {
762 auto Result
= BinaryFunctions
.emplace(
763 Address
, BinaryFunction(Name
, Section
, Address
, Size
, *this));
764 assert(Result
.second
== true && "unexpected duplicate function");
// The function object lives inside the map; keep a pointer to it.
765 BinaryFunction
*BF
= &Result
.first
->second
;
766 registerNameAtAddress(Name
, Address
, SymbolSize
? SymbolSize
: Size
,
768 setSymbolToFunctionMap(BF
->getSymbol(), BF
);
// Return the label of the jump table of kind \p Type at \p Address used by
// \p Function, creating the table if necessary.
//
// If a table already contains the address, it must start exactly there and
// have the same type. When it currently has a single parent that is a
// different (but related) fragment, Function is added as a second parent.
// Otherwise Function must already be one of the parents.
//
// For a new table, an existing non-internal symbol at the address is reused
// as the label when possible; otherwise a generated name is registered. The
// table is stored both in the context-wide JumpTables map and in
// Function.JumpTables.
// NOTE(review): the return-type line and a number of lines (embedded 772,
// 779, 791-792, 796-797, 806-807, 813-814, 816, 819-820, 823, 829, 831, 834+)
// are missing from this listing, including several closing braces and the
// final return.
773 BinaryContext::getOrCreateJumpTable(BinaryFunction
&Function
, uint64_t Address
,
774 JumpTable::JumpTableType Type
) {
775 // Two fragments of same function access same jump table
776 if (JumpTable
*JT
= getJumpTableContainingAddress(Address
)) {
777 assert(JT
->Type
== Type
&& "jump table types have to match");
778 assert(Address
== JT
->getAddress() && "unexpected non-empty jump table");
780 // Prevent associating a jump table to a specific fragment twice.
781 // This simple check arises from the assumption: no more than 2 fragments.
782 if (JT
->Parents
.size() == 1 && JT
->Parents
[0] != &Function
) {
783 assert(JT
->Parents
[0]->isParentOrChildOf(Function
) &&
784 "cannot re-use jump table of a different function");
785 // Duplicate the entry for the parent function for easy access
786 JT
->Parents
.push_back(&Function
);
787 if (opts::Verbosity
> 2) {
788 outs() << "BOLT-INFO: Multiple fragments access same jump table: "
789 << JT
->Parents
[0]->getPrintName() << "; "
790 << Function
.getPrintName() << "\n";
793 Function
.JumpTables
.emplace(Address
, JT
);
// NOTE(review): Parents[1] is only valid after the push_back above; the
// braces delimiting that branch are missing from this listing - confirm these
// two calls are still inside it.
794 JT
->Parents
[0]->setHasIndirectTargetToSplitFragment(true);
795 JT
->Parents
[1]->setHasIndirectTargetToSplitFragment(true);
// Debug-only verification that Function is among the table's parents; the
// (void) cast keeps the variable "used" when asserts are compiled out.
798 bool IsJumpTableParent
= false;
799 (void)IsJumpTableParent
;
800 for (BinaryFunction
*Frag
: JT
->Parents
)
801 if (Frag
== &Function
)
802 IsJumpTableParent
= true;
803 assert(IsJumpTableParent
&&
804 "cannot re-use jump table of a different function");
805 return JT
->getFirstLabel();
808 // Re-use the existing symbol if possible.
809 MCSymbol
*JTLabel
= nullptr;
810 if (BinaryData
*Object
= getBinaryDataAtAddress(Address
)) {
811 if (!isInternalSymbolName(Object
->getSymbol()->getName()))
812 JTLabel
= Object
->getSymbol();
815 const uint64_t EntrySize
= getJumpTableEntrySize(Type
);
// Fallback label: register a generated name with size 0 and the entry size
// passed as the next argument.
817 const std::string JumpTableName
= generateJumpTableName(Function
, Address
);
818 JTLabel
= registerNameAtAddress(JumpTableName
, Address
, 0, EntrySize
);
821 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel
->getName()
822 << " in function " << Function
<< '\n');
// The new table starts with a single label at offset 0.
824 JumpTable
*JT
= new JumpTable(*JTLabel
, Address
, EntrySize
, Type
,
825 JumpTable::LabelMapType
{{0, JTLabel
}},
826 *getSectionForAddress(Address
));
827 JT
->Parents
.push_back(&Function
);
828 if (opts::Verbosity
> 2)
830 JumpTables
.emplace(Address
, JT
);
832 // Duplicate the entry for the parent function for easy access.
833 Function
.JumpTables
.emplace(Address
, JT
);
// Create a copy of jump table \p JT for \p Function and return the pair
// (new jump table ID, new label).
//
// The copy gets a fresh temporary label ("duplicatedJT") placed at the offset
// at which \p OldLabel was found in JT->Labels, and it shares the original's
// address, entry size, type, parents, entries and counts. Its ID is the
// bitwise complement of a running counter, so it cannot be confused with
// regular tables whose IDs are input addresses. The copy is stored in both
// JumpTables and Function.JumpTables under that ID.
// NOTE(review): several lines (embedded 841-842, 845-849, 851, 853) are
// missing from this listing, including the declarations of Offset, Found and
// NewJT.
837 std::pair
<uint64_t, const MCSymbol
*>
838 BinaryContext::duplicateJumpTable(BinaryFunction
&Function
, JumpTable
*JT
,
839 const MCSymbol
*OldLabel
) {
// The object returned by scopeLock() is held until the function returns.
840 auto L
= scopeLock();
// Search the label map of the original table for OldLabel.
843 for (std::pair
<const unsigned, MCSymbol
*> Elmt
: JT
->Labels
) {
844 if (Elmt
.second
!= OldLabel
)
850 assert(Found
&& "Label not found");
852 MCSymbol
*NewLabel
= Ctx
->createNamedTempSymbol("duplicatedJT");
854 new JumpTable(*NewLabel
, JT
->getAddress(), JT
->EntrySize
, JT
->Type
,
855 JumpTable::LabelMapType
{{Offset
, NewLabel
}},
856 *getSectionForAddress(JT
->getAddress()));
857 NewJT
->Parents
= JT
->Parents
;
858 NewJT
->Entries
= JT
->Entries
;
859 NewJT
->Counts
= JT
->Counts
;
860 uint64_t JumpTableID
= ++DuplicatedJumpTables
;
861 // Invert it to differentiate from regular jump tables whose IDs are their
862 // addresses in the input binary memory space
863 JumpTableID
= ~JumpTableID
;
864 JumpTables
.emplace(JumpTableID
, NewJT
);
865 Function
.JumpTables
.emplace(JumpTableID
, NewJT
);
866 return std::make_pair(JumpTableID
, NewLabel
);
// Produce a name for the jump table of \p BF at \p Address.
//
// If BF already has a table containing the address and that table has a label
// at the corresponding offset, the label's name is returned. If the table
// exists without such a label, its existing ID is reused. The visible
// alternative records a new ID for the address, equal to the current number
// of tables in BF.
//
// The generated form is "JUMP_TABLE/<function name>.<Id>" with ".<Offset>"
// appended when Offset is non-zero.
// NOTE(review): the second parameter line and the declarations of Offset and
// Id (embedded lines 870-872), plus the else/brace lines 879 and 881, are
// missing from this listing.
869 std::string
BinaryContext::generateJumpTableName(const BinaryFunction
&BF
,
873 if (const JumpTable
*JT
= BF
.getJumpTableContainingAddress(Address
)) {
874 Offset
= Address
- JT
->getAddress();
875 auto Itr
= JT
->Labels
.find(Offset
);
876 if (Itr
!= JT
->Labels
.end())
877 return std::string(Itr
->second
->getName());
// at() is used here, so the table's address is expected to have an ID already.
878 Id
= JumpTableIds
.at(JT
->getAddress());
880 Id
= JumpTableIds
[Address
] = BF
.JumpTables
.size();
882 return ("JUMP_TABLE/" + BF
.getOneName().str() + "." + std::to_string(Id
) +
883 (Offset
? ("." + std::to_string(Offset
)) : ""));
// Check whether the bytes between the end of \p BF (getSize()) and its
// maximum size (getMaxSize()) look like padding.
//
// Starting at offset getSize(), the function disassembles or scans the
// remaining bytes and accepts:
//  * a padding area made entirely of breakpoint instructions, or
//  * any interleaving of no-ops, unconditional "skip" jumps whose target lies
//    between the end of the jump and the end of the padding area, and zero
//    bytes.
// A warning with the offending bytes is printed at Verbosity >= 1 when the
// padding is not recognized.
// NOTE(review): a number of lines (embedded 888-890, 892-893, 896, 898, 902,
// 910, 912-914, ...) are missing from this listing, including the return
// statements and the declaration of Instr.
886 bool BinaryContext::hasValidCodePadding(const BinaryFunction
&BF
) {
887 // FIXME: aarch64 support is missing.
// No padding area at all when the size equals the maximum size.
891 if (BF
.getSize() == BF
.getMaxSize())
894 ErrorOr
<ArrayRef
<unsigned char>> FunctionData
= BF
.getData();
895 assert(FunctionData
&& "cannot get function as data");
// Offset / InstrAddress track the current scan position; both are shared with
// the lambdas below through reference capture.
897 uint64_t Offset
= BF
.getSize();
899 uint64_t InstrSize
= 0;
900 uint64_t InstrAddress
= BF
.getAddress() + Offset
;
901 using std::placeholders::_1
;
903 // Skip instructions that satisfy the predicate condition.
// Returns the number of bytes skipped.
904 auto skipInstructions
= [&](std::function
<bool(const MCInst
&)> Predicate
) {
905 const uint64_t StartOffset
= Offset
;
906 for (; Offset
< BF
.getMaxSize();
907 Offset
+= InstrSize
, InstrAddress
+= InstrSize
) {
908 if (!DisAsm
->getInstruction(Instr
, InstrSize
, FunctionData
->slice(Offset
),
909 InstrAddress
, nulls()))
911 if (!Predicate(Instr
))
915 return Offset
- StartOffset
;
918 // Skip a sequence of zero bytes.
// Returns the number of bytes skipped.
919 auto skipZeros
= [&]() {
920 const uint64_t StartOffset
= Offset
;
921 for (; Offset
< BF
.getMaxSize(); ++Offset
)
922 if ((*FunctionData
)[Offset
] != 0)
925 return Offset
- StartOffset
;
928 // Accept the whole padding area filled with breakpoints.
929 auto isBreakpoint
= std::bind(&MCPlusBuilder::isBreakpoint
, MIB
.get(), _1
);
930 if (skipInstructions(isBreakpoint
) && Offset
== BF
.getMaxSize())
933 auto isNoop
= std::bind(&MCPlusBuilder::isNoop
, MIB
.get(), _1
);
935 // Some functions have a jump to the next function or to the padding area
936 // inserted after the body.
937 auto isSkipJump
= [&](const MCInst
&Instr
) {
938 uint64_t TargetAddress
= 0;
939 if (MIB
->isUnconditionalBranch(Instr
) &&
940 MIB
->evaluateBranch(Instr
, InstrAddress
, InstrSize
, TargetAddress
)) {
// The jump qualifies only when it goes forward, to somewhere between the end
// of the jump itself and the end of the padding area (inclusive).
941 if (TargetAddress
>= InstrAddress
+ InstrSize
&&
942 TargetAddress
<= BF
.getAddress() + BF
.getMaxSize()) {
949 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
950 while (skipInstructions(isNoop
) || skipInstructions(isSkipJump
) ||
// The whole area was consumed by the skippers above.
954 if (Offset
== BF
.getMaxSize())
957 if (opts::Verbosity
>= 1) {
958 errs() << "BOLT-WARNING: bad padding at address 0x"
959 << Twine::utohexstr(BF
.getAddress() + BF
.getSize())
960 << " starting at offset " << (Offset
- BF
.getSize())
961 << " in function " << BF
<< '\n'
962 << FunctionData
->slice(BF
.getSize(), BF
.getMaxSize() - BF
.getSize())
// Validate the padding after every function in BinaryFunctions. For a
// function whose padding is not recognized by hasValidCodePadding(), the
// visible code prints an informational message (with relocations, at
// Verbosity >= 1) and shrinks the function's maximum size to its actual size.
// NOTE(review): several lines (embedded 972-974, 980-982, 984+) are missing
// from this listing, so additional conditions and actions - e.g. what
// "Ignoring the function" does - cannot be confirmed from here.
969 void BinaryContext::adjustCodePadding() {
970 for (auto &BFI
: BinaryFunctions
) {
971 BinaryFunction
&BF
= BFI
.second
;
975 if (!hasValidCodePadding(BF
)) {
976 if (HasRelocations
) {
977 if (opts::Verbosity
>= 1) {
978 outs() << "BOLT-INFO: function " << BF
979 << " has invalid padding. Ignoring the function.\n";
// Drop the unrecognized padding from the function's extent.
983 BF
.setMaxSize(BF
.getSize());
989 MCSymbol
*BinaryContext::registerNameAtAddress(StringRef Name
, uint64_t Address
,
993 // Register the name with MCContext.
994 MCSymbol
*Symbol
= Ctx
->getOrCreateSymbol(Name
);
996 auto GAI
= BinaryDataMap
.find(Address
);
998 if (GAI
== BinaryDataMap
.end()) {
999 ErrorOr
<BinarySection
&> SectionOrErr
= getSectionForAddress(Address
);
1000 BinarySection
&Section
=
1001 SectionOrErr
? SectionOrErr
.get() : absoluteSection();
1002 BD
= new BinaryData(*Symbol
, Address
, Size
, Alignment
? Alignment
: 1,
1004 GAI
= BinaryDataMap
.emplace(Address
, BD
).first
;
1005 GlobalSymbols
[Name
] = BD
;
1006 updateObjectNesting(GAI
);
1009 if (!BD
->hasName(Name
)) {
1010 GlobalSymbols
[Name
] = BD
;
1011 BD
->Symbols
.push_back(Symbol
);
1019 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address
) const {
1020 auto NI
= BinaryDataMap
.lower_bound(Address
);
1021 auto End
= BinaryDataMap
.end();
1022 if ((NI
!= End
&& Address
== NI
->first
) ||
1023 ((NI
!= BinaryDataMap
.begin()) && (NI
-- != BinaryDataMap
.begin()))) {
1024 if (NI
->second
->containsAddress(Address
))
1027 // If this is a sub-symbol, see if a parent data contains the address.
1028 const BinaryData
*BD
= NI
->second
->getParent();
1030 if (BD
->containsAddress(Address
))
1032 BD
= BD
->getParent();
1038 BinaryData
*BinaryContext::getGOTSymbol() {
1039 // First tries to find a global symbol with that name
1040 BinaryData
*GOTSymBD
= getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1044 // This symbol might be hidden from run-time link, so fetch the local
1045 // definition if available.
1046 GOTSymBD
= getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1050 // If the local symbol is not unique, fail
1052 SmallString
<30> Storage
;
1053 while (const BinaryData
*BD
=
1054 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1055 .concat(Twine(Index
++))
1056 .toStringRef(Storage
)))
1057 if (BD
->getAddress() != GOTSymBD
->getAddress())
1063 bool BinaryContext::setBinaryDataSize(uint64_t Address
, uint64_t Size
) {
1064 auto NI
= BinaryDataMap
.find(Address
);
1065 assert(NI
!= BinaryDataMap
.end());
1066 if (NI
== BinaryDataMap
.end())
1068 // TODO: it's possible that a jump table starts at the same address
1069 // as a larger blob of private data. When we set the size of the
1070 // jump table, it might be smaller than the total blob size. In this
1071 // case we just leave the original size since (currently) it won't really
1073 assert((!NI
->second
->Size
|| NI
->second
->Size
== Size
||
1074 (NI
->second
->isJumpTable() && NI
->second
->Size
> Size
)) &&
1075 "can't change the size of a symbol that has already had its "
1077 if (!NI
->second
->Size
) {
1078 NI
->second
->Size
= Size
;
1079 updateObjectNesting(NI
);
1085 void BinaryContext::generateSymbolHashes() {
1086 auto isPadding
= [](const BinaryData
&BD
) {
1087 StringRef Contents
= BD
.getSection().getContents();
1088 StringRef SymData
= Contents
.substr(BD
.getOffset(), BD
.getSize());
1089 return (BD
.getName().starts_with("HOLEat") ||
1090 SymData
.find_first_not_of(0) == StringRef::npos
);
1093 uint64_t NumCollisions
= 0;
1094 for (auto &Entry
: BinaryDataMap
) {
1095 BinaryData
&BD
= *Entry
.second
;
1096 StringRef Name
= BD
.getName();
1098 if (!isInternalSymbolName(Name
))
1101 // First check if a non-anonymous alias exists and move it to the front.
1102 if (BD
.getSymbols().size() > 1) {
1103 auto Itr
= llvm::find_if(BD
.getSymbols(), [&](const MCSymbol
*Symbol
) {
1104 return !isInternalSymbolName(Symbol
->getName());
1106 if (Itr
!= BD
.getSymbols().end()) {
1107 size_t Idx
= std::distance(BD
.getSymbols().begin(), Itr
);
1108 std::swap(BD
.getSymbols()[0], BD
.getSymbols()[Idx
]);
1113 // We have to skip 0 size symbols since they will all collide.
1114 if (BD
.getSize() == 0) {
1118 const uint64_t Hash
= BD
.getSection().hash(BD
);
1119 const size_t Idx
= Name
.find("0x");
1120 std::string NewName
=
1121 (Twine(Name
.substr(0, Idx
)) + "_" + Twine::utohexstr(Hash
)).str();
1122 if (getBinaryDataByName(NewName
)) {
1123 // Ignore collisions for symbols that appear to be padding
1124 // (i.e. all zeros or a "hole")
1125 if (!isPadding(BD
)) {
1126 if (opts::Verbosity
) {
1127 errs() << "BOLT-WARNING: collision detected when hashing " << BD
1128 << " with new name (" << NewName
<< "), skipping.\n";
1134 BD
.Symbols
.insert(BD
.Symbols
.begin(), Ctx
->getOrCreateSymbol(NewName
));
1135 GlobalSymbols
[NewName
] = &BD
;
1137 if (NumCollisions
) {
1138 errs() << "BOLT-WARNING: " << NumCollisions
1139 << " collisions detected while hashing binary objects";
1140 if (!opts::Verbosity
)
1141 errs() << ". Use -v=1 to see the list.";
1146 bool BinaryContext::registerFragment(BinaryFunction
&TargetFunction
,
1147 BinaryFunction
&Function
) const {
1148 assert(TargetFunction
.isFragment() && "TargetFunction must be a fragment");
1149 if (TargetFunction
.isChildOf(Function
))
1151 TargetFunction
.addParentFragment(Function
);
1152 Function
.addFragment(TargetFunction
);
1153 if (!HasRelocations
) {
1154 TargetFunction
.setSimple(false);
1155 Function
.setSimple(false);
1157 if (opts::Verbosity
>= 1) {
1158 outs() << "BOLT-INFO: marking " << TargetFunction
<< " as a fragment of "
1159 << Function
<< '\n';
1164 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction
&BF
,
1165 MCInst
&LoadLowBits
,
1168 const MCSymbol
*TargetSymbol
;
1169 uint64_t Addend
= 0;
1170 std::tie(TargetSymbol
, Addend
) = handleAddressRef(Target
, BF
,
1173 MIB
->replaceImmWithSymbolRef(LoadHiBits
, TargetSymbol
, Addend
, Ctx
.get(), Val
,
1174 ELF::R_AARCH64_ADR_PREL_PG_HI21
);
1175 MIB
->replaceImmWithSymbolRef(LoadLowBits
, TargetSymbol
, Addend
, Ctx
.get(),
1176 Val
, ELF::R_AARCH64_ADD_ABS_LO12_NC
);
1179 bool BinaryContext::handleAArch64Veneer(uint64_t Address
, bool MatchOnly
) {
1180 BinaryFunction
*TargetFunction
= getBinaryFunctionContainingAddress(Address
);
1184 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
1185 assert(Section
&& "cannot get section for referenced address");
1186 if (!Section
->isText())
1190 StringRef SectionContents
= Section
->getContents();
1191 uint64_t Offset
= Address
- Section
->getAddress();
1192 const uint64_t MaxSize
= SectionContents
.size() - Offset
;
1193 const uint8_t *Bytes
=
1194 reinterpret_cast<const uint8_t *>(SectionContents
.data());
1195 ArrayRef
<uint8_t> Data(Bytes
+ Offset
, MaxSize
);
1197 auto matchVeneer
= [&](BinaryFunction::InstrMapType
&Instructions
,
1198 MCInst
&Instruction
, uint64_t Offset
,
1199 uint64_t AbsoluteInstrAddr
,
1200 uint64_t TotalSize
) -> bool {
1201 MCInst
*TargetHiBits
, *TargetLowBits
;
1202 uint64_t TargetAddress
, Count
;
1203 Count
= MIB
->matchLinkerVeneer(Instructions
.begin(), Instructions
.end(),
1204 AbsoluteInstrAddr
, Instruction
, TargetHiBits
,
1205 TargetLowBits
, TargetAddress
);
1212 // NOTE The target symbol was created during disassemble's
1213 // handleExternalReference
1214 const MCSymbol
*VeneerSymbol
= getOrCreateGlobalSymbol(Address
, "FUNCat");
1215 BinaryFunction
*Veneer
= createBinaryFunction(VeneerSymbol
->getName().str(),
1216 *Section
, Address
, TotalSize
);
1217 addAdrpAddRelocAArch64(*Veneer
, *TargetLowBits
, *TargetHiBits
,
1219 MIB
->addAnnotation(Instruction
, "AArch64Veneer", true);
1220 Veneer
->addInstruction(Offset
, std::move(Instruction
));
1222 for (auto It
= Instructions
.rbegin(); Count
!= 0; ++It
, --Count
) {
1223 MIB
->addAnnotation(It
->second
, "AArch64Veneer", true);
1224 Veneer
->addInstruction(It
->first
, std::move(It
->second
));
1227 Veneer
->getOrCreateLocalLabel(Address
);
1228 Veneer
->setMaxSize(TotalSize
);
1229 Veneer
->updateState(BinaryFunction::State::Disassembled
);
1230 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1235 uint64_t Size
= 0, TotalSize
= 0;
1236 BinaryFunction::InstrMapType VeneerInstructions
;
1237 for (Offset
= 0; Offset
< MaxSize
; Offset
+= Size
) {
1239 const uint64_t AbsoluteInstrAddr
= Address
+ Offset
;
1240 if (!SymbolicDisAsm
->getInstruction(Instruction
, Size
, Data
.slice(Offset
),
1241 AbsoluteInstrAddr
, nulls()))
1245 if (MIB
->isBranch(Instruction
)) {
1246 Ret
= matchVeneer(VeneerInstructions
, Instruction
, Offset
,
1247 AbsoluteInstrAddr
, TotalSize
);
1251 VeneerInstructions
.emplace(Offset
, std::move(Instruction
));
1257 void BinaryContext::processInterproceduralReferences() {
1258 for (const std::pair
<BinaryFunction
*, uint64_t> &It
:
1259 InterproceduralReferences
) {
1260 BinaryFunction
&Function
= *It
.first
;
1261 uint64_t Address
= It
.second
;
1262 if (!Address
|| Function
.isIgnored())
1265 BinaryFunction
*TargetFunction
=
1266 getBinaryFunctionContainingAddress(Address
);
1267 if (&Function
== TargetFunction
)
1270 if (TargetFunction
) {
1271 if (TargetFunction
->isFragment() &&
1272 !TargetFunction
->isChildOf(Function
)) {
1273 errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1275 << Function
.getPrintName() << " and "
1276 << TargetFunction
->getPrintName() << '\n';
1278 if (uint64_t Offset
= Address
- TargetFunction
->getAddress())
1279 TargetFunction
->addEntryPointAtOffset(Offset
);
1284 // Check if address falls in function padding space - this could be
1285 // unmarked data in code. In this case adjust the padding space size.
1286 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
1287 assert(Section
&& "cannot get section for referenced address");
1289 if (!Section
->isText())
1292 // PLT requires special handling and could be ignored in this context.
1293 StringRef SectionName
= Section
->getName();
1294 if (SectionName
== ".plt" || SectionName
== ".plt.got")
1297 // Check if it is aarch64 veneer written at Address
1298 if (isAArch64() && handleAArch64Veneer(Address
))
1301 if (opts::processAllFunctions()) {
1302 errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1303 << "object in code at address 0x" << Twine::utohexstr(Address
)
1304 << " belonging to section " << SectionName
<< " in current mode\n";
1308 TargetFunction
= getBinaryFunctionContainingAddress(Address
,
1309 /*CheckPastEnd=*/false,
1310 /*UseMaxSize=*/true);
1311 // We are not going to overwrite non-simple functions, but for simple
1312 // ones - adjust the padding size.
1313 if (TargetFunction
&& TargetFunction
->isSimple()) {
1314 errs() << "BOLT-WARNING: function " << *TargetFunction
1315 << " has an object detected in a padding region at address 0x"
1316 << Twine::utohexstr(Address
) << '\n';
1317 TargetFunction
->setMaxSize(TargetFunction
->getSize());
1321 InterproceduralReferences
.clear();
1324 void BinaryContext::postProcessSymbolTable() {
1325 fixBinaryDataHoles();
1327 for (auto &Entry
: BinaryDataMap
) {
1328 BinaryData
*BD
= Entry
.second
;
1329 if ((BD
->getName().starts_with("SYMBOLat") ||
1330 BD
->getName().starts_with("DATAat")) &&
1331 !BD
->getParent() && !BD
->getSize() && !BD
->isAbsolute() &&
1333 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
<< "\n";
1339 generateSymbolHashes();
1342 void BinaryContext::foldFunction(BinaryFunction
&ChildBF
,
1343 BinaryFunction
&ParentBF
) {
1344 assert(!ChildBF
.isMultiEntry() && !ParentBF
.isMultiEntry() &&
1345 "cannot merge functions with multiple entry points");
1347 std::unique_lock
<llvm::sys::RWMutex
> WriteCtxLock(CtxMutex
, std::defer_lock
);
1348 std::unique_lock
<llvm::sys::RWMutex
> WriteSymbolMapLock(
1349 SymbolToFunctionMapMutex
, std::defer_lock
);
1351 const StringRef ChildName
= ChildBF
.getOneName();
1353 // Move symbols over and update bookkeeping info.
1354 for (MCSymbol
*Symbol
: ChildBF
.getSymbols()) {
1355 ParentBF
.getSymbols().push_back(Symbol
);
1356 WriteSymbolMapLock
.lock();
1357 SymbolToFunctionMap
[Symbol
] = &ParentBF
;
1358 WriteSymbolMapLock
.unlock();
1359 // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1361 ChildBF
.getSymbols().clear();
1363 // Move other names the child function is known under.
1364 llvm::move(ChildBF
.Aliases
, std::back_inserter(ParentBF
.Aliases
));
1365 ChildBF
.Aliases
.clear();
1367 if (HasRelocations
) {
1368 // Merge execution counts of ChildBF into those of ParentBF.
1369 // Without relocations, we cannot reliably merge profiles as both functions
1370 // continue to exist and either one can be executed.
1371 ChildBF
.mergeProfileDataInto(ParentBF
);
1373 std::shared_lock
<llvm::sys::RWMutex
> ReadBfsLock(BinaryFunctionsMutex
,
1375 std::unique_lock
<llvm::sys::RWMutex
> WriteBfsLock(BinaryFunctionsMutex
,
1377 // Remove ChildBF from the global set of functions in relocs mode.
1379 auto FI
= BinaryFunctions
.find(ChildBF
.getAddress());
1380 ReadBfsLock
.unlock();
1382 assert(FI
!= BinaryFunctions
.end() && "function not found");
1383 assert(&ChildBF
== &FI
->second
&& "function mismatch");
1385 WriteBfsLock
.lock();
1386 ChildBF
.clearDisasmState();
1387 FI
= BinaryFunctions
.erase(FI
);
1388 WriteBfsLock
.unlock();
1391 // In non-relocation mode we keep the function, but rename it.
1392 std::string NewName
= "__ICF_" + ChildName
.str();
1394 WriteCtxLock
.lock();
1395 ChildBF
.getSymbols().push_back(Ctx
->getOrCreateSymbol(NewName
));
1396 WriteCtxLock
.unlock();
1398 ChildBF
.setFolded(&ParentBF
);
1401 ParentBF
.setHasFunctionsFoldedInto();
1404 void BinaryContext::fixBinaryDataHoles() {
1405 assert(validateObjectNesting() && "object nesting inconsistency detected");
1407 for (BinarySection
&Section
: allocatableSections()) {
1408 std::vector
<std::pair
<uint64_t, uint64_t>> Holes
;
1410 auto isNotHole
= [&Section
](const binary_data_iterator
&Itr
) {
1411 BinaryData
*BD
= Itr
->second
;
1412 bool isHole
= (!BD
->getParent() && !BD
->getSize() && BD
->isObject() &&
1413 (BD
->getName().starts_with("SYMBOLat0x") ||
1414 BD
->getName().starts_with("DATAat0x") ||
1415 BD
->getName().starts_with("ANONYMOUS")));
1416 return !isHole
&& BD
->getSection() == Section
&& !BD
->getParent();
1419 auto BDStart
= BinaryDataMap
.begin();
1420 auto BDEnd
= BinaryDataMap
.end();
1421 auto Itr
= FilteredBinaryDataIterator(isNotHole
, BDStart
, BDEnd
);
1422 auto End
= FilteredBinaryDataIterator(isNotHole
, BDEnd
, BDEnd
);
1424 uint64_t EndAddress
= Section
.getAddress();
1426 while (Itr
!= End
) {
1427 if (Itr
->second
->getAddress() > EndAddress
) {
1428 uint64_t Gap
= Itr
->second
->getAddress() - EndAddress
;
1429 Holes
.emplace_back(EndAddress
, Gap
);
1431 EndAddress
= Itr
->second
->getEndAddress();
1435 if (EndAddress
< Section
.getEndAddress())
1436 Holes
.emplace_back(EndAddress
, Section
.getEndAddress() - EndAddress
);
1438 // If there is already a symbol at the start of the hole, grow that symbol
1439 // to cover the rest. Otherwise, create a new symbol to cover the hole.
1440 for (std::pair
<uint64_t, uint64_t> &Hole
: Holes
) {
1441 BinaryData
*BD
= getBinaryDataAtAddress(Hole
.first
);
1443 // BD->getSection() can be != Section if there are sections that
1444 // overlap. In this case it is probably safe to just skip the holes
1445 // since the overlapping section will not(?) have any symbols in it.
1446 if (BD
->getSection() == Section
)
1447 setBinaryDataSize(Hole
.first
, Hole
.second
);
1449 getOrCreateGlobalSymbol(Hole
.first
, "HOLEat", Hole
.second
, 1);
1454 assert(validateObjectNesting() && "object nesting inconsistency detected");
1455 assert(validateHoles() && "top level hole detected in object map");
1458 void BinaryContext::printGlobalSymbols(raw_ostream
&OS
) const {
1459 const BinarySection
*CurrentSection
= nullptr;
1460 bool FirstSection
= true;
1462 for (auto &Entry
: BinaryDataMap
) {
1463 const BinaryData
*BD
= Entry
.second
;
1464 const BinarySection
&Section
= BD
->getSection();
1465 if (FirstSection
|| Section
!= *CurrentSection
) {
1466 uint64_t Address
, Size
;
1467 StringRef Name
= Section
.getName();
1469 Address
= Section
.getAddress();
1470 Size
= Section
.getSize();
1472 Address
= BD
->getAddress();
1473 Size
= BD
->getSize();
1475 OS
<< "BOLT-INFO: Section " << Name
<< ", "
1476 << "0x" + Twine::utohexstr(Address
) << ":"
1477 << "0x" + Twine::utohexstr(Address
+ Size
) << "/" << Size
<< "\n";
1478 CurrentSection
= &Section
;
1479 FirstSection
= false;
1482 OS
<< "BOLT-INFO: ";
1483 const BinaryData
*P
= BD
->getParent();
1492 Expected
<unsigned> BinaryContext::getDwarfFile(
1493 StringRef Directory
, StringRef FileName
, unsigned FileNumber
,
1494 std::optional
<MD5::MD5Result
> Checksum
, std::optional
<StringRef
> Source
,
1495 unsigned CUID
, unsigned DWARFVersion
) {
1496 DwarfLineTable
&Table
= DwarfLineTablesCUMap
[CUID
];
1497 return Table
.tryGetFile(Directory
, FileName
, Checksum
, Source
, DWARFVersion
,
1501 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID
,
1502 const uint32_t SrcCUID
,
1503 unsigned FileIndex
) {
1504 DWARFCompileUnit
*SrcUnit
= DwCtx
->getCompileUnitForOffset(SrcCUID
);
1505 const DWARFDebugLine::LineTable
*LineTable
=
1506 DwCtx
->getLineTableForUnit(SrcUnit
);
1507 const std::vector
<DWARFDebugLine::FileNameEntry
> &FileNames
=
1508 LineTable
->Prologue
.FileNames
;
1509 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1511 assert(FileIndex
> 0 && FileIndex
<= FileNames
.size() &&
1512 "FileIndex out of range for the compilation unit.");
1514 if (FileNames
[FileIndex
- 1].DirIdx
!= 0) {
1515 if (std::optional
<const char *> DirName
= dwarf::toString(
1517 .IncludeDirectories
[FileNames
[FileIndex
- 1].DirIdx
- 1])) {
1521 StringRef FileName
= "";
1522 if (std::optional
<const char *> FName
=
1523 dwarf::toString(FileNames
[FileIndex
- 1].Name
))
1525 assert(FileName
!= "");
1526 DWARFCompileUnit
*DstUnit
= DwCtx
->getCompileUnitForOffset(DestCUID
);
1527 return cantFail(getDwarfFile(Dir
, FileName
, 0, std::nullopt
, std::nullopt
,
1528 DestCUID
, DstUnit
->getVersion()));
1531 std::vector
<BinaryFunction
*> BinaryContext::getSortedFunctions() {
1532 std::vector
<BinaryFunction
*> SortedFunctions(BinaryFunctions
.size());
1533 llvm::transform(llvm::make_second_range(BinaryFunctions
),
1534 SortedFunctions
.begin(),
1535 [](BinaryFunction
&BF
) { return &BF
; });
1537 llvm::stable_sort(SortedFunctions
,
1538 [](const BinaryFunction
*A
, const BinaryFunction
*B
) {
1539 if (A
->hasValidIndex() && B
->hasValidIndex()) {
1540 return A
->getIndex() < B
->getIndex();
1542 return A
->hasValidIndex();
1544 return SortedFunctions
;
1547 std::vector
<BinaryFunction
*> BinaryContext::getAllBinaryFunctions() {
1548 std::vector
<BinaryFunction
*> AllFunctions
;
1549 AllFunctions
.reserve(BinaryFunctions
.size() + InjectedBinaryFunctions
.size());
1550 llvm::transform(llvm::make_second_range(BinaryFunctions
),
1551 std::back_inserter(AllFunctions
),
1552 [](BinaryFunction
&BF
) { return &BF
; });
1553 llvm::copy(InjectedBinaryFunctions
, std::back_inserter(AllFunctions
));
1555 return AllFunctions
;
1558 std::optional
<DWARFUnit
*> BinaryContext::getDWOCU(uint64_t DWOId
) {
1559 auto Iter
= DWOCUs
.find(DWOId
);
1560 if (Iter
== DWOCUs
.end())
1561 return std::nullopt
;
1563 return Iter
->second
;
1566 DWARFContext
*BinaryContext::getDWOContext() const {
1569 return &DWOCUs
.begin()->second
->getContext();
1572 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1573 void BinaryContext::preprocessDWODebugInfo() {
1574 for (const std::unique_ptr
<DWARFUnit
> &CU
: DwCtx
->compile_units()) {
1575 DWARFUnit
*const DwarfUnit
= CU
.get();
1576 if (std::optional
<uint64_t> DWOId
= DwarfUnit
->getDWOId()) {
1577 DWARFUnit
*DWOCU
= DwarfUnit
->getNonSkeletonUnitDIE(false).getDwarfUnit();
1578 if (!DWOCU
->isDWOUnit()) {
1579 std::string DWOName
= dwarf::toString(
1580 DwarfUnit
->getUnitDIE().find(
1581 {dwarf::DW_AT_dwo_name
, dwarf::DW_AT_GNU_dwo_name
}),
1583 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1585 << " was not retrieved and won't be updated. Please check "
1589 DWOCUs
[*DWOId
] = DWOCU
;
1592 if (!DWOCUs
.empty())
1593 outs() << "BOLT-INFO: processing split DWARF\n";
1596 void BinaryContext::preprocessDebugInfo() {
1602 bool operator<(const CURange
&Other
) const { return LowPC
< Other
.LowPC
; }
1605 // Building a map of address ranges to CUs similar to .debug_aranges and use
1606 // it to assign CU to functions.
1607 std::vector
<CURange
> AllRanges
;
1608 AllRanges
.reserve(DwCtx
->getNumCompileUnits());
1609 for (const std::unique_ptr
<DWARFUnit
> &CU
: DwCtx
->compile_units()) {
1610 Expected
<DWARFAddressRangesVector
> RangesOrError
=
1611 CU
->getUnitDIE().getAddressRanges();
1612 if (!RangesOrError
) {
1613 consumeError(RangesOrError
.takeError());
1616 for (DWARFAddressRange
&Range
: *RangesOrError
) {
1617 // Parts of the debug info could be invalidated due to corresponding code
1618 // being removed from the binary by the linker. Hence we check if the
1619 // address is a valid one.
1620 if (containsAddress(Range
.LowPC
))
1621 AllRanges
.emplace_back(CURange
{Range
.LowPC
, Range
.HighPC
, CU
.get()});
1624 ContainsDwarf5
|= CU
->getVersion() >= 5;
1625 ContainsDwarfLegacy
|= CU
->getVersion() < 5;
1628 llvm::sort(AllRanges
);
1629 for (auto &KV
: BinaryFunctions
) {
1630 const uint64_t FunctionAddress
= KV
.first
;
1631 BinaryFunction
&Function
= KV
.second
;
1633 auto It
= llvm::partition_point(
1634 AllRanges
, [=](CURange R
) { return R
.HighPC
<= FunctionAddress
; });
1635 if (It
!= AllRanges
.end() && It
->LowPC
<= FunctionAddress
)
1636 Function
.setDWARFUnit(It
->Unit
);
1639 // Discover units with debug info that needs to be updated.
1640 for (const auto &KV
: BinaryFunctions
) {
1641 const BinaryFunction
&BF
= KV
.second
;
1642 if (shouldEmit(BF
) && BF
.getDWARFUnit())
1643 ProcessedCUs
.insert(BF
.getDWARFUnit());
1646 // Clear debug info for functions from units that we are not going to process.
1647 for (auto &KV
: BinaryFunctions
) {
1648 BinaryFunction
&BF
= KV
.second
;
1649 if (BF
.getDWARFUnit() && !ProcessedCUs
.count(BF
.getDWARFUnit()))
1650 BF
.setDWARFUnit(nullptr);
1653 if (opts::Verbosity
>= 1) {
1654 outs() << "BOLT-INFO: " << ProcessedCUs
.size() << " out of "
1655 << DwCtx
->getNumCompileUnits() << " CUs will be updated\n";
1658 preprocessDWODebugInfo();
1660 // Populate MCContext with DWARF files from all units.
1661 StringRef GlobalPrefix
= AsmInfo
->getPrivateGlobalPrefix();
1662 for (const std::unique_ptr
<DWARFUnit
> &CU
: DwCtx
->compile_units()) {
1663 const uint64_t CUID
= CU
->getOffset();
1664 DwarfLineTable
&BinaryLineTable
= getDwarfLineTable(CUID
);
1665 BinaryLineTable
.setLabel(Ctx
->getOrCreateSymbol(
1666 GlobalPrefix
+ "line_table_start" + Twine(CUID
)));
1668 if (!ProcessedCUs
.count(CU
.get()))
1671 const DWARFDebugLine::LineTable
*LineTable
=
1672 DwCtx
->getLineTableForUnit(CU
.get());
1673 const std::vector
<DWARFDebugLine::FileNameEntry
> &FileNames
=
1674 LineTable
->Prologue
.FileNames
;
1676 uint16_t DwarfVersion
= LineTable
->Prologue
.getVersion();
1677 if (DwarfVersion
>= 5) {
1678 std::optional
<MD5::MD5Result
> Checksum
;
1679 if (LineTable
->Prologue
.ContentTypes
.HasMD5
)
1680 Checksum
= LineTable
->Prologue
.FileNames
[0].Checksum
;
1681 std::optional
<const char *> Name
=
1682 dwarf::toString(CU
->getUnitDIE().find(dwarf::DW_AT_name
), nullptr);
1683 if (std::optional
<uint64_t> DWOID
= CU
->getDWOId()) {
1684 auto Iter
= DWOCUs
.find(*DWOID
);
1685 assert(Iter
!= DWOCUs
.end() && "DWO CU was not found.");
1686 Name
= dwarf::toString(
1687 Iter
->second
->getUnitDIE().find(dwarf::DW_AT_name
), nullptr);
1689 BinaryLineTable
.setRootFile(CU
->getCompilationDir(), *Name
, Checksum
,
1693 BinaryLineTable
.setDwarfVersion(DwarfVersion
);
1695 // Assign a unique label to every line table, one per CU.
1696 // Make sure empty debug line tables are registered too.
1697 if (FileNames
.empty()) {
1698 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt
, std::nullopt
,
1699 CUID
, DwarfVersion
));
1702 const uint32_t Offset
= DwarfVersion
< 5 ? 1 : 0;
1703 for (size_t I
= 0, Size
= FileNames
.size(); I
!= Size
; ++I
) {
1704 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1707 if (FileNames
[I
].DirIdx
!= 0 || DwarfVersion
>= 5)
1708 if (std::optional
<const char *> DirName
= dwarf::toString(
1710 .IncludeDirectories
[FileNames
[I
].DirIdx
- Offset
]))
1712 StringRef FileName
= "";
1713 if (std::optional
<const char *> FName
=
1714 dwarf::toString(FileNames
[I
].Name
))
1716 assert(FileName
!= "");
1717 std::optional
<MD5::MD5Result
> Checksum
;
1718 if (DwarfVersion
>= 5 && LineTable
->Prologue
.ContentTypes
.HasMD5
)
1719 Checksum
= LineTable
->Prologue
.FileNames
[I
].Checksum
;
1720 cantFail(getDwarfFile(Dir
, FileName
, 0, Checksum
, std::nullopt
, CUID
,
1726 bool BinaryContext::shouldEmit(const BinaryFunction
&Function
) const {
1727 if (Function
.isPseudo())
1730 if (opts::processAllFunctions())
1733 if (Function
.isIgnored())
1736 // In relocation mode we will emit non-simple functions with CFG.
1737 // If the function does not have a CFG it should be marked as ignored.
1738 return HasRelocations
|| Function
.isSimple();
1741 void BinaryContext::dump(const MCInst
&Inst
) const {
1742 if (LLVM_UNLIKELY(!InstPrinter
)) {
1743 dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1746 InstPrinter
->printInst(&Inst
, 0, "", *STI
, dbgs());
1750 void BinaryContext::printCFI(raw_ostream
&OS
, const MCCFIInstruction
&Inst
) {
1751 uint32_t Operation
= Inst
.getOperation();
1752 switch (Operation
) {
1753 case MCCFIInstruction::OpSameValue
:
1754 OS
<< "OpSameValue Reg" << Inst
.getRegister();
1756 case MCCFIInstruction::OpRememberState
:
1757 OS
<< "OpRememberState";
1759 case MCCFIInstruction::OpRestoreState
:
1760 OS
<< "OpRestoreState";
1762 case MCCFIInstruction::OpOffset
:
1763 OS
<< "OpOffset Reg" << Inst
.getRegister() << " " << Inst
.getOffset();
1765 case MCCFIInstruction::OpDefCfaRegister
:
1766 OS
<< "OpDefCfaRegister Reg" << Inst
.getRegister();
1768 case MCCFIInstruction::OpDefCfaOffset
:
1769 OS
<< "OpDefCfaOffset " << Inst
.getOffset();
1771 case MCCFIInstruction::OpDefCfa
:
1772 OS
<< "OpDefCfa Reg" << Inst
.getRegister() << " " << Inst
.getOffset();
1774 case MCCFIInstruction::OpRelOffset
:
1775 OS
<< "OpRelOffset Reg" << Inst
.getRegister() << " " << Inst
.getOffset();
1777 case MCCFIInstruction::OpAdjustCfaOffset
:
1778 OS
<< "OfAdjustCfaOffset " << Inst
.getOffset();
1780 case MCCFIInstruction::OpEscape
:
1783 case MCCFIInstruction::OpRestore
:
1784 OS
<< "OpRestore Reg" << Inst
.getRegister();
1786 case MCCFIInstruction::OpUndefined
:
1787 OS
<< "OpUndefined Reg" << Inst
.getRegister();
1789 case MCCFIInstruction::OpRegister
:
1790 OS
<< "OpRegister Reg" << Inst
.getRegister() << " Reg"
1791 << Inst
.getRegister2();
1793 case MCCFIInstruction::OpWindowSave
:
1794 OS
<< "OpWindowSave";
1796 case MCCFIInstruction::OpGnuArgsSize
:
1797 OS
<< "OpGnuArgsSize";
1800 OS
<< "Op#" << Operation
;
1805 MarkerSymType
BinaryContext::getMarkerType(const SymbolRef
&Symbol
) const {
1806 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1807 // in the code section (see IHI0056B). $x identifies a symbol starting code or
1808 // the end of a data chunk inside code, $d identifies start of data.
1809 if ((!isAArch64() && !isRISCV()) || ELFSymbolRef(Symbol
).getSize())
1810 return MarkerSymType::NONE
;
1812 Expected
<StringRef
> NameOrError
= Symbol
.getName();
1813 Expected
<object::SymbolRef::Type
> TypeOrError
= Symbol
.getType();
1815 if (!TypeOrError
|| !NameOrError
)
1816 return MarkerSymType::NONE
;
1818 if (*TypeOrError
!= SymbolRef::ST_Unknown
)
1819 return MarkerSymType::NONE
;
1821 if (*NameOrError
== "$x" || NameOrError
->starts_with("$x."))
1822 return MarkerSymType::CODE
;
1825 if (isRISCV() && NameOrError
->starts_with("$x"))
1826 return MarkerSymType::CODE
;
1828 if (*NameOrError
== "$d" || NameOrError
->starts_with("$d."))
1829 return MarkerSymType::DATA
;
1831 return MarkerSymType::NONE
;
1834 bool BinaryContext::isMarker(const SymbolRef
&Symbol
) const {
1835 return getMarkerType(Symbol
) != MarkerSymType::NONE
;
1838 static void printDebugInfo(raw_ostream
&OS
, const MCInst
&Instruction
,
1839 const BinaryFunction
*Function
,
1840 DWARFContext
*DwCtx
) {
1841 DebugLineTableRowRef RowRef
=
1842 DebugLineTableRowRef::fromSMLoc(Instruction
.getLoc());
1843 if (RowRef
== DebugLineTableRowRef::NULL_ROW
)
1846 const DWARFDebugLine::LineTable
*LineTable
;
1847 if (Function
&& Function
->getDWARFUnit() &&
1848 Function
->getDWARFUnit()->getOffset() == RowRef
.DwCompileUnitIndex
) {
1849 LineTable
= Function
->getDWARFLineTable();
1851 LineTable
= DwCtx
->getLineTableForUnit(
1852 DwCtx
->getCompileUnitForOffset(RowRef
.DwCompileUnitIndex
));
1854 assert(LineTable
&& "line table expected for instruction with debug info");
1856 const DWARFDebugLine::Row
&Row
= LineTable
->Rows
[RowRef
.RowIndex
- 1];
1857 StringRef FileName
= "";
1858 if (std::optional
<const char *> FName
=
1859 dwarf::toString(LineTable
->Prologue
.FileNames
[Row
.File
- 1].Name
))
1861 OS
<< " # debug line " << FileName
<< ":" << Row
.Line
;
1863 OS
<< ":" << Row
.Column
;
1864 if (Row
.Discriminator
)
1865 OS
<< " discriminator:" << Row
.Discriminator
;
1868 void BinaryContext::printInstruction(raw_ostream
&OS
, const MCInst
&Instruction
,
1870 const BinaryFunction
*Function
,
1871 bool PrintMCInst
, bool PrintMemData
,
1872 bool PrintRelocations
,
1873 StringRef Endl
) const {
1874 OS
<< format(" %08" PRIx64
": ", Offset
);
1875 if (MIB
->isCFI(Instruction
)) {
1876 uint32_t Offset
= Instruction
.getOperand(0).getImm();
1877 OS
<< "\t!CFI\t$" << Offset
<< "\t; ";
1879 printCFI(OS
, *Function
->getCFIFor(Instruction
));
1883 InstPrinter
->printInst(&Instruction
, 0, "", *STI
, OS
);
1884 if (MIB
->isCall(Instruction
)) {
1885 if (MIB
->isTailCall(Instruction
))
1886 OS
<< " # TAILCALL ";
1887 if (MIB
->isInvoke(Instruction
)) {
1888 const std::optional
<MCPlus::MCLandingPad
> EHInfo
=
1889 MIB
->getEHInfo(Instruction
);
1890 OS
<< " # handler: ";
1892 OS
<< *EHInfo
->first
;
1895 OS
<< "; action: " << EHInfo
->second
;
1896 const int64_t GnuArgsSize
= MIB
->getGnuArgsSize(Instruction
);
1897 if (GnuArgsSize
>= 0)
1898 OS
<< "; GNU_args_size = " << GnuArgsSize
;
1900 } else if (MIB
->isIndirectBranch(Instruction
)) {
1901 if (uint64_t JTAddress
= MIB
->getJumpTable(Instruction
)) {
1902 OS
<< " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress
);
1904 OS
<< " # UNKNOWN CONTROL FLOW";
1907 if (std::optional
<uint32_t> Offset
= MIB
->getOffset(Instruction
))
1908 OS
<< " # Offset: " << *Offset
;
1909 if (std::optional
<uint32_t> Size
= MIB
->getSize(Instruction
))
1910 OS
<< " # Size: " << *Size
;
1911 if (MCSymbol
*Label
= MIB
->getLabel(Instruction
))
1912 OS
<< " # Label: " << *Label
;
1914 MIB
->printAnnotations(Instruction
, OS
);
1916 if (opts::PrintDebugInfo
)
1917 printDebugInfo(OS
, Instruction
, Function
, DwCtx
.get());
1919 if ((opts::PrintRelocations
|| PrintRelocations
) && Function
) {
1920 const uint64_t Size
= computeCodeSize(&Instruction
, &Instruction
+ 1);
1921 Function
->printRelocations(OS
, Offset
, Size
);
1927 Instruction
.dump_pretty(OS
, InstPrinter
.get());
1932 std::optional
<uint64_t>
1933 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress
,
1934 uint64_t FileOffset
) const {
1935 // Find a segment with a matching file offset.
1936 for (auto &KV
: SegmentMapInfo
) {
1937 const SegmentInfo
&SegInfo
= KV
.second
;
1938 // FileOffset is got from perf event,
1939 // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
1940 // If the pagesize is not equal to SegInfo.Alignment.
1941 // FileOffset and SegInfo.FileOffset should be aligned first,
1942 // and then judge whether they are equal.
1943 if (alignDown(SegInfo
.FileOffset
, SegInfo
.Alignment
) ==
1944 alignDown(FileOffset
, SegInfo
.Alignment
)) {
1945 // The function's offset from base address in VAS is aligned by pagesize
1946 // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
1947 // However, The ELF document says that SegInfo.FileOffset should equal
1948 // to SegInfo.Address, modulo the pagesize.
1949 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
1951 // So alignDown(SegInfo.Address, pagesize) can be calculated by:
1952 // alignDown(SegInfo.Address, pagesize)
1953 // = SegInfo.Address - (SegInfo.Address % pagesize)
1954 // = SegInfo.Address - (SegInfo.FileOffset % pagesize)
1955 // = SegInfo.Address - SegInfo.FileOffset +
1956 // alignDown(SegInfo.FileOffset, pagesize)
1957 // = SegInfo.Address - SegInfo.FileOffset + FileOffset
1958 return MMapAddress
- (SegInfo
.Address
- SegInfo
.FileOffset
+ FileOffset
);
1962 return std::nullopt
;
1965 ErrorOr
<BinarySection
&> BinaryContext::getSectionForAddress(uint64_t Address
) {
1966 auto SI
= AddressToSection
.upper_bound(Address
);
1967 if (SI
!= AddressToSection
.begin()) {
1969 uint64_t UpperBound
= SI
->first
+ SI
->second
->getSize();
1970 if (!SI
->second
->getSize())
1972 if (UpperBound
> Address
)
1975 return std::make_error_code(std::errc::bad_address
);
1979 BinaryContext::getSectionNameForAddress(uint64_t Address
) const {
1980 if (ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
))
1981 return Section
->getName();
1982 return std::make_error_code(std::errc::bad_address
);
1985 BinarySection
&BinaryContext::registerSection(BinarySection
*Section
) {
1986 auto Res
= Sections
.insert(Section
);
1988 assert(Res
.second
&& "can't register the same section twice.");
1990 // Only register allocatable sections in the AddressToSection map.
1991 if (Section
->isAllocatable() && Section
->getAddress())
1992 AddressToSection
.insert(std::make_pair(Section
->getAddress(), Section
));
1993 NameToSection
.insert(
1994 std::make_pair(std::string(Section
->getName()), Section
));
1995 if (Section
->hasSectionRef())
1996 SectionRefToBinarySection
.insert(
1997 std::make_pair(Section
->getSectionRef(), Section
));
1999 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section
<< "\n");
2003 BinarySection
&BinaryContext::registerSection(SectionRef Section
) {
2004 return registerSection(new BinarySection(*this, Section
));
2008 BinaryContext::registerSection(const Twine
&SectionName
,
2009 const BinarySection
&OriginalSection
) {
2010 return registerSection(
2011 new BinarySection(*this, SectionName
, OriginalSection
));
2015 BinaryContext::registerOrUpdateSection(const Twine
&Name
, unsigned ELFType
,
2016 unsigned ELFFlags
, uint8_t *Data
,
2017 uint64_t Size
, unsigned Alignment
) {
2018 auto NamedSections
= getSectionByName(Name
);
2019 if (NamedSections
.begin() != NamedSections
.end()) {
2020 assert(std::next(NamedSections
.begin()) == NamedSections
.end() &&
2021 "can only update unique sections");
2022 BinarySection
*Section
= NamedSections
.begin()->second
;
2024 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section
<< " -> ");
2025 const bool Flag
= Section
->isAllocatable();
2027 Section
->update(Data
, Size
, Alignment
, ELFType
, ELFFlags
);
2028 LLVM_DEBUG(dbgs() << *Section
<< "\n");
2029 // FIXME: Fix section flags/attributes for MachO.
2031 assert(Flag
== Section
->isAllocatable() &&
2032 "can't change section allocation status");
2036 return registerSection(
2037 new BinarySection(*this, Name
, Data
, Size
, Alignment
, ELFType
, ELFFlags
));
2040 void BinaryContext::deregisterSectionName(const BinarySection
&Section
) {
2041 auto NameRange
= NameToSection
.equal_range(Section
.getName().str());
2042 while (NameRange
.first
!= NameRange
.second
) {
2043 if (NameRange
.first
->second
== &Section
) {
2044 NameToSection
.erase(NameRange
.first
);
2051 void BinaryContext::deregisterUnusedSections() {
2052 ErrorOr
<BinarySection
&> AbsSection
= getUniqueSectionByName("<absolute>");
2053 for (auto SI
= Sections
.begin(); SI
!= Sections
.end();) {
2054 BinarySection
*Section
= *SI
;
2055 // We check getOutputData() instead of getOutputSize() because sometimes
2056 // zero-sized .text.cold sections are allocated.
2057 if (Section
->hasSectionRef() || Section
->getOutputData() ||
2058 (AbsSection
&& Section
== &AbsSection
.get())) {
2063 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section
->getName()
2065 deregisterSectionName(*Section
);
2066 SI
= Sections
.erase(SI
);
2071 bool BinaryContext::deregisterSection(BinarySection
&Section
) {
2072 BinarySection
*SectionPtr
= &Section
;
2073 auto Itr
= Sections
.find(SectionPtr
);
2074 if (Itr
!= Sections
.end()) {
2075 auto Range
= AddressToSection
.equal_range(SectionPtr
->getAddress());
2076 while (Range
.first
!= Range
.second
) {
2077 if (Range
.first
->second
== SectionPtr
) {
2078 AddressToSection
.erase(Range
.first
);
2084 deregisterSectionName(*SectionPtr
);
2085 Sections
.erase(Itr
);
2092 void BinaryContext::renameSection(BinarySection
&Section
,
2093 const Twine
&NewName
) {
2094 auto Itr
= Sections
.find(&Section
);
2095 assert(Itr
!= Sections
.end() && "Section must exist to be renamed.");
2096 Sections
.erase(Itr
);
2098 deregisterSectionName(Section
);
2100 Section
.Name
= NewName
.str();
2101 Section
.setOutputName(Section
.Name
);
2103 NameToSection
.insert(std::make_pair(Section
.Name
, &Section
));
2105 // Reinsert with the new name.
2106 Sections
.insert(&Section
);
2109 void BinaryContext::printSections(raw_ostream
&OS
) const {
2110 for (BinarySection
*const &Section
: Sections
)
2111 OS
<< "BOLT-INFO: " << *Section
<< "\n";
2114 BinarySection
&BinaryContext::absoluteSection() {
2115 if (ErrorOr
<BinarySection
&> Section
= getUniqueSectionByName("<absolute>"))
2117 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL
, 0u);
2120 ErrorOr
<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address
,
2121 size_t Size
) const {
2122 const ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2124 return std::make_error_code(std::errc::bad_address
);
2126 if (Section
->isVirtual())
2129 DataExtractor
DE(Section
->getContents(), AsmInfo
->isLittleEndian(),
2130 AsmInfo
->getCodePointerSize());
2131 auto ValueOffset
= static_cast<uint64_t>(Address
- Section
->getAddress());
2132 return DE
.getUnsigned(&ValueOffset
, Size
);
2135 ErrorOr
<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address
,
2136 size_t Size
) const {
2137 const ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2139 return std::make_error_code(std::errc::bad_address
);
2141 if (Section
->isVirtual())
2144 DataExtractor
DE(Section
->getContents(), AsmInfo
->isLittleEndian(),
2145 AsmInfo
->getCodePointerSize());
2146 auto ValueOffset
= static_cast<uint64_t>(Address
- Section
->getAddress());
2147 return DE
.getSigned(&ValueOffset
, Size
);
2150 void BinaryContext::addRelocation(uint64_t Address
, MCSymbol
*Symbol
,
2151 uint64_t Type
, uint64_t Addend
,
2153 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
2154 assert(Section
&& "cannot find section for address");
2155 Section
->addRelocation(Address
- Section
->getAddress(), Symbol
, Type
, Addend
,
2159 void BinaryContext::addDynamicRelocation(uint64_t Address
, MCSymbol
*Symbol
,
2160 uint64_t Type
, uint64_t Addend
,
2162 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
2163 assert(Section
&& "cannot find section for address");
2164 Section
->addDynamicRelocation(Address
- Section
->getAddress(), Symbol
, Type
,
2168 bool BinaryContext::removeRelocationAt(uint64_t Address
) {
2169 ErrorOr
<BinarySection
&> Section
= getSectionForAddress(Address
);
2170 assert(Section
&& "cannot find section for address");
2171 return Section
->removeRelocationAt(Address
- Section
->getAddress());
2174 const Relocation
*BinaryContext::getRelocationAt(uint64_t Address
) const {
2175 ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2179 return Section
->getRelocationAt(Address
- Section
->getAddress());
2183 BinaryContext::getDynamicRelocationAt(uint64_t Address
) const {
2184 ErrorOr
<const BinarySection
&> Section
= getSectionForAddress(Address
);
2188 return Section
->getDynamicRelocationAt(Address
- Section
->getAddress());
2191 void BinaryContext::markAmbiguousRelocations(BinaryData
&BD
,
2192 const uint64_t Address
) {
2193 auto setImmovable
= [&](BinaryData
&BD
) {
2194 BinaryData
*Root
= BD
.getAtomicRoot();
2195 LLVM_DEBUG(if (Root
->isMoveable()) {
2196 dbgs() << "BOLT-DEBUG: setting " << *Root
<< " as immovable "
2197 << "due to ambiguous relocation referencing 0x"
2198 << Twine::utohexstr(Address
) << '\n';
2200 Root
->setIsMoveable(false);
2203 if (Address
== BD
.getAddress()) {
2206 // Set previous symbol as immovable
2207 BinaryData
*Prev
= getBinaryDataContainingAddress(Address
- 1);
2208 if (Prev
&& Prev
->getEndAddress() == BD
.getAddress())
2209 setImmovable(*Prev
);
2212 if (Address
== BD
.getEndAddress()) {
2215 // Set next symbol as immovable
2216 BinaryData
*Next
= getBinaryDataContainingAddress(BD
.getEndAddress());
2217 if (Next
&& Next
->getAddress() == BD
.getEndAddress())
2218 setImmovable(*Next
);
2222 BinaryFunction
*BinaryContext::getFunctionForSymbol(const MCSymbol
*Symbol
,
2223 uint64_t *EntryDesc
) {
2224 std::shared_lock
<llvm::sys::RWMutex
> Lock(SymbolToFunctionMapMutex
);
2225 auto BFI
= SymbolToFunctionMap
.find(Symbol
);
2226 if (BFI
== SymbolToFunctionMap
.end())
2229 BinaryFunction
*BF
= BFI
->second
;
2231 *EntryDesc
= BF
->getEntryIDForSymbol(Symbol
);
2236 void BinaryContext::exitWithBugReport(StringRef Message
,
2237 const BinaryFunction
&Function
) const {
2238 errs() << "=======================================\n";
2239 errs() << "BOLT is unable to proceed because it couldn't properly understand "
2241 errs() << "If you are running the most recent version of BOLT, you may "
2243 "report this and paste this dump.\nPlease check that there is no "
2244 "sensitive contents being shared in this dump.\n";
2245 errs() << "\nOffending function: " << Function
.getPrintName() << "\n\n";
2246 ScopedPrinter
SP(errs());
2247 SP
.printBinaryBlock("Function contents", *Function
.getData());
2250 errs() << "ERROR: " << Message
;
2251 errs() << "\n=======================================\n";
2256 BinaryContext::createInjectedBinaryFunction(const std::string
&Name
,
2258 InjectedBinaryFunctions
.push_back(new BinaryFunction(Name
, *this, IsSimple
));
2259 BinaryFunction
*BF
= InjectedBinaryFunctions
.back();
2260 setSymbolToFunctionMap(BF
->getSymbol(), BF
);
2261 BF
->CurrentState
= BinaryFunction::State::CFG
;
2265 std::pair
<size_t, size_t>
2266 BinaryContext::calculateEmittedSize(BinaryFunction
&BF
, bool FixBranches
) {
2267 // Adjust branch instruction to match the current layout.
2271 // Create local MC context to isolate the effect of ephemeral code emission.
2272 IndependentCodeEmitter MCEInstance
= createIndependentMCCodeEmitter();
2273 MCContext
*LocalCtx
= MCEInstance
.LocalCtx
.get();
2275 TheTarget
->createMCAsmBackend(*STI
, *MRI
, MCTargetOptions());
2277 SmallString
<256> Code
;
2278 raw_svector_ostream
VecOS(Code
);
2280 std::unique_ptr
<MCObjectWriter
> OW
= MAB
->createObjectWriter(VecOS
);
2281 std::unique_ptr
<MCStreamer
> Streamer(TheTarget
->createMCObjectStreamer(
2282 *TheTriple
, *LocalCtx
, std::unique_ptr
<MCAsmBackend
>(MAB
), std::move(OW
),
2283 std::unique_ptr
<MCCodeEmitter
>(MCEInstance
.MCE
.release()), *STI
,
2285 /*IncrementalLinkerCompatible=*/false,
2286 /*DWARFMustBeAtTheEnd=*/false));
2288 Streamer
->initSections(false, *STI
);
2290 MCSection
*Section
= MCEInstance
.LocalMOFI
->getTextSection();
2291 Section
->setHasInstructions(true);
2293 // Create symbols in the LocalCtx so that they get destroyed with it.
2294 MCSymbol
*StartLabel
= LocalCtx
->createTempSymbol();
2295 MCSymbol
*EndLabel
= LocalCtx
->createTempSymbol();
2297 Streamer
->switchSection(Section
);
2298 Streamer
->emitLabel(StartLabel
);
2299 emitFunctionBody(*Streamer
, BF
, BF
.getLayout().getMainFragment(),
2300 /*EmitCodeOnly=*/true);
2301 Streamer
->emitLabel(EndLabel
);
2303 using LabelRange
= std::pair
<const MCSymbol
*, const MCSymbol
*>;
2304 SmallVector
<LabelRange
> SplitLabels
;
2305 for (FunctionFragment
&FF
: BF
.getLayout().getSplitFragments()) {
2306 MCSymbol
*const SplitStartLabel
= LocalCtx
->createTempSymbol();
2307 MCSymbol
*const SplitEndLabel
= LocalCtx
->createTempSymbol();
2308 SplitLabels
.emplace_back(SplitStartLabel
, SplitEndLabel
);
2310 MCSectionELF
*const SplitSection
= LocalCtx
->getELFSection(
2311 BF
.getCodeSectionName(FF
.getFragmentNum()), ELF::SHT_PROGBITS
,
2312 ELF::SHF_EXECINSTR
| ELF::SHF_ALLOC
);
2313 SplitSection
->setHasInstructions(true);
2314 Streamer
->switchSection(SplitSection
);
2316 Streamer
->emitLabel(SplitStartLabel
);
2317 emitFunctionBody(*Streamer
, BF
, FF
, /*EmitCodeOnly=*/true);
2318 Streamer
->emitLabel(SplitEndLabel
);
2319 // To avoid calling MCObjectStreamer::flushPendingLabels() which is
2321 Streamer
->emitBytes(StringRef(""));
2322 Streamer
->switchSection(Section
);
2325 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2326 // MCStreamer::Finish(), which does more than we want
2327 Streamer
->emitBytes(StringRef(""));
2329 MCAssembler
&Assembler
=
2330 static_cast<MCObjectStreamer
*>(Streamer
.get())->getAssembler();
2331 MCAsmLayout
Layout(Assembler
);
2332 Assembler
.layout(Layout
);
2334 // Obtain fragment sizes.
2335 std::vector
<uint64_t> FragmentSizes
;
2336 // Main fragment size.
2337 const uint64_t HotSize
=
2338 Layout
.getSymbolOffset(*EndLabel
) - Layout
.getSymbolOffset(*StartLabel
);
2339 FragmentSizes
.push_back(HotSize
);
2340 // Split fragment sizes.
2341 uint64_t ColdSize
= 0;
2342 for (const auto &Labels
: SplitLabels
) {
2343 uint64_t Size
= Layout
.getSymbolOffset(*Labels
.second
) -
2344 Layout
.getSymbolOffset(*Labels
.first
);
2345 FragmentSizes
.push_back(Size
);
2349 // Populate new start and end offsets of each basic block.
2350 uint64_t FragmentIndex
= 0;
2351 for (FunctionFragment
&FF
: BF
.getLayout().fragments()) {
2352 BinaryBasicBlock
*PrevBB
= nullptr;
2353 for (BinaryBasicBlock
*BB
: FF
) {
2354 const uint64_t BBStartOffset
= Layout
.getSymbolOffset(*(BB
->getLabel()));
2355 BB
->setOutputStartAddress(BBStartOffset
);
2357 PrevBB
->setOutputEndAddress(BBStartOffset
);
2361 PrevBB
->setOutputEndAddress(FragmentSizes
[FragmentIndex
]);
2365 // Clean-up the effect of the code emission.
2366 for (const MCSymbol
&Symbol
: Assembler
.symbols()) {
2367 MCSymbol
*MutableSymbol
= const_cast<MCSymbol
*>(&Symbol
);
2368 MutableSymbol
->setUndefined();
2369 MutableSymbol
->setIsRegistered(false);
2372 return std::make_pair(HotSize
, ColdSize
);
2375 bool BinaryContext::validateInstructionEncoding(
2376 ArrayRef
<uint8_t> InputSequence
) const {
2379 DisAsm
->getInstruction(Inst
, InstSize
, InputSequence
, 0, nulls());
2380 assert(InstSize
== InputSequence
.size() &&
2381 "Disassembled instruction size does not match the sequence.");
2383 SmallString
<256> Code
;
2384 SmallVector
<MCFixup
, 4> Fixups
;
2386 MCE
->encodeInstruction(Inst
, Code
, Fixups
, *STI
);
2387 auto OutputSequence
= ArrayRef
<uint8_t>((uint8_t *)Code
.data(), Code
.size());
2388 if (InputSequence
!= OutputSequence
) {
2389 if (opts::Verbosity
> 1) {
2390 errs() << "BOLT-WARNING: mismatched encoding detected\n"
2391 << " input: " << InputSequence
<< '\n'
2392 << " output: " << OutputSequence
<< '\n';
2400 uint64_t BinaryContext::getHotThreshold() const {
2401 static uint64_t Threshold
= 0;
2402 if (Threshold
== 0) {
2403 Threshold
= std::max(
2404 (uint64_t)opts::ExecutionCountThreshold
,
2405 NumProfiledFuncs
? SumExecutionCount
/ (2 * NumProfiledFuncs
) : 1);
2410 BinaryFunction
*BinaryContext::getBinaryFunctionContainingAddress(
2411 uint64_t Address
, bool CheckPastEnd
, bool UseMaxSize
) {
2412 auto FI
= BinaryFunctions
.upper_bound(Address
);
2413 if (FI
== BinaryFunctions
.begin())
2417 const uint64_t UsedSize
=
2418 UseMaxSize
? FI
->second
.getMaxSize() : FI
->second
.getSize();
2420 if (Address
>= FI
->first
+ UsedSize
+ (CheckPastEnd
? 1 : 0))
2426 BinaryFunction
*BinaryContext::getBinaryFunctionAtAddress(uint64_t Address
) {
2427 // First, try to find a function starting at the given address. If the
2428 // function was folded, this will get us the original folded function if it
2429 // wasn't removed from the list, e.g. in non-relocation mode.
2430 auto BFI
= BinaryFunctions
.find(Address
);
2431 if (BFI
!= BinaryFunctions
.end())
2432 return &BFI
->second
;
2434 // We might have folded the function matching the object at the given
2435 // address. In such case, we look for a function matching the symbol
2436 // registered at the original address. The new function (the one that the
2437 // original was folded into) will hold the symbol.
2438 if (const BinaryData
*BD
= getBinaryDataAtAddress(Address
)) {
2439 uint64_t EntryID
= 0;
2440 BinaryFunction
*BF
= getFunctionForSymbol(BD
->getSymbol(), &EntryID
);
2441 if (BF
&& EntryID
== 0)
2447 DebugAddressRangesVector
BinaryContext::translateModuleAddressRanges(
2448 const DWARFAddressRangesVector
&InputRanges
) const {
2449 DebugAddressRangesVector OutputRanges
;
2451 for (const DWARFAddressRange Range
: InputRanges
) {
2452 auto BFI
= BinaryFunctions
.lower_bound(Range
.LowPC
);
2453 while (BFI
!= BinaryFunctions
.end()) {
2454 const BinaryFunction
&Function
= BFI
->second
;
2455 if (Function
.getAddress() >= Range
.HighPC
)
2457 const DebugAddressRangesVector FunctionRanges
=
2458 Function
.getOutputAddressRanges();
2459 llvm::move(FunctionRanges
, std::back_inserter(OutputRanges
));
2460 std::advance(BFI
, 1);
2464 return OutputRanges
;