[llvm-shlib] Fix the version naming style of libLLVM for Windows (#85710)
[llvm-project.git] / bolt / lib / Core / BinaryContext.cpp
blob3f96ea265e425f72380585aa6f8a08d2ac4deaa9
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
24 #include "llvm/MC/MCAsmLayout.h"
25 #include "llvm/MC/MCAssembler.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
28 #include "llvm/MC/MCInstPrinter.h"
29 #include "llvm/MC/MCObjectStreamer.h"
30 #include "llvm/MC/MCObjectWriter.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCSectionELF.h"
33 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSubtargetInfo.h"
35 #include "llvm/MC/MCSymbol.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/Regex.h"
39 #include <algorithm>
40 #include <functional>
41 #include <iterator>
42 #include <numeric>
43 #include <unordered_set>
45 using namespace llvm;
47 #undef DEBUG_TYPE
48 #define DEBUG_TYPE "bolt"
50 namespace opts {
52 cl::opt<bool> NoHugePages("no-huge-pages",
53 cl::desc("use regular size pages for code alignment"),
54 cl::Hidden, cl::cat(BoltCategory));
56 static cl::opt<bool>
57 PrintDebugInfo("print-debug-info",
58 cl::desc("print debug info when printing functions"),
59 cl::Hidden,
60 cl::ZeroOrMore,
61 cl::cat(BoltCategory));
63 cl::opt<bool> PrintRelocations(
64 "print-relocations",
65 cl::desc("print relocations when printing functions/objects"), cl::Hidden,
66 cl::cat(BoltCategory));
68 static cl::opt<bool>
69 PrintMemData("print-mem-data",
70 cl::desc("print memory data annotations when printing functions"),
71 cl::Hidden,
72 cl::ZeroOrMore,
73 cl::cat(BoltCategory));
75 } // namespace opts
77 namespace llvm {
78 namespace bolt {
80 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
81 std::unique_ptr<DWARFContext> DwCtx,
82 std::unique_ptr<Triple> TheTriple,
83 const Target *TheTarget, std::string TripleName,
84 std::unique_ptr<MCCodeEmitter> MCE,
85 std::unique_ptr<MCObjectFileInfo> MOFI,
86 std::unique_ptr<const MCAsmInfo> AsmInfo,
87 std::unique_ptr<const MCInstrInfo> MII,
88 std::unique_ptr<const MCSubtargetInfo> STI,
89 std::unique_ptr<MCInstPrinter> InstPrinter,
90 std::unique_ptr<const MCInstrAnalysis> MIA,
91 std::unique_ptr<MCPlusBuilder> MIB,
92 std::unique_ptr<const MCRegisterInfo> MRI,
93 std::unique_ptr<MCDisassembler> DisAsm)
94 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
95 TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
96 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
97 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
98 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
99 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
100 Relocation::Arch = this->TheTriple->getArch();
101 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
102 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
105 BinaryContext::~BinaryContext() {
106 for (BinarySection *Section : Sections)
107 delete Section;
108 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
109 delete InjectedFunction;
110 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
111 delete JTI.second;
112 clearBinaryData();
115 /// Create BinaryContext for a given architecture \p ArchName and
116 /// triple \p TripleName.
117 Expected<std::unique_ptr<BinaryContext>>
118 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
119 std::unique_ptr<DWARFContext> DwCtx) {
120 StringRef ArchName = "";
121 std::string FeaturesStr = "";
122 switch (File->getArch()) {
123 case llvm::Triple::x86_64:
124 ArchName = "x86-64";
125 FeaturesStr = "+nopl";
126 break;
127 case llvm::Triple::aarch64:
128 ArchName = "aarch64";
129 FeaturesStr = "+all";
130 break;
131 case llvm::Triple::riscv64: {
132 ArchName = "riscv64";
133 Expected<SubtargetFeatures> Features = File->getFeatures();
135 if (auto E = Features.takeError())
136 return std::move(E);
138 // We rely on relaxation for some transformations (e.g., promoting all calls
139 // to PseudoCALL and then making JITLink relax them). Since the relax
140 // feature is not stored in the object file, we manually enable it.
141 Features->AddFeature("relax");
142 FeaturesStr = Features->getString();
143 break;
145 default:
146 return createStringError(std::errc::not_supported,
147 "BOLT-ERROR: Unrecognized machine in ELF file");
150 auto TheTriple = std::make_unique<Triple>(File->makeTriple());
151 const std::string TripleName = TheTriple->str();
153 std::string Error;
154 const Target *TheTarget =
155 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
156 if (!TheTarget)
157 return createStringError(make_error_code(std::errc::not_supported),
158 Twine("BOLT-ERROR: ", Error));
160 std::unique_ptr<const MCRegisterInfo> MRI(
161 TheTarget->createMCRegInfo(TripleName));
162 if (!MRI)
163 return createStringError(
164 make_error_code(std::errc::not_supported),
165 Twine("BOLT-ERROR: no register info for target ", TripleName));
167 // Set up disassembler.
168 std::unique_ptr<MCAsmInfo> AsmInfo(
169 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
170 if (!AsmInfo)
171 return createStringError(
172 make_error_code(std::errc::not_supported),
173 Twine("BOLT-ERROR: no assembly info for target ", TripleName));
174 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
175 // we want to emit such names as using @PLT without double quotes to convey
176 // variant kind to the assembler. BOLT doesn't rely on the linker so we can
177 // override the default AsmInfo behavior to emit names the way we want.
178 AsmInfo->setAllowAtInName(true);
180 std::unique_ptr<const MCSubtargetInfo> STI(
181 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
182 if (!STI)
183 return createStringError(
184 make_error_code(std::errc::not_supported),
185 Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
187 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
188 if (!MII)
189 return createStringError(
190 make_error_code(std::errc::not_supported),
191 Twine("BOLT-ERROR: no instruction info for target ", TripleName));
193 std::unique_ptr<MCContext> Ctx(
194 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
195 std::unique_ptr<MCObjectFileInfo> MOFI(
196 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
197 Ctx->setObjectFileInfo(MOFI.get());
198 // We do not support X86 Large code model. Change this in the future.
199 bool Large = false;
200 if (TheTriple->getArch() == llvm::Triple::aarch64)
201 Large = true;
202 unsigned LSDAEncoding =
203 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
204 if (IsPIC) {
205 LSDAEncoding = dwarf::DW_EH_PE_pcrel |
206 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
209 std::unique_ptr<MCDisassembler> DisAsm(
210 TheTarget->createMCDisassembler(*STI, *Ctx));
212 if (!DisAsm)
213 return createStringError(
214 make_error_code(std::errc::not_supported),
215 Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
217 std::unique_ptr<const MCInstrAnalysis> MIA(
218 TheTarget->createMCInstrAnalysis(MII.get()));
219 if (!MIA)
220 return createStringError(
221 make_error_code(std::errc::not_supported),
222 Twine("BOLT-ERROR: failed to create instruction analysis for target ",
223 TripleName));
225 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
226 std::unique_ptr<MCInstPrinter> InstructionPrinter(
227 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
228 *MII, *MRI));
229 if (!InstructionPrinter)
230 return createStringError(
231 make_error_code(std::errc::not_supported),
232 Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
233 InstructionPrinter->setPrintImmHex(true);
235 std::unique_ptr<MCCodeEmitter> MCE(
236 TheTarget->createMCCodeEmitter(*MII, *Ctx));
238 // Make sure we don't miss any output on core dumps.
239 outs().SetUnbuffered();
240 errs().SetUnbuffered();
241 dbgs().SetUnbuffered();
243 auto BC = std::make_unique<BinaryContext>(
244 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
245 std::string(TripleName), std::move(MCE), std::move(MOFI),
246 std::move(AsmInfo), std::move(MII), std::move(STI),
247 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
248 std::move(DisAsm));
250 BC->LSDAEncoding = LSDAEncoding;
252 BC->MAB = std::unique_ptr<MCAsmBackend>(
253 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
255 BC->setFilename(File->getFileName());
257 BC->HasFixedLoadAddress = !IsPIC;
259 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
260 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
262 if (!BC->SymbolicDisAsm)
263 return createStringError(
264 make_error_code(std::errc::not_supported),
265 Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
267 return std::move(BC);
270 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
271 if (opts::HotText &&
272 (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
273 return true;
275 if (opts::HotData &&
276 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
277 return true;
279 if (SymbolName == "_end")
280 return true;
282 return false;
285 std::unique_ptr<MCObjectWriter>
286 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
287 return MAB->createObjectWriter(OS);
290 bool BinaryContext::validateObjectNesting() const {
291 auto Itr = BinaryDataMap.begin();
292 auto End = BinaryDataMap.end();
293 bool Valid = true;
294 while (Itr != End) {
295 auto Next = std::next(Itr);
296 while (Next != End &&
297 Itr->second->getSection() == Next->second->getSection() &&
298 Itr->second->containsRange(Next->second->getAddress(),
299 Next->second->getSize())) {
300 if (Next->second->Parent != Itr->second) {
301 errs() << "BOLT-WARNING: object nesting incorrect for:\n"
302 << "BOLT-WARNING: " << *Itr->second << "\n"
303 << "BOLT-WARNING: " << *Next->second << "\n";
304 Valid = false;
306 ++Next;
308 Itr = Next;
310 return Valid;
313 bool BinaryContext::validateHoles() const {
314 bool Valid = true;
315 for (BinarySection &Section : sections()) {
316 for (const Relocation &Rel : Section.relocations()) {
317 uint64_t RelAddr = Rel.Offset + Section.getAddress();
318 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
319 if (!BD) {
320 errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
321 << " 0x" << Twine::utohexstr(RelAddr) << " in "
322 << Section.getName() << "\n";
323 Valid = false;
324 } else if (!BD->getAtomicRoot()) {
325 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
326 << "address 0x" << Twine::utohexstr(RelAddr) << " in "
327 << Section.getName() << "\n";
328 Valid = false;
332 return Valid;
335 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
336 const uint64_t Address = GAI->second->getAddress();
337 const uint64_t Size = GAI->second->getSize();
339 auto fixParents = [&](BinaryDataMapType::iterator Itr,
340 BinaryData *NewParent) {
341 BinaryData *OldParent = Itr->second->Parent;
342 Itr->second->Parent = NewParent;
343 ++Itr;
344 while (Itr != BinaryDataMap.end() && OldParent &&
345 Itr->second->Parent == OldParent) {
346 Itr->second->Parent = NewParent;
347 ++Itr;
351 // Check if the previous symbol contains the newly added symbol.
352 if (GAI != BinaryDataMap.begin()) {
353 BinaryData *Prev = std::prev(GAI)->second;
354 while (Prev) {
355 if (Prev->getSection() == GAI->second->getSection() &&
356 Prev->containsRange(Address, Size)) {
357 fixParents(GAI, Prev);
358 } else {
359 fixParents(GAI, nullptr);
361 Prev = Prev->Parent;
365 // Check if the newly added symbol contains any subsequent symbols.
366 if (Size != 0) {
367 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
368 auto Itr = std::next(GAI);
369 while (
370 Itr != BinaryDataMap.end() &&
371 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
372 Itr->second->Parent = BD;
373 ++Itr;
378 iterator_range<BinaryContext::binary_data_iterator>
379 BinaryContext::getSubBinaryData(BinaryData *BD) {
380 auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
381 auto End = Start;
382 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
383 ++End;
384 return make_range(Start, End);
387 std::pair<const MCSymbol *, uint64_t>
388 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
389 bool IsPCRel) {
390 if (isAArch64()) {
391 // Check if this is an access to a constant island and create bookkeeping
392 // to keep track of it and emit it later as part of this function.
393 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
394 return std::make_pair(IslandSym, 0);
396 // Detect custom code written in assembly that refers to arbitrary
397 // constant islands from other functions. Write this reference so we
398 // can pull this constant island and emit it as part of this function
399 // too.
400 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
402 if (IslandIter != AddressToConstantIslandMap.begin() &&
403 (IslandIter == AddressToConstantIslandMap.end() ||
404 IslandIter->first > Address))
405 --IslandIter;
407 if (IslandIter != AddressToConstantIslandMap.end()) {
408 // Fall-back to referencing the original constant island in the presence
409 // of dynamic relocs, as we currently do not support cloning them.
410 // Notice: we might fail to link because of this, if the original constant
411 // island we are referring would be emitted too far away.
412 if (IslandIter->second->hasDynamicRelocationAtIsland()) {
413 MCSymbol *IslandSym =
414 IslandIter->second->getOrCreateIslandAccess(Address);
415 if (IslandSym)
416 return std::make_pair(IslandSym, 0);
417 } else if (MCSymbol *IslandSym =
418 IslandIter->second->getOrCreateProxyIslandAccess(Address,
419 BF)) {
420 BF.createIslandDependency(IslandSym, IslandIter->second);
421 return std::make_pair(IslandSym, 0);
426 // Note that the address does not necessarily have to reside inside
427 // a section, it could be an absolute address too.
428 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
429 if (Section && Section->isText()) {
430 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
431 if (Address != BF.getAddress()) {
432 // The address could potentially escape. Mark it as another entry
433 // point into the function.
434 if (opts::Verbosity >= 1) {
435 outs() << "BOLT-INFO: potentially escaped address 0x"
436 << Twine::utohexstr(Address) << " in function " << BF << '\n';
438 BF.HasInternalLabelReference = true;
439 return std::make_pair(
440 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
442 } else {
443 addInterproceduralReference(&BF, Address);
447 // With relocations, catch jump table references outside of the basic block
448 // containing the indirect jump.
449 if (HasRelocations) {
450 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
451 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
452 const MCSymbol *Symbol =
453 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
455 return std::make_pair(Symbol, 0);
459 if (BinaryData *BD = getBinaryDataContainingAddress(Address))
460 return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
462 // TODO: use DWARF info to get size/alignment here?
463 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
464 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
465 return std::make_pair(TargetSymbol, 0);
468 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
469 BinaryFunction &BF) {
470 if (!isX86())
471 return MemoryContentsType::UNKNOWN;
473 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
474 if (!Section) {
475 // No section - possibly an absolute address. Since we don't allow
476 // internal function addresses to escape the function scope - we
477 // consider it a tail call.
478 if (opts::Verbosity > 1) {
479 errs() << "BOLT-WARNING: no section for address 0x"
480 << Twine::utohexstr(Address) << " referenced from function " << BF
481 << '\n';
483 return MemoryContentsType::UNKNOWN;
486 if (Section->isVirtual()) {
487 // The contents are filled at runtime.
488 return MemoryContentsType::UNKNOWN;
491 // No support for jump tables in code yet.
492 if (Section->isText())
493 return MemoryContentsType::UNKNOWN;
495 // Start with checking for PIC jump table. We expect non-PIC jump tables
496 // to have high 32 bits set to 0.
497 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
498 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
500 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
501 return MemoryContentsType::POSSIBLE_JUMP_TABLE;
503 return MemoryContentsType::UNKNOWN;
506 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
507 const JumpTable::JumpTableType Type,
508 const BinaryFunction &BF,
509 const uint64_t NextJTAddress,
510 JumpTable::AddressesType *EntriesAsAddress,
511 bool *HasEntryInFragment) const {
512 // Is one of the targets __builtin_unreachable?
513 bool HasUnreachable = false;
515 // Does one of the entries match function start address?
516 bool HasStartAsEntry = false;
518 // Number of targets other than __builtin_unreachable.
519 uint64_t NumRealEntries = 0;
521 auto addEntryAddress = [&](uint64_t EntryAddress) {
522 if (EntriesAsAddress)
523 EntriesAsAddress->emplace_back(EntryAddress);
526 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
527 if (!Section)
528 return false;
530 // The upper bound is defined by containing object, section limits, and
531 // the next jump table in memory.
532 uint64_t UpperBound = Section->getEndAddress();
533 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
534 if (JumpTableBD && JumpTableBD->getSize()) {
535 assert(JumpTableBD->getEndAddress() <= UpperBound &&
536 "data object cannot cross a section boundary");
537 UpperBound = JumpTableBD->getEndAddress();
539 if (NextJTAddress)
540 UpperBound = std::min(NextJTAddress, UpperBound);
542 LLVM_DEBUG({
543 using JTT = JumpTable::JumpTableType;
544 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
545 Address, BF.getPrintName(),
546 Type == JTT::JTT_PIC ? "PIC" : "Normal");
548 const uint64_t EntrySize = getJumpTableEntrySize(Type);
549 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
550 EntryAddress += EntrySize) {
551 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress)
552 << " -> ");
553 // Check if there's a proper relocation against the jump table entry.
554 if (HasRelocations) {
555 if (Type == JumpTable::JTT_PIC &&
556 !DataPCRelocations.count(EntryAddress)) {
557 LLVM_DEBUG(
558 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
559 break;
561 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
562 LLVM_DEBUG(
563 dbgs()
564 << "FAIL: JTT_NORMAL table, no relocation for this address\n");
565 break;
569 const uint64_t Value =
570 (Type == JumpTable::JTT_PIC)
571 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
572 : *getPointerAtAddress(EntryAddress);
574 // __builtin_unreachable() case.
575 if (Value == BF.getAddress() + BF.getSize()) {
576 addEntryAddress(Value);
577 HasUnreachable = true;
578 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
579 continue;
582 // Function start is another special case. It is allowed in the jump table,
583 // but we need at least one another regular entry to distinguish the table
584 // from, e.g. a function pointer array.
585 if (Value == BF.getAddress()) {
586 HasStartAsEntry = true;
587 addEntryAddress(Value);
588 continue;
591 // Function or one of its fragments.
592 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
593 const bool DoesBelongToFunction =
594 BF.containsAddress(Value) ||
595 (TargetBF && TargetBF->isParentOrChildOf(BF));
596 if (!DoesBelongToFunction) {
597 LLVM_DEBUG({
598 if (!BF.containsAddress(Value)) {
599 dbgs() << "FAIL: function doesn't contain this address\n";
600 if (TargetBF) {
601 dbgs() << " ! function containing this address: "
602 << TargetBF->getPrintName() << '\n';
603 if (TargetBF->isFragment()) {
604 dbgs() << " ! is a fragment";
605 for (BinaryFunction *Parent : TargetBF->ParentFragments)
606 dbgs() << ", parent: " << Parent->getPrintName();
607 dbgs() << '\n';
612 break;
615 // Check there's an instruction at this offset.
616 if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
617 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
618 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
619 break;
622 ++NumRealEntries;
623 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
625 if (TargetBF != &BF && HasEntryInFragment)
626 *HasEntryInFragment = true;
627 addEntryAddress(Value);
630 // It's a jump table if the number of real entries is more than 1, or there's
631 // one real entry and one or more special targets. If there are only multiple
632 // special targets, then it's not a jump table.
633 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
636 void BinaryContext::populateJumpTables() {
637 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
638 << '\n');
639 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
640 ++JTI) {
641 JumpTable *JT = JTI->second;
643 bool NonSimpleParent = false;
644 for (BinaryFunction *BF : JT->Parents)
645 NonSimpleParent |= !BF->isSimple();
646 if (NonSimpleParent)
647 continue;
649 uint64_t NextJTAddress = 0;
650 auto NextJTI = std::next(JTI);
651 if (NextJTI != JTE)
652 NextJTAddress = NextJTI->second->getAddress();
654 const bool Success =
655 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
656 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
657 if (!Success) {
658 LLVM_DEBUG({
659 dbgs() << "failed to analyze ";
660 JT->print(dbgs());
661 if (NextJTI != JTE) {
662 dbgs() << "next ";
663 NextJTI->second->print(dbgs());
666 llvm_unreachable("jump table heuristic failure");
668 for (BinaryFunction *Frag : JT->Parents) {
669 if (JT->IsSplit)
670 Frag->setHasIndirectTargetToSplitFragment(true);
671 for (uint64_t EntryAddress : JT->EntriesAsAddress)
672 // if target is builtin_unreachable
673 if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
674 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
675 Frag->getSize());
676 } else if (EntryAddress >= Frag->getAddress() &&
677 EntryAddress < Frag->getAddress() + Frag->getSize()) {
678 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
682 // In strict mode, erase PC-relative relocation record. Later we check that
683 // all such records are erased and thus have been accounted for.
684 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
685 for (uint64_t Address = JT->getAddress();
686 Address < JT->getAddress() + JT->getSize();
687 Address += JT->EntrySize) {
688 DataPCRelocations.erase(DataPCRelocations.find(Address));
692 // Mark to skip the function and all its fragments.
693 for (BinaryFunction *Frag : JT->Parents)
694 if (Frag->hasIndirectTargetToSplitFragment())
695 addFragmentsToSkip(Frag);
698 if (opts::StrictMode && DataPCRelocations.size()) {
699 LLVM_DEBUG({
700 dbgs() << DataPCRelocations.size()
701 << " unclaimed PC-relative relocations left in data:\n";
702 for (uint64_t Reloc : DataPCRelocations)
703 dbgs() << Twine::utohexstr(Reloc) << '\n';
705 assert(0 && "unclaimed PC-relative relocations left in data\n");
707 clearList(DataPCRelocations);
710 void BinaryContext::skipMarkedFragments() {
711 std::vector<BinaryFunction *> FragmentQueue;
712 // Copy the functions to FragmentQueue.
713 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
714 auto addToWorklist = [&](BinaryFunction *Function) -> void {
715 if (FragmentsToSkip.count(Function))
716 return;
717 FragmentQueue.push_back(Function);
718 addFragmentsToSkip(Function);
720 // Functions containing split jump tables need to be skipped with all
721 // fragments (transitively).
722 for (size_t I = 0; I != FragmentQueue.size(); I++) {
723 BinaryFunction *BF = FragmentQueue[I];
724 assert(FragmentsToSkip.count(BF) &&
725 "internal error in traversing function fragments");
726 if (opts::Verbosity >= 1)
727 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
728 BF->setSimple(false);
729 BF->setHasIndirectTargetToSplitFragment(true);
731 llvm::for_each(BF->Fragments, addToWorklist);
732 llvm::for_each(BF->ParentFragments, addToWorklist);
734 if (!FragmentsToSkip.empty())
735 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function"
736 << (FragmentsToSkip.size() == 1 ? "" : "s")
737 << " due to cold fragments\n";
740 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
741 uint64_t Size,
742 uint16_t Alignment,
743 unsigned Flags) {
744 auto Itr = BinaryDataMap.find(Address);
745 if (Itr != BinaryDataMap.end()) {
746 assert(Itr->second->getSize() == Size || !Size);
747 return Itr->second->getSymbol();
750 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
751 assert(!GlobalSymbols.count(Name) && "created name is not unique");
752 return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
755 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
756 return Ctx->getOrCreateSymbol(Name);
759 BinaryFunction *BinaryContext::createBinaryFunction(
760 const std::string &Name, BinarySection &Section, uint64_t Address,
761 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
762 auto Result = BinaryFunctions.emplace(
763 Address, BinaryFunction(Name, Section, Address, Size, *this));
764 assert(Result.second == true && "unexpected duplicate function");
765 BinaryFunction *BF = &Result.first->second;
766 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
767 Alignment);
768 setSymbolToFunctionMap(BF->getSymbol(), BF);
769 return BF;
772 const MCSymbol *
773 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
774 JumpTable::JumpTableType Type) {
775 // Two fragments of same function access same jump table
776 if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
777 assert(JT->Type == Type && "jump table types have to match");
778 assert(Address == JT->getAddress() && "unexpected non-empty jump table");
780 // Prevent associating a jump table to a specific fragment twice.
781 // This simple check arises from the assumption: no more than 2 fragments.
782 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
783 assert(JT->Parents[0]->isParentOrChildOf(Function) &&
784 "cannot re-use jump table of a different function");
785 // Duplicate the entry for the parent function for easy access
786 JT->Parents.push_back(&Function);
787 if (opts::Verbosity > 2) {
788 outs() << "BOLT-INFO: Multiple fragments access same jump table: "
789 << JT->Parents[0]->getPrintName() << "; "
790 << Function.getPrintName() << "\n";
791 JT->print(outs());
793 Function.JumpTables.emplace(Address, JT);
794 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
795 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
798 bool IsJumpTableParent = false;
799 (void)IsJumpTableParent;
800 for (BinaryFunction *Frag : JT->Parents)
801 if (Frag == &Function)
802 IsJumpTableParent = true;
803 assert(IsJumpTableParent &&
804 "cannot re-use jump table of a different function");
805 return JT->getFirstLabel();
808 // Re-use the existing symbol if possible.
809 MCSymbol *JTLabel = nullptr;
810 if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
811 if (!isInternalSymbolName(Object->getSymbol()->getName()))
812 JTLabel = Object->getSymbol();
815 const uint64_t EntrySize = getJumpTableEntrySize(Type);
816 if (!JTLabel) {
817 const std::string JumpTableName = generateJumpTableName(Function, Address);
818 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
821 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
822 << " in function " << Function << '\n');
824 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
825 JumpTable::LabelMapType{{0, JTLabel}},
826 *getSectionForAddress(Address));
827 JT->Parents.push_back(&Function);
828 if (opts::Verbosity > 2)
829 JT->print(outs());
830 JumpTables.emplace(Address, JT);
832 // Duplicate the entry for the parent function for easy access.
833 Function.JumpTables.emplace(Address, JT);
834 return JTLabel;
837 std::pair<uint64_t, const MCSymbol *>
838 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
839 const MCSymbol *OldLabel) {
840 auto L = scopeLock();
841 unsigned Offset = 0;
842 bool Found = false;
843 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
844 if (Elmt.second != OldLabel)
845 continue;
846 Offset = Elmt.first;
847 Found = true;
848 break;
850 assert(Found && "Label not found");
851 (void)Found;
852 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
853 JumpTable *NewJT =
854 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
855 JumpTable::LabelMapType{{Offset, NewLabel}},
856 *getSectionForAddress(JT->getAddress()));
857 NewJT->Parents = JT->Parents;
858 NewJT->Entries = JT->Entries;
859 NewJT->Counts = JT->Counts;
860 uint64_t JumpTableID = ++DuplicatedJumpTables;
861 // Invert it to differentiate from regular jump tables whose IDs are their
862 // addresses in the input binary memory space
863 JumpTableID = ~JumpTableID;
864 JumpTables.emplace(JumpTableID, NewJT);
865 Function.JumpTables.emplace(JumpTableID, NewJT);
866 return std::make_pair(JumpTableID, NewLabel);
869 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
870 uint64_t Address) {
871 size_t Id;
872 uint64_t Offset = 0;
873 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
874 Offset = Address - JT->getAddress();
875 auto Itr = JT->Labels.find(Offset);
876 if (Itr != JT->Labels.end())
877 return std::string(Itr->second->getName());
878 Id = JumpTableIds.at(JT->getAddress());
879 } else {
880 Id = JumpTableIds[Address] = BF.JumpTables.size();
882 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
883 (Offset ? ("." + std::to_string(Offset)) : ""));
886 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
887 // FIXME: aarch64 support is missing.
888 if (!isX86())
889 return true;
891 if (BF.getSize() == BF.getMaxSize())
892 return true;
894 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
895 assert(FunctionData && "cannot get function as data");
897 uint64_t Offset = BF.getSize();
898 MCInst Instr;
899 uint64_t InstrSize = 0;
900 uint64_t InstrAddress = BF.getAddress() + Offset;
901 using std::placeholders::_1;
903 // Skip instructions that satisfy the predicate condition.
904 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
905 const uint64_t StartOffset = Offset;
906 for (; Offset < BF.getMaxSize();
907 Offset += InstrSize, InstrAddress += InstrSize) {
908 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
909 InstrAddress, nulls()))
910 break;
911 if (!Predicate(Instr))
912 break;
915 return Offset - StartOffset;
918 // Skip a sequence of zero bytes.
919 auto skipZeros = [&]() {
920 const uint64_t StartOffset = Offset;
921 for (; Offset < BF.getMaxSize(); ++Offset)
922 if ((*FunctionData)[Offset] != 0)
923 break;
925 return Offset - StartOffset;
928 // Accept the whole padding area filled with breakpoints.
929 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
930 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
931 return true;
933 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
935 // Some functions have a jump to the next function or to the padding area
936 // inserted after the body.
937 auto isSkipJump = [&](const MCInst &Instr) {
938 uint64_t TargetAddress = 0;
939 if (MIB->isUnconditionalBranch(Instr) &&
940 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
941 if (TargetAddress >= InstrAddress + InstrSize &&
942 TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
943 return true;
946 return false;
949 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
950 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
951 skipZeros())
954 if (Offset == BF.getMaxSize())
955 return true;
957 if (opts::Verbosity >= 1) {
958 errs() << "BOLT-WARNING: bad padding at address 0x"
959 << Twine::utohexstr(BF.getAddress() + BF.getSize())
960 << " starting at offset " << (Offset - BF.getSize())
961 << " in function " << BF << '\n'
962 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
963 << '\n';
966 return false;
969 void BinaryContext::adjustCodePadding() {
970 for (auto &BFI : BinaryFunctions) {
971 BinaryFunction &BF = BFI.second;
972 if (!shouldEmit(BF))
973 continue;
975 if (!hasValidCodePadding(BF)) {
976 if (HasRelocations) {
977 if (opts::Verbosity >= 1) {
978 outs() << "BOLT-INFO: function " << BF
979 << " has invalid padding. Ignoring the function.\n";
981 BF.setIgnored();
982 } else {
983 BF.setMaxSize(BF.getSize());
989 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
990 uint64_t Size,
991 uint16_t Alignment,
992 unsigned Flags) {
993 // Register the name with MCContext.
994 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
996 auto GAI = BinaryDataMap.find(Address);
997 BinaryData *BD;
998 if (GAI == BinaryDataMap.end()) {
999 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1000 BinarySection &Section =
1001 SectionOrErr ? SectionOrErr.get() : absoluteSection();
1002 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1003 Section, Flags);
1004 GAI = BinaryDataMap.emplace(Address, BD).first;
1005 GlobalSymbols[Name] = BD;
1006 updateObjectNesting(GAI);
1007 } else {
1008 BD = GAI->second;
1009 if (!BD->hasName(Name)) {
1010 GlobalSymbols[Name] = BD;
1011 BD->Symbols.push_back(Symbol);
1015 return Symbol;
1018 const BinaryData *
1019 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1020 auto NI = BinaryDataMap.lower_bound(Address);
1021 auto End = BinaryDataMap.end();
1022 if ((NI != End && Address == NI->first) ||
1023 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1024 if (NI->second->containsAddress(Address))
1025 return NI->second;
1027 // If this is a sub-symbol, see if a parent data contains the address.
1028 const BinaryData *BD = NI->second->getParent();
1029 while (BD) {
1030 if (BD->containsAddress(Address))
1031 return BD;
1032 BD = BD->getParent();
1035 return nullptr;
1038 BinaryData *BinaryContext::getGOTSymbol() {
1039 // First tries to find a global symbol with that name
1040 BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1041 if (GOTSymBD)
1042 return GOTSymBD;
1044 // This symbol might be hidden from run-time link, so fetch the local
1045 // definition if available.
1046 GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1047 if (!GOTSymBD)
1048 return nullptr;
1050 // If the local symbol is not unique, fail
1051 unsigned Index = 2;
1052 SmallString<30> Storage;
1053 while (const BinaryData *BD =
1054 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1055 .concat(Twine(Index++))
1056 .toStringRef(Storage)))
1057 if (BD->getAddress() != GOTSymBD->getAddress())
1058 return nullptr;
1060 return GOTSymBD;
1063 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1064 auto NI = BinaryDataMap.find(Address);
1065 assert(NI != BinaryDataMap.end());
1066 if (NI == BinaryDataMap.end())
1067 return false;
1068 // TODO: it's possible that a jump table starts at the same address
1069 // as a larger blob of private data. When we set the size of the
1070 // jump table, it might be smaller than the total blob size. In this
1071 // case we just leave the original size since (currently) it won't really
1072 // affect anything.
1073 assert((!NI->second->Size || NI->second->Size == Size ||
1074 (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1075 "can't change the size of a symbol that has already had its "
1076 "size set");
1077 if (!NI->second->Size) {
1078 NI->second->Size = Size;
1079 updateObjectNesting(NI);
1080 return true;
1082 return false;
1085 void BinaryContext::generateSymbolHashes() {
1086 auto isPadding = [](const BinaryData &BD) {
1087 StringRef Contents = BD.getSection().getContents();
1088 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1089 return (BD.getName().starts_with("HOLEat") ||
1090 SymData.find_first_not_of(0) == StringRef::npos);
1093 uint64_t NumCollisions = 0;
1094 for (auto &Entry : BinaryDataMap) {
1095 BinaryData &BD = *Entry.second;
1096 StringRef Name = BD.getName();
1098 if (!isInternalSymbolName(Name))
1099 continue;
1101 // First check if a non-anonymous alias exists and move it to the front.
1102 if (BD.getSymbols().size() > 1) {
1103 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1104 return !isInternalSymbolName(Symbol->getName());
1106 if (Itr != BD.getSymbols().end()) {
1107 size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1108 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1109 continue;
1113 // We have to skip 0 size symbols since they will all collide.
1114 if (BD.getSize() == 0) {
1115 continue;
1118 const uint64_t Hash = BD.getSection().hash(BD);
1119 const size_t Idx = Name.find("0x");
1120 std::string NewName =
1121 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1122 if (getBinaryDataByName(NewName)) {
1123 // Ignore collisions for symbols that appear to be padding
1124 // (i.e. all zeros or a "hole")
1125 if (!isPadding(BD)) {
1126 if (opts::Verbosity) {
1127 errs() << "BOLT-WARNING: collision detected when hashing " << BD
1128 << " with new name (" << NewName << "), skipping.\n";
1130 ++NumCollisions;
1132 continue;
1134 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1135 GlobalSymbols[NewName] = &BD;
1137 if (NumCollisions) {
1138 errs() << "BOLT-WARNING: " << NumCollisions
1139 << " collisions detected while hashing binary objects";
1140 if (!opts::Verbosity)
1141 errs() << ". Use -v=1 to see the list.";
1142 errs() << '\n';
1146 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1147 BinaryFunction &Function) const {
1148 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1149 if (TargetFunction.isChildOf(Function))
1150 return true;
1151 TargetFunction.addParentFragment(Function);
1152 Function.addFragment(TargetFunction);
1153 if (!HasRelocations) {
1154 TargetFunction.setSimple(false);
1155 Function.setSimple(false);
1157 if (opts::Verbosity >= 1) {
1158 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1159 << Function << '\n';
1161 return true;
1164 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1165 MCInst &LoadLowBits,
1166 MCInst &LoadHiBits,
1167 uint64_t Target) {
1168 const MCSymbol *TargetSymbol;
1169 uint64_t Addend = 0;
1170 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1171 /*IsPCRel*/ true);
1172 int64_t Val;
1173 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1174 ELF::R_AARCH64_ADR_PREL_PG_HI21);
1175 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1176 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1179 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1180 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1181 if (TargetFunction)
1182 return false;
1184 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1185 assert(Section && "cannot get section for referenced address");
1186 if (!Section->isText())
1187 return false;
1189 bool Ret = false;
1190 StringRef SectionContents = Section->getContents();
1191 uint64_t Offset = Address - Section->getAddress();
1192 const uint64_t MaxSize = SectionContents.size() - Offset;
1193 const uint8_t *Bytes =
1194 reinterpret_cast<const uint8_t *>(SectionContents.data());
1195 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1197 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1198 MCInst &Instruction, uint64_t Offset,
1199 uint64_t AbsoluteInstrAddr,
1200 uint64_t TotalSize) -> bool {
1201 MCInst *TargetHiBits, *TargetLowBits;
1202 uint64_t TargetAddress, Count;
1203 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1204 AbsoluteInstrAddr, Instruction, TargetHiBits,
1205 TargetLowBits, TargetAddress);
1206 if (!Count)
1207 return false;
1209 if (MatchOnly)
1210 return true;
1212 // NOTE The target symbol was created during disassemble's
1213 // handleExternalReference
1214 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1215 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1216 *Section, Address, TotalSize);
1217 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1218 TargetAddress);
1219 MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1220 Veneer->addInstruction(Offset, std::move(Instruction));
1221 --Count;
1222 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1223 MIB->addAnnotation(It->second, "AArch64Veneer", true);
1224 Veneer->addInstruction(It->first, std::move(It->second));
1227 Veneer->getOrCreateLocalLabel(Address);
1228 Veneer->setMaxSize(TotalSize);
1229 Veneer->updateState(BinaryFunction::State::Disassembled);
1230 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1231 << "\n");
1232 return true;
1235 uint64_t Size = 0, TotalSize = 0;
1236 BinaryFunction::InstrMapType VeneerInstructions;
1237 for (Offset = 0; Offset < MaxSize; Offset += Size) {
1238 MCInst Instruction;
1239 const uint64_t AbsoluteInstrAddr = Address + Offset;
1240 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1241 AbsoluteInstrAddr, nulls()))
1242 break;
1244 TotalSize += Size;
1245 if (MIB->isBranch(Instruction)) {
1246 Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1247 AbsoluteInstrAddr, TotalSize);
1248 break;
1251 VeneerInstructions.emplace(Offset, std::move(Instruction));
1254 return Ret;
1257 void BinaryContext::processInterproceduralReferences() {
1258 for (const std::pair<BinaryFunction *, uint64_t> &It :
1259 InterproceduralReferences) {
1260 BinaryFunction &Function = *It.first;
1261 uint64_t Address = It.second;
1262 if (!Address || Function.isIgnored())
1263 continue;
1265 BinaryFunction *TargetFunction =
1266 getBinaryFunctionContainingAddress(Address);
1267 if (&Function == TargetFunction)
1268 continue;
1270 if (TargetFunction) {
1271 if (TargetFunction->isFragment() &&
1272 !TargetFunction->isChildOf(Function)) {
1273 errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1274 "fragments: "
1275 << Function.getPrintName() << " and "
1276 << TargetFunction->getPrintName() << '\n';
1278 if (uint64_t Offset = Address - TargetFunction->getAddress())
1279 TargetFunction->addEntryPointAtOffset(Offset);
1281 continue;
1284 // Check if address falls in function padding space - this could be
1285 // unmarked data in code. In this case adjust the padding space size.
1286 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1287 assert(Section && "cannot get section for referenced address");
1289 if (!Section->isText())
1290 continue;
1292 // PLT requires special handling and could be ignored in this context.
1293 StringRef SectionName = Section->getName();
1294 if (SectionName == ".plt" || SectionName == ".plt.got")
1295 continue;
1297 // Check if it is aarch64 veneer written at Address
1298 if (isAArch64() && handleAArch64Veneer(Address))
1299 continue;
1301 if (opts::processAllFunctions()) {
1302 errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1303 << "object in code at address 0x" << Twine::utohexstr(Address)
1304 << " belonging to section " << SectionName << " in current mode\n";
1305 exit(1);
1308 TargetFunction = getBinaryFunctionContainingAddress(Address,
1309 /*CheckPastEnd=*/false,
1310 /*UseMaxSize=*/true);
1311 // We are not going to overwrite non-simple functions, but for simple
1312 // ones - adjust the padding size.
1313 if (TargetFunction && TargetFunction->isSimple()) {
1314 errs() << "BOLT-WARNING: function " << *TargetFunction
1315 << " has an object detected in a padding region at address 0x"
1316 << Twine::utohexstr(Address) << '\n';
1317 TargetFunction->setMaxSize(TargetFunction->getSize());
1321 InterproceduralReferences.clear();
1324 void BinaryContext::postProcessSymbolTable() {
1325 fixBinaryDataHoles();
1326 bool Valid = true;
1327 for (auto &Entry : BinaryDataMap) {
1328 BinaryData *BD = Entry.second;
1329 if ((BD->getName().starts_with("SYMBOLat") ||
1330 BD->getName().starts_with("DATAat")) &&
1331 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1332 BD->getSection()) {
1333 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1334 Valid = false;
1337 assert(Valid);
1338 (void)Valid;
1339 generateSymbolHashes();
1342 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1343 BinaryFunction &ParentBF) {
1344 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1345 "cannot merge functions with multiple entry points");
1347 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1348 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1349 SymbolToFunctionMapMutex, std::defer_lock);
1351 const StringRef ChildName = ChildBF.getOneName();
1353 // Move symbols over and update bookkeeping info.
1354 for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1355 ParentBF.getSymbols().push_back(Symbol);
1356 WriteSymbolMapLock.lock();
1357 SymbolToFunctionMap[Symbol] = &ParentBF;
1358 WriteSymbolMapLock.unlock();
1359 // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1361 ChildBF.getSymbols().clear();
1363 // Move other names the child function is known under.
1364 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1365 ChildBF.Aliases.clear();
1367 if (HasRelocations) {
1368 // Merge execution counts of ChildBF into those of ParentBF.
1369 // Without relocations, we cannot reliably merge profiles as both functions
1370 // continue to exist and either one can be executed.
1371 ChildBF.mergeProfileDataInto(ParentBF);
1373 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1374 std::defer_lock);
1375 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1376 std::defer_lock);
1377 // Remove ChildBF from the global set of functions in relocs mode.
1378 ReadBfsLock.lock();
1379 auto FI = BinaryFunctions.find(ChildBF.getAddress());
1380 ReadBfsLock.unlock();
1382 assert(FI != BinaryFunctions.end() && "function not found");
1383 assert(&ChildBF == &FI->second && "function mismatch");
1385 WriteBfsLock.lock();
1386 ChildBF.clearDisasmState();
1387 FI = BinaryFunctions.erase(FI);
1388 WriteBfsLock.unlock();
1390 } else {
1391 // In non-relocation mode we keep the function, but rename it.
1392 std::string NewName = "__ICF_" + ChildName.str();
1394 WriteCtxLock.lock();
1395 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1396 WriteCtxLock.unlock();
1398 ChildBF.setFolded(&ParentBF);
1401 ParentBF.setHasFunctionsFoldedInto();
1404 void BinaryContext::fixBinaryDataHoles() {
1405 assert(validateObjectNesting() && "object nesting inconsistency detected");
1407 for (BinarySection &Section : allocatableSections()) {
1408 std::vector<std::pair<uint64_t, uint64_t>> Holes;
1410 auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1411 BinaryData *BD = Itr->second;
1412 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1413 (BD->getName().starts_with("SYMBOLat0x") ||
1414 BD->getName().starts_with("DATAat0x") ||
1415 BD->getName().starts_with("ANONYMOUS")));
1416 return !isHole && BD->getSection() == Section && !BD->getParent();
1419 auto BDStart = BinaryDataMap.begin();
1420 auto BDEnd = BinaryDataMap.end();
1421 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1422 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1424 uint64_t EndAddress = Section.getAddress();
1426 while (Itr != End) {
1427 if (Itr->second->getAddress() > EndAddress) {
1428 uint64_t Gap = Itr->second->getAddress() - EndAddress;
1429 Holes.emplace_back(EndAddress, Gap);
1431 EndAddress = Itr->second->getEndAddress();
1432 ++Itr;
1435 if (EndAddress < Section.getEndAddress())
1436 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1438 // If there is already a symbol at the start of the hole, grow that symbol
1439 // to cover the rest. Otherwise, create a new symbol to cover the hole.
1440 for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1441 BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1442 if (BD) {
1443 // BD->getSection() can be != Section if there are sections that
1444 // overlap. In this case it is probably safe to just skip the holes
1445 // since the overlapping section will not(?) have any symbols in it.
1446 if (BD->getSection() == Section)
1447 setBinaryDataSize(Hole.first, Hole.second);
1448 } else {
1449 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1454 assert(validateObjectNesting() && "object nesting inconsistency detected");
1455 assert(validateHoles() && "top level hole detected in object map");
1458 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1459 const BinarySection *CurrentSection = nullptr;
1460 bool FirstSection = true;
1462 for (auto &Entry : BinaryDataMap) {
1463 const BinaryData *BD = Entry.second;
1464 const BinarySection &Section = BD->getSection();
1465 if (FirstSection || Section != *CurrentSection) {
1466 uint64_t Address, Size;
1467 StringRef Name = Section.getName();
1468 if (Section) {
1469 Address = Section.getAddress();
1470 Size = Section.getSize();
1471 } else {
1472 Address = BD->getAddress();
1473 Size = BD->getSize();
1475 OS << "BOLT-INFO: Section " << Name << ", "
1476 << "0x" + Twine::utohexstr(Address) << ":"
1477 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1478 CurrentSection = &Section;
1479 FirstSection = false;
1482 OS << "BOLT-INFO: ";
1483 const BinaryData *P = BD->getParent();
1484 while (P) {
1485 OS << " ";
1486 P = P->getParent();
1488 OS << *BD << "\n";
1492 Expected<unsigned> BinaryContext::getDwarfFile(
1493 StringRef Directory, StringRef FileName, unsigned FileNumber,
1494 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1495 unsigned CUID, unsigned DWARFVersion) {
1496 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1497 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1498 FileNumber);
1501 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1502 const uint32_t SrcCUID,
1503 unsigned FileIndex) {
1504 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1505 const DWARFDebugLine::LineTable *LineTable =
1506 DwCtx->getLineTableForUnit(SrcUnit);
1507 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1508 LineTable->Prologue.FileNames;
1509 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1510 // means empty dir.
1511 assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1512 "FileIndex out of range for the compilation unit.");
1513 StringRef Dir = "";
1514 if (FileNames[FileIndex - 1].DirIdx != 0) {
1515 if (std::optional<const char *> DirName = dwarf::toString(
1516 LineTable->Prologue
1517 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1518 Dir = *DirName;
1521 StringRef FileName = "";
1522 if (std::optional<const char *> FName =
1523 dwarf::toString(FileNames[FileIndex - 1].Name))
1524 FileName = *FName;
1525 assert(FileName != "");
1526 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1527 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1528 DestCUID, DstUnit->getVersion()));
1531 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1532 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1533 llvm::transform(llvm::make_second_range(BinaryFunctions),
1534 SortedFunctions.begin(),
1535 [](BinaryFunction &BF) { return &BF; });
1537 llvm::stable_sort(SortedFunctions,
1538 [](const BinaryFunction *A, const BinaryFunction *B) {
1539 if (A->hasValidIndex() && B->hasValidIndex()) {
1540 return A->getIndex() < B->getIndex();
1542 return A->hasValidIndex();
1544 return SortedFunctions;
1547 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1548 std::vector<BinaryFunction *> AllFunctions;
1549 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1550 llvm::transform(llvm::make_second_range(BinaryFunctions),
1551 std::back_inserter(AllFunctions),
1552 [](BinaryFunction &BF) { return &BF; });
1553 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1555 return AllFunctions;
1558 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1559 auto Iter = DWOCUs.find(DWOId);
1560 if (Iter == DWOCUs.end())
1561 return std::nullopt;
1563 return Iter->second;
1566 DWARFContext *BinaryContext::getDWOContext() const {
1567 if (DWOCUs.empty())
1568 return nullptr;
1569 return &DWOCUs.begin()->second->getContext();
1572 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1573 void BinaryContext::preprocessDWODebugInfo() {
1574 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1575 DWARFUnit *const DwarfUnit = CU.get();
1576 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1577 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1578 if (!DWOCU->isDWOUnit()) {
1579 std::string DWOName = dwarf::toString(
1580 DwarfUnit->getUnitDIE().find(
1581 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1582 "");
1583 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1584 << DWOName
1585 << " was not retrieved and won't be updated. Please check "
1586 "relative path.\n";
1587 continue;
1589 DWOCUs[*DWOId] = DWOCU;
1592 if (!DWOCUs.empty())
1593 outs() << "BOLT-INFO: processing split DWARF\n";
1596 void BinaryContext::preprocessDebugInfo() {
1597 struct CURange {
1598 uint64_t LowPC;
1599 uint64_t HighPC;
1600 DWARFUnit *Unit;
1602 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1605 // Building a map of address ranges to CUs similar to .debug_aranges and use
1606 // it to assign CU to functions.
1607 std::vector<CURange> AllRanges;
1608 AllRanges.reserve(DwCtx->getNumCompileUnits());
1609 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1610 Expected<DWARFAddressRangesVector> RangesOrError =
1611 CU->getUnitDIE().getAddressRanges();
1612 if (!RangesOrError) {
1613 consumeError(RangesOrError.takeError());
1614 continue;
1616 for (DWARFAddressRange &Range : *RangesOrError) {
1617 // Parts of the debug info could be invalidated due to corresponding code
1618 // being removed from the binary by the linker. Hence we check if the
1619 // address is a valid one.
1620 if (containsAddress(Range.LowPC))
1621 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1624 ContainsDwarf5 |= CU->getVersion() >= 5;
1625 ContainsDwarfLegacy |= CU->getVersion() < 5;
1628 llvm::sort(AllRanges);
1629 for (auto &KV : BinaryFunctions) {
1630 const uint64_t FunctionAddress = KV.first;
1631 BinaryFunction &Function = KV.second;
1633 auto It = llvm::partition_point(
1634 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1635 if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1636 Function.setDWARFUnit(It->Unit);
1639 // Discover units with debug info that needs to be updated.
1640 for (const auto &KV : BinaryFunctions) {
1641 const BinaryFunction &BF = KV.second;
1642 if (shouldEmit(BF) && BF.getDWARFUnit())
1643 ProcessedCUs.insert(BF.getDWARFUnit());
1646 // Clear debug info for functions from units that we are not going to process.
1647 for (auto &KV : BinaryFunctions) {
1648 BinaryFunction &BF = KV.second;
1649 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1650 BF.setDWARFUnit(nullptr);
1653 if (opts::Verbosity >= 1) {
1654 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1655 << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1658 preprocessDWODebugInfo();
1660 // Populate MCContext with DWARF files from all units.
1661 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1662 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1663 const uint64_t CUID = CU->getOffset();
1664 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1665 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1666 GlobalPrefix + "line_table_start" + Twine(CUID)));
1668 if (!ProcessedCUs.count(CU.get()))
1669 continue;
1671 const DWARFDebugLine::LineTable *LineTable =
1672 DwCtx->getLineTableForUnit(CU.get());
1673 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1674 LineTable->Prologue.FileNames;
1676 uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1677 if (DwarfVersion >= 5) {
1678 std::optional<MD5::MD5Result> Checksum;
1679 if (LineTable->Prologue.ContentTypes.HasMD5)
1680 Checksum = LineTable->Prologue.FileNames[0].Checksum;
1681 std::optional<const char *> Name =
1682 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1683 if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1684 auto Iter = DWOCUs.find(*DWOID);
1685 assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1686 Name = dwarf::toString(
1687 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1689 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1690 std::nullopt);
1693 BinaryLineTable.setDwarfVersion(DwarfVersion);
1695 // Assign a unique label to every line table, one per CU.
1696 // Make sure empty debug line tables are registered too.
1697 if (FileNames.empty()) {
1698 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1699 CUID, DwarfVersion));
1700 continue;
1702 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1703 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1704 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1705 // means empty dir.
1706 StringRef Dir = "";
1707 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1708 if (std::optional<const char *> DirName = dwarf::toString(
1709 LineTable->Prologue
1710 .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1711 Dir = *DirName;
1712 StringRef FileName = "";
1713 if (std::optional<const char *> FName =
1714 dwarf::toString(FileNames[I].Name))
1715 FileName = *FName;
1716 assert(FileName != "");
1717 std::optional<MD5::MD5Result> Checksum;
1718 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1719 Checksum = LineTable->Prologue.FileNames[I].Checksum;
1720 cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1721 DwarfVersion));
1726 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1727 if (Function.isPseudo())
1728 return false;
1730 if (opts::processAllFunctions())
1731 return true;
1733 if (Function.isIgnored())
1734 return false;
1736 // In relocation mode we will emit non-simple functions with CFG.
1737 // If the function does not have a CFG it should be marked as ignored.
1738 return HasRelocations || Function.isSimple();
1741 void BinaryContext::dump(const MCInst &Inst) const {
1742 if (LLVM_UNLIKELY(!InstPrinter)) {
1743 dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1744 return;
1746 InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
1747 dbgs() << "\n";
1750 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1751 uint32_t Operation = Inst.getOperation();
1752 switch (Operation) {
1753 case MCCFIInstruction::OpSameValue:
1754 OS << "OpSameValue Reg" << Inst.getRegister();
1755 break;
1756 case MCCFIInstruction::OpRememberState:
1757 OS << "OpRememberState";
1758 break;
1759 case MCCFIInstruction::OpRestoreState:
1760 OS << "OpRestoreState";
1761 break;
1762 case MCCFIInstruction::OpOffset:
1763 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1764 break;
1765 case MCCFIInstruction::OpDefCfaRegister:
1766 OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1767 break;
1768 case MCCFIInstruction::OpDefCfaOffset:
1769 OS << "OpDefCfaOffset " << Inst.getOffset();
1770 break;
1771 case MCCFIInstruction::OpDefCfa:
1772 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1773 break;
1774 case MCCFIInstruction::OpRelOffset:
1775 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1776 break;
1777 case MCCFIInstruction::OpAdjustCfaOffset:
1778 OS << "OfAdjustCfaOffset " << Inst.getOffset();
1779 break;
1780 case MCCFIInstruction::OpEscape:
1781 OS << "OpEscape";
1782 break;
1783 case MCCFIInstruction::OpRestore:
1784 OS << "OpRestore Reg" << Inst.getRegister();
1785 break;
1786 case MCCFIInstruction::OpUndefined:
1787 OS << "OpUndefined Reg" << Inst.getRegister();
1788 break;
1789 case MCCFIInstruction::OpRegister:
1790 OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1791 << Inst.getRegister2();
1792 break;
1793 case MCCFIInstruction::OpWindowSave:
1794 OS << "OpWindowSave";
1795 break;
1796 case MCCFIInstruction::OpGnuArgsSize:
1797 OS << "OpGnuArgsSize";
1798 break;
1799 default:
1800 OS << "Op#" << Operation;
1801 break;
1805 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1806 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1807 // in the code section (see IHI0056B). $x identifies a symbol starting code or
1808 // the end of a data chunk inside code, $d identifies start of data.
1809 if ((!isAArch64() && !isRISCV()) || ELFSymbolRef(Symbol).getSize())
1810 return MarkerSymType::NONE;
1812 Expected<StringRef> NameOrError = Symbol.getName();
1813 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1815 if (!TypeOrError || !NameOrError)
1816 return MarkerSymType::NONE;
1818 if (*TypeOrError != SymbolRef::ST_Unknown)
1819 return MarkerSymType::NONE;
1821 if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
1822 return MarkerSymType::CODE;
1824 // $x<ISA>
1825 if (isRISCV() && NameOrError->starts_with("$x"))
1826 return MarkerSymType::CODE;
1828 if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
1829 return MarkerSymType::DATA;
1831 return MarkerSymType::NONE;
1834 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1835 return getMarkerType(Symbol) != MarkerSymType::NONE;
1838 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1839 const BinaryFunction *Function,
1840 DWARFContext *DwCtx) {
1841 DebugLineTableRowRef RowRef =
1842 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1843 if (RowRef == DebugLineTableRowRef::NULL_ROW)
1844 return;
1846 const DWARFDebugLine::LineTable *LineTable;
1847 if (Function && Function->getDWARFUnit() &&
1848 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1849 LineTable = Function->getDWARFLineTable();
1850 } else {
1851 LineTable = DwCtx->getLineTableForUnit(
1852 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1854 assert(LineTable && "line table expected for instruction with debug info");
1856 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1857 StringRef FileName = "";
1858 if (std::optional<const char *> FName =
1859 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1860 FileName = *FName;
1861 OS << " # debug line " << FileName << ":" << Row.Line;
1862 if (Row.Column)
1863 OS << ":" << Row.Column;
1864 if (Row.Discriminator)
1865 OS << " discriminator:" << Row.Discriminator;
1868 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1869 uint64_t Offset,
1870 const BinaryFunction *Function,
1871 bool PrintMCInst, bool PrintMemData,
1872 bool PrintRelocations,
1873 StringRef Endl) const {
1874 OS << format(" %08" PRIx64 ": ", Offset);
1875 if (MIB->isCFI(Instruction)) {
1876 uint32_t Offset = Instruction.getOperand(0).getImm();
1877 OS << "\t!CFI\t$" << Offset << "\t; ";
1878 if (Function)
1879 printCFI(OS, *Function->getCFIFor(Instruction));
1880 OS << Endl;
1881 return;
1883 InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1884 if (MIB->isCall(Instruction)) {
1885 if (MIB->isTailCall(Instruction))
1886 OS << " # TAILCALL ";
1887 if (MIB->isInvoke(Instruction)) {
1888 const std::optional<MCPlus::MCLandingPad> EHInfo =
1889 MIB->getEHInfo(Instruction);
1890 OS << " # handler: ";
1891 if (EHInfo->first)
1892 OS << *EHInfo->first;
1893 else
1894 OS << '0';
1895 OS << "; action: " << EHInfo->second;
1896 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1897 if (GnuArgsSize >= 0)
1898 OS << "; GNU_args_size = " << GnuArgsSize;
1900 } else if (MIB->isIndirectBranch(Instruction)) {
1901 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1902 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1903 } else {
1904 OS << " # UNKNOWN CONTROL FLOW";
1907 if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1908 OS << " # Offset: " << *Offset;
1909 if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
1910 OS << " # Size: " << *Size;
1911 if (MCSymbol *Label = MIB->getLabel(Instruction))
1912 OS << " # Label: " << *Label;
1914 MIB->printAnnotations(Instruction, OS);
1916 if (opts::PrintDebugInfo)
1917 printDebugInfo(OS, Instruction, Function, DwCtx.get());
1919 if ((opts::PrintRelocations || PrintRelocations) && Function) {
1920 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1921 Function->printRelocations(OS, Offset, Size);
1924 OS << Endl;
1926 if (PrintMCInst) {
1927 Instruction.dump_pretty(OS, InstPrinter.get());
1928 OS << Endl;
1932 std::optional<uint64_t>
1933 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1934 uint64_t FileOffset) const {
1935 // Find a segment with a matching file offset.
1936 for (auto &KV : SegmentMapInfo) {
1937 const SegmentInfo &SegInfo = KV.second;
1938 // FileOffset is got from perf event,
1939 // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
1940 // If the pagesize is not equal to SegInfo.Alignment.
1941 // FileOffset and SegInfo.FileOffset should be aligned first,
1942 // and then judge whether they are equal.
1943 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
1944 alignDown(FileOffset, SegInfo.Alignment)) {
1945 // The function's offset from base address in VAS is aligned by pagesize
1946 // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
1947 // However, The ELF document says that SegInfo.FileOffset should equal
1948 // to SegInfo.Address, modulo the pagesize.
1949 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
1951 // So alignDown(SegInfo.Address, pagesize) can be calculated by:
1952 // alignDown(SegInfo.Address, pagesize)
1953 // = SegInfo.Address - (SegInfo.Address % pagesize)
1954 // = SegInfo.Address - (SegInfo.FileOffset % pagesize)
1955 // = SegInfo.Address - SegInfo.FileOffset +
1956 // alignDown(SegInfo.FileOffset, pagesize)
1957 // = SegInfo.Address - SegInfo.FileOffset + FileOffset
1958 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
1962 return std::nullopt;
1965 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1966 auto SI = AddressToSection.upper_bound(Address);
1967 if (SI != AddressToSection.begin()) {
1968 --SI;
1969 uint64_t UpperBound = SI->first + SI->second->getSize();
1970 if (!SI->second->getSize())
1971 UpperBound += 1;
1972 if (UpperBound > Address)
1973 return *SI->second;
1975 return std::make_error_code(std::errc::bad_address);
1978 ErrorOr<StringRef>
1979 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1980 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1981 return Section->getName();
1982 return std::make_error_code(std::errc::bad_address);
1985 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1986 auto Res = Sections.insert(Section);
1987 (void)Res;
1988 assert(Res.second && "can't register the same section twice.");
1990 // Only register allocatable sections in the AddressToSection map.
1991 if (Section->isAllocatable() && Section->getAddress())
1992 AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1993 NameToSection.insert(
1994 std::make_pair(std::string(Section->getName()), Section));
1995 if (Section->hasSectionRef())
1996 SectionRefToBinarySection.insert(
1997 std::make_pair(Section->getSectionRef(), Section));
1999 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2000 return *Section;
2003 BinarySection &BinaryContext::registerSection(SectionRef Section) {
2004 return registerSection(new BinarySection(*this, Section));
2007 BinarySection &
2008 BinaryContext::registerSection(const Twine &SectionName,
2009 const BinarySection &OriginalSection) {
2010 return registerSection(
2011 new BinarySection(*this, SectionName, OriginalSection));
2014 BinarySection &
2015 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2016 unsigned ELFFlags, uint8_t *Data,
2017 uint64_t Size, unsigned Alignment) {
2018 auto NamedSections = getSectionByName(Name);
2019 if (NamedSections.begin() != NamedSections.end()) {
2020 assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2021 "can only update unique sections");
2022 BinarySection *Section = NamedSections.begin()->second;
2024 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2025 const bool Flag = Section->isAllocatable();
2026 (void)Flag;
2027 Section->update(Data, Size, Alignment, ELFType, ELFFlags);
2028 LLVM_DEBUG(dbgs() << *Section << "\n");
2029 // FIXME: Fix section flags/attributes for MachO.
2030 if (isELF())
2031 assert(Flag == Section->isAllocatable() &&
2032 "can't change section allocation status");
2033 return *Section;
2036 return registerSection(
2037 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2040 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2041 auto NameRange = NameToSection.equal_range(Section.getName().str());
2042 while (NameRange.first != NameRange.second) {
2043 if (NameRange.first->second == &Section) {
2044 NameToSection.erase(NameRange.first);
2045 break;
2047 ++NameRange.first;
2051 void BinaryContext::deregisterUnusedSections() {
2052 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
2053 for (auto SI = Sections.begin(); SI != Sections.end();) {
2054 BinarySection *Section = *SI;
2055 // We check getOutputData() instead of getOutputSize() because sometimes
2056 // zero-sized .text.cold sections are allocated.
2057 if (Section->hasSectionRef() || Section->getOutputData() ||
2058 (AbsSection && Section == &AbsSection.get())) {
2059 ++SI;
2060 continue;
2063 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2064 << '\n';);
2065 deregisterSectionName(*Section);
2066 SI = Sections.erase(SI);
2067 delete Section;
2071 bool BinaryContext::deregisterSection(BinarySection &Section) {
2072 BinarySection *SectionPtr = &Section;
2073 auto Itr = Sections.find(SectionPtr);
2074 if (Itr != Sections.end()) {
2075 auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2076 while (Range.first != Range.second) {
2077 if (Range.first->second == SectionPtr) {
2078 AddressToSection.erase(Range.first);
2079 break;
2081 ++Range.first;
2084 deregisterSectionName(*SectionPtr);
2085 Sections.erase(Itr);
2086 delete SectionPtr;
2087 return true;
2089 return false;
2092 void BinaryContext::renameSection(BinarySection &Section,
2093 const Twine &NewName) {
2094 auto Itr = Sections.find(&Section);
2095 assert(Itr != Sections.end() && "Section must exist to be renamed.");
2096 Sections.erase(Itr);
2098 deregisterSectionName(Section);
2100 Section.Name = NewName.str();
2101 Section.setOutputName(Section.Name);
2103 NameToSection.insert(std::make_pair(Section.Name, &Section));
2105 // Reinsert with the new name.
2106 Sections.insert(&Section);
2109 void BinaryContext::printSections(raw_ostream &OS) const {
2110 for (BinarySection *const &Section : Sections)
2111 OS << "BOLT-INFO: " << *Section << "\n";
2114 BinarySection &BinaryContext::absoluteSection() {
2115 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2116 return *Section;
2117 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2120 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2121 size_t Size) const {
2122 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2123 if (!Section)
2124 return std::make_error_code(std::errc::bad_address);
2126 if (Section->isVirtual())
2127 return 0;
2129 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2130 AsmInfo->getCodePointerSize());
2131 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2132 return DE.getUnsigned(&ValueOffset, Size);
2135 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2136 size_t Size) const {
2137 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2138 if (!Section)
2139 return std::make_error_code(std::errc::bad_address);
2141 if (Section->isVirtual())
2142 return 0;
2144 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2145 AsmInfo->getCodePointerSize());
2146 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2147 return DE.getSigned(&ValueOffset, Size);
2150 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2151 uint64_t Type, uint64_t Addend,
2152 uint64_t Value) {
2153 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2154 assert(Section && "cannot find section for address");
2155 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2156 Value);
2159 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2160 uint64_t Type, uint64_t Addend,
2161 uint64_t Value) {
2162 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2163 assert(Section && "cannot find section for address");
2164 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2165 Addend, Value);
2168 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2169 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2170 assert(Section && "cannot find section for address");
2171 return Section->removeRelocationAt(Address - Section->getAddress());
2174 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2175 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2176 if (!Section)
2177 return nullptr;
2179 return Section->getRelocationAt(Address - Section->getAddress());
2182 const Relocation *
2183 BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2184 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2185 if (!Section)
2186 return nullptr;
2188 return Section->getDynamicRelocationAt(Address - Section->getAddress());
2191 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2192 const uint64_t Address) {
2193 auto setImmovable = [&](BinaryData &BD) {
2194 BinaryData *Root = BD.getAtomicRoot();
2195 LLVM_DEBUG(if (Root->isMoveable()) {
2196 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2197 << "due to ambiguous relocation referencing 0x"
2198 << Twine::utohexstr(Address) << '\n';
2200 Root->setIsMoveable(false);
2203 if (Address == BD.getAddress()) {
2204 setImmovable(BD);
2206 // Set previous symbol as immovable
2207 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2208 if (Prev && Prev->getEndAddress() == BD.getAddress())
2209 setImmovable(*Prev);
2212 if (Address == BD.getEndAddress()) {
2213 setImmovable(BD);
2215 // Set next symbol as immovable
2216 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2217 if (Next && Next->getAddress() == BD.getEndAddress())
2218 setImmovable(*Next);
2222 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2223 uint64_t *EntryDesc) {
2224 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2225 auto BFI = SymbolToFunctionMap.find(Symbol);
2226 if (BFI == SymbolToFunctionMap.end())
2227 return nullptr;
2229 BinaryFunction *BF = BFI->second;
2230 if (EntryDesc)
2231 *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2233 return BF;
2236 void BinaryContext::exitWithBugReport(StringRef Message,
2237 const BinaryFunction &Function) const {
2238 errs() << "=======================================\n";
2239 errs() << "BOLT is unable to proceed because it couldn't properly understand "
2240 "this function.\n";
2241 errs() << "If you are running the most recent version of BOLT, you may "
2242 "want to "
2243 "report this and paste this dump.\nPlease check that there is no "
2244 "sensitive contents being shared in this dump.\n";
2245 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
2246 ScopedPrinter SP(errs());
2247 SP.printBinaryBlock("Function contents", *Function.getData());
2248 errs() << "\n";
2249 Function.dump();
2250 errs() << "ERROR: " << Message;
2251 errs() << "\n=======================================\n";
2252 exit(1);
2255 BinaryFunction *
2256 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2257 bool IsSimple) {
2258 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2259 BinaryFunction *BF = InjectedBinaryFunctions.back();
2260 setSymbolToFunctionMap(BF->getSymbol(), BF);
2261 BF->CurrentState = BinaryFunction::State::CFG;
2262 return BF;
2265 std::pair<size_t, size_t>
2266 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2267 // Adjust branch instruction to match the current layout.
2268 if (FixBranches)
2269 BF.fixBranches();
2271 // Create local MC context to isolate the effect of ephemeral code emission.
2272 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2273 MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2274 MCAsmBackend *MAB =
2275 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2277 SmallString<256> Code;
2278 raw_svector_ostream VecOS(Code);
2280 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2281 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2282 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2283 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2284 /*RelaxAll=*/false,
2285 /*IncrementalLinkerCompatible=*/false,
2286 /*DWARFMustBeAtTheEnd=*/false));
2288 Streamer->initSections(false, *STI);
2290 MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2291 Section->setHasInstructions(true);
2293 // Create symbols in the LocalCtx so that they get destroyed with it.
2294 MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2295 MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2297 Streamer->switchSection(Section);
2298 Streamer->emitLabel(StartLabel);
2299 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2300 /*EmitCodeOnly=*/true);
2301 Streamer->emitLabel(EndLabel);
2303 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2304 SmallVector<LabelRange> SplitLabels;
2305 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2306 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2307 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2308 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2310 MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2311 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2312 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2313 SplitSection->setHasInstructions(true);
2314 Streamer->switchSection(SplitSection);
2316 Streamer->emitLabel(SplitStartLabel);
2317 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2318 Streamer->emitLabel(SplitEndLabel);
2319 // To avoid calling MCObjectStreamer::flushPendingLabels() which is
2320 // private
2321 Streamer->emitBytes(StringRef(""));
2322 Streamer->switchSection(Section);
2325 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2326 // MCStreamer::Finish(), which does more than we want
2327 Streamer->emitBytes(StringRef(""));
2329 MCAssembler &Assembler =
2330 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2331 MCAsmLayout Layout(Assembler);
2332 Assembler.layout(Layout);
2334 // Obtain fragment sizes.
2335 std::vector<uint64_t> FragmentSizes;
2336 // Main fragment size.
2337 const uint64_t HotSize =
2338 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2339 FragmentSizes.push_back(HotSize);
2340 // Split fragment sizes.
2341 uint64_t ColdSize = 0;
2342 for (const auto &Labels : SplitLabels) {
2343 uint64_t Size = Layout.getSymbolOffset(*Labels.second) -
2344 Layout.getSymbolOffset(*Labels.first);
2345 FragmentSizes.push_back(Size);
2346 ColdSize += Size;
2349 // Populate new start and end offsets of each basic block.
2350 uint64_t FragmentIndex = 0;
2351 for (FunctionFragment &FF : BF.getLayout().fragments()) {
2352 BinaryBasicBlock *PrevBB = nullptr;
2353 for (BinaryBasicBlock *BB : FF) {
2354 const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
2355 BB->setOutputStartAddress(BBStartOffset);
2356 if (PrevBB)
2357 PrevBB->setOutputEndAddress(BBStartOffset);
2358 PrevBB = BB;
2360 if (PrevBB)
2361 PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
2362 FragmentIndex++;
2365 // Clean-up the effect of the code emission.
2366 for (const MCSymbol &Symbol : Assembler.symbols()) {
2367 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2368 MutableSymbol->setUndefined();
2369 MutableSymbol->setIsRegistered(false);
2372 return std::make_pair(HotSize, ColdSize);
2375 bool BinaryContext::validateInstructionEncoding(
2376 ArrayRef<uint8_t> InputSequence) const {
2377 MCInst Inst;
2378 uint64_t InstSize;
2379 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2380 assert(InstSize == InputSequence.size() &&
2381 "Disassembled instruction size does not match the sequence.");
2383 SmallString<256> Code;
2384 SmallVector<MCFixup, 4> Fixups;
2386 MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2387 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2388 if (InputSequence != OutputSequence) {
2389 if (opts::Verbosity > 1) {
2390 errs() << "BOLT-WARNING: mismatched encoding detected\n"
2391 << " input: " << InputSequence << '\n'
2392 << " output: " << OutputSequence << '\n';
2394 return false;
2397 return true;
2400 uint64_t BinaryContext::getHotThreshold() const {
2401 static uint64_t Threshold = 0;
2402 if (Threshold == 0) {
2403 Threshold = std::max(
2404 (uint64_t)opts::ExecutionCountThreshold,
2405 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2407 return Threshold;
2410 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2411 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2412 auto FI = BinaryFunctions.upper_bound(Address);
2413 if (FI == BinaryFunctions.begin())
2414 return nullptr;
2415 --FI;
2417 const uint64_t UsedSize =
2418 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2420 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2421 return nullptr;
2423 return &FI->second;
2426 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2427 // First, try to find a function starting at the given address. If the
2428 // function was folded, this will get us the original folded function if it
2429 // wasn't removed from the list, e.g. in non-relocation mode.
2430 auto BFI = BinaryFunctions.find(Address);
2431 if (BFI != BinaryFunctions.end())
2432 return &BFI->second;
2434 // We might have folded the function matching the object at the given
2435 // address. In such case, we look for a function matching the symbol
2436 // registered at the original address. The new function (the one that the
2437 // original was folded into) will hold the symbol.
2438 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2439 uint64_t EntryID = 0;
2440 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2441 if (BF && EntryID == 0)
2442 return BF;
2444 return nullptr;
2447 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2448 const DWARFAddressRangesVector &InputRanges) const {
2449 DebugAddressRangesVector OutputRanges;
2451 for (const DWARFAddressRange Range : InputRanges) {
2452 auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2453 while (BFI != BinaryFunctions.end()) {
2454 const BinaryFunction &Function = BFI->second;
2455 if (Function.getAddress() >= Range.HighPC)
2456 break;
2457 const DebugAddressRangesVector FunctionRanges =
2458 Function.getOutputAddressRanges();
2459 llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2460 std::advance(BFI, 1);
2464 return OutputRanges;
2467 } // namespace bolt
2468 } // namespace llvm