Bump version to 19.1.0 (final)
[llvm-project.git] / bolt / lib / Core / BinaryContext.cpp
blob6a1106f23e48578db939511c74176caf1ff3a699
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAssembler.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
26 #include "llvm/MC/MCInstPrinter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionELF.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/Regex.h"
37 #include <algorithm>
38 #include <functional>
39 #include <iterator>
40 #include <unordered_set>
42 using namespace llvm;
44 #undef DEBUG_TYPE
45 #define DEBUG_TYPE "bolt"
47 namespace opts {
49 cl::opt<bool> NoHugePages("no-huge-pages",
50 cl::desc("use regular size pages for code alignment"),
51 cl::Hidden, cl::cat(BoltCategory));
53 static cl::opt<bool>
54 PrintDebugInfo("print-debug-info",
55 cl::desc("print debug info when printing functions"),
56 cl::Hidden,
57 cl::ZeroOrMore,
58 cl::cat(BoltCategory));
60 cl::opt<bool> PrintRelocations(
61 "print-relocations",
62 cl::desc("print relocations when printing functions/objects"), cl::Hidden,
63 cl::cat(BoltCategory));
65 static cl::opt<bool>
66 PrintMemData("print-mem-data",
67 cl::desc("print memory data annotations when printing functions"),
68 cl::Hidden,
69 cl::ZeroOrMore,
70 cl::cat(BoltCategory));
72 cl::opt<std::string> CompDirOverride(
73 "comp-dir-override",
74 cl::desc("overrides DW_AT_comp_dir, and provides an alternative base "
75 "location, which is used with DW_AT_dwo_name to construct a path "
76 "to *.dwo files."),
77 cl::Hidden, cl::init(""), cl::cat(BoltCategory));
78 } // namespace opts
80 namespace llvm {
81 namespace bolt {
83 char BOLTError::ID = 0;
85 BOLTError::BOLTError(bool IsFatal, const Twine &S)
86 : IsFatal(IsFatal), Msg(S.str()) {}
88 void BOLTError::log(raw_ostream &OS) const {
89 if (IsFatal)
90 OS << "FATAL ";
91 StringRef ErrMsg = StringRef(Msg);
92 // Prepend our error prefix if it is missing
93 if (ErrMsg.empty()) {
94 OS << "BOLT-ERROR\n";
95 } else {
96 if (!ErrMsg.starts_with("BOLT-ERROR"))
97 OS << "BOLT-ERROR: ";
98 OS << ErrMsg << "\n";
102 std::error_code BOLTError::convertToErrorCode() const {
103 return inconvertibleErrorCode();
106 Error createNonFatalBOLTError(const Twine &S) {
107 return make_error<BOLTError>(/*IsFatal*/ false, S);
110 Error createFatalBOLTError(const Twine &S) {
111 return make_error<BOLTError>(/*IsFatal*/ true, S);
114 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) {
115 handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) {
116 if (!E.getMessage().empty())
117 E.log(this->errs());
118 if (E.isFatal())
119 exit(1);
123 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
124 std::unique_ptr<DWARFContext> DwCtx,
125 std::unique_ptr<Triple> TheTriple,
126 const Target *TheTarget, std::string TripleName,
127 std::unique_ptr<MCCodeEmitter> MCE,
128 std::unique_ptr<MCObjectFileInfo> MOFI,
129 std::unique_ptr<const MCAsmInfo> AsmInfo,
130 std::unique_ptr<const MCInstrInfo> MII,
131 std::unique_ptr<const MCSubtargetInfo> STI,
132 std::unique_ptr<MCInstPrinter> InstPrinter,
133 std::unique_ptr<const MCInstrAnalysis> MIA,
134 std::unique_ptr<MCPlusBuilder> MIB,
135 std::unique_ptr<const MCRegisterInfo> MRI,
136 std::unique_ptr<MCDisassembler> DisAsm,
137 JournalingStreams Logger)
138 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
139 TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
140 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
141 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
142 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
143 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)),
144 Logger(Logger), InitialDynoStats(isAArch64()) {
145 Relocation::Arch = this->TheTriple->getArch();
146 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
147 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
150 BinaryContext::~BinaryContext() {
151 for (BinarySection *Section : Sections)
152 delete Section;
153 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
154 delete InjectedFunction;
155 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
156 delete JTI.second;
157 clearBinaryData();
160 /// Create BinaryContext for a given architecture \p ArchName and
161 /// triple \p TripleName.
162 Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
163 Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features,
164 bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
165 StringRef ArchName = "";
166 std::string FeaturesStr = "";
167 switch (TheTriple.getArch()) {
168 case llvm::Triple::x86_64:
169 if (Features)
170 return createFatalBOLTError(
171 "x86_64 target does not use SubtargetFeatures");
172 ArchName = "x86-64";
173 FeaturesStr = "+nopl";
174 break;
175 case llvm::Triple::aarch64:
176 if (Features)
177 return createFatalBOLTError(
178 "AArch64 target does not use SubtargetFeatures");
179 ArchName = "aarch64";
180 FeaturesStr = "+all";
181 break;
182 case llvm::Triple::riscv64: {
183 ArchName = "riscv64";
184 if (!Features)
185 return createFatalBOLTError("RISCV target needs SubtargetFeatures");
186 // We rely on relaxation for some transformations (e.g., promoting all calls
187 // to PseudoCALL and then making JITLink relax them). Since the relax
188 // feature is not stored in the object file, we manually enable it.
189 Features->AddFeature("relax");
190 FeaturesStr = Features->getString();
191 break;
193 default:
194 return createStringError(std::errc::not_supported,
195 "BOLT-ERROR: Unrecognized machine in ELF file");
198 const std::string TripleName = TheTriple.str();
200 std::string Error;
201 const Target *TheTarget =
202 TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error);
203 if (!TheTarget)
204 return createStringError(make_error_code(std::errc::not_supported),
205 Twine("BOLT-ERROR: ", Error));
207 std::unique_ptr<const MCRegisterInfo> MRI(
208 TheTarget->createMCRegInfo(TripleName));
209 if (!MRI)
210 return createStringError(
211 make_error_code(std::errc::not_supported),
212 Twine("BOLT-ERROR: no register info for target ", TripleName));
214 // Set up disassembler.
215 std::unique_ptr<MCAsmInfo> AsmInfo(
216 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
217 if (!AsmInfo)
218 return createStringError(
219 make_error_code(std::errc::not_supported),
220 Twine("BOLT-ERROR: no assembly info for target ", TripleName));
221 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
222 // we want to emit such names as using @PLT without double quotes to convey
223 // variant kind to the assembler. BOLT doesn't rely on the linker so we can
224 // override the default AsmInfo behavior to emit names the way we want.
225 AsmInfo->setAllowAtInName(true);
227 std::unique_ptr<const MCSubtargetInfo> STI(
228 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
229 if (!STI)
230 return createStringError(
231 make_error_code(std::errc::not_supported),
232 Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
234 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
235 if (!MII)
236 return createStringError(
237 make_error_code(std::errc::not_supported),
238 Twine("BOLT-ERROR: no instruction info for target ", TripleName));
240 std::unique_ptr<MCContext> Ctx(
241 new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
242 std::unique_ptr<MCObjectFileInfo> MOFI(
243 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
244 Ctx->setObjectFileInfo(MOFI.get());
245 // We do not support X86 Large code model. Change this in the future.
246 bool Large = false;
247 if (TheTriple.getArch() == llvm::Triple::aarch64)
248 Large = true;
249 unsigned LSDAEncoding =
250 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
251 if (IsPIC) {
252 LSDAEncoding = dwarf::DW_EH_PE_pcrel |
253 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
256 std::unique_ptr<MCDisassembler> DisAsm(
257 TheTarget->createMCDisassembler(*STI, *Ctx));
259 if (!DisAsm)
260 return createStringError(
261 make_error_code(std::errc::not_supported),
262 Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
264 std::unique_ptr<const MCInstrAnalysis> MIA(
265 TheTarget->createMCInstrAnalysis(MII.get()));
266 if (!MIA)
267 return createStringError(
268 make_error_code(std::errc::not_supported),
269 Twine("BOLT-ERROR: failed to create instruction analysis for target ",
270 TripleName));
272 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
273 std::unique_ptr<MCInstPrinter> InstructionPrinter(
274 TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo,
275 *MII, *MRI));
276 if (!InstructionPrinter)
277 return createStringError(
278 make_error_code(std::errc::not_supported),
279 Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
280 InstructionPrinter->setPrintImmHex(true);
282 std::unique_ptr<MCCodeEmitter> MCE(
283 TheTarget->createMCCodeEmitter(*MII, *Ctx));
285 auto BC = std::make_unique<BinaryContext>(
286 std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple),
287 TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI),
288 std::move(AsmInfo), std::move(MII), std::move(STI),
289 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
290 std::move(DisAsm), Logger);
292 BC->LSDAEncoding = LSDAEncoding;
294 BC->MAB = std::unique_ptr<MCAsmBackend>(
295 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
297 BC->setFilename(InputFileName);
299 BC->HasFixedLoadAddress = !IsPIC;
301 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
302 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
304 if (!BC->SymbolicDisAsm)
305 return createStringError(
306 make_error_code(std::errc::not_supported),
307 Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
309 return std::move(BC);
312 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
313 if (opts::HotText &&
314 (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
315 return true;
317 if (opts::HotData &&
318 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
319 return true;
321 if (SymbolName == "_end")
322 return true;
324 return false;
327 std::unique_ptr<MCObjectWriter>
328 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
329 return MAB->createObjectWriter(OS);
332 bool BinaryContext::validateObjectNesting() const {
333 auto Itr = BinaryDataMap.begin();
334 auto End = BinaryDataMap.end();
335 bool Valid = true;
336 while (Itr != End) {
337 auto Next = std::next(Itr);
338 while (Next != End &&
339 Itr->second->getSection() == Next->second->getSection() &&
340 Itr->second->containsRange(Next->second->getAddress(),
341 Next->second->getSize())) {
342 if (Next->second->Parent != Itr->second) {
343 this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
344 << "BOLT-WARNING: " << *Itr->second << "\n"
345 << "BOLT-WARNING: " << *Next->second << "\n";
346 Valid = false;
348 ++Next;
350 Itr = Next;
352 return Valid;
355 bool BinaryContext::validateHoles() const {
356 bool Valid = true;
357 for (BinarySection &Section : sections()) {
358 for (const Relocation &Rel : Section.relocations()) {
359 uint64_t RelAddr = Rel.Offset + Section.getAddress();
360 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
361 if (!BD) {
362 this->errs()
363 << "BOLT-WARNING: no BinaryData found for relocation at address"
364 << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName()
365 << "\n";
366 Valid = false;
367 } else if (!BD->getAtomicRoot()) {
368 this->errs()
369 << "BOLT-WARNING: no atomic BinaryData found for relocation at "
370 << "address 0x" << Twine::utohexstr(RelAddr) << " in "
371 << Section.getName() << "\n";
372 Valid = false;
376 return Valid;
379 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
380 const uint64_t Address = GAI->second->getAddress();
381 const uint64_t Size = GAI->second->getSize();
383 auto fixParents = [&](BinaryDataMapType::iterator Itr,
384 BinaryData *NewParent) {
385 BinaryData *OldParent = Itr->second->Parent;
386 Itr->second->Parent = NewParent;
387 ++Itr;
388 while (Itr != BinaryDataMap.end() && OldParent &&
389 Itr->second->Parent == OldParent) {
390 Itr->second->Parent = NewParent;
391 ++Itr;
395 // Check if the previous symbol contains the newly added symbol.
396 if (GAI != BinaryDataMap.begin()) {
397 BinaryData *Prev = std::prev(GAI)->second;
398 while (Prev) {
399 if (Prev->getSection() == GAI->second->getSection() &&
400 Prev->containsRange(Address, Size)) {
401 fixParents(GAI, Prev);
402 } else {
403 fixParents(GAI, nullptr);
405 Prev = Prev->Parent;
409 // Check if the newly added symbol contains any subsequent symbols.
410 if (Size != 0) {
411 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
412 auto Itr = std::next(GAI);
413 while (
414 Itr != BinaryDataMap.end() &&
415 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
416 Itr->second->Parent = BD;
417 ++Itr;
422 iterator_range<BinaryContext::binary_data_iterator>
423 BinaryContext::getSubBinaryData(BinaryData *BD) {
424 auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
425 auto End = Start;
426 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
427 ++End;
428 return make_range(Start, End);
431 std::pair<const MCSymbol *, uint64_t>
432 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
433 bool IsPCRel) {
434 if (isAArch64()) {
435 // Check if this is an access to a constant island and create bookkeeping
436 // to keep track of it and emit it later as part of this function.
437 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
438 return std::make_pair(IslandSym, 0);
440 // Detect custom code written in assembly that refers to arbitrary
441 // constant islands from other functions. Write this reference so we
442 // can pull this constant island and emit it as part of this function
443 // too.
444 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
446 if (IslandIter != AddressToConstantIslandMap.begin() &&
447 (IslandIter == AddressToConstantIslandMap.end() ||
448 IslandIter->first > Address))
449 --IslandIter;
451 if (IslandIter != AddressToConstantIslandMap.end()) {
452 // Fall-back to referencing the original constant island in the presence
453 // of dynamic relocs, as we currently do not support cloning them.
454 // Notice: we might fail to link because of this, if the original constant
455 // island we are referring would be emitted too far away.
456 if (IslandIter->second->hasDynamicRelocationAtIsland()) {
457 MCSymbol *IslandSym =
458 IslandIter->second->getOrCreateIslandAccess(Address);
459 if (IslandSym)
460 return std::make_pair(IslandSym, 0);
461 } else if (MCSymbol *IslandSym =
462 IslandIter->second->getOrCreateProxyIslandAccess(Address,
463 BF)) {
464 BF.createIslandDependency(IslandSym, IslandIter->second);
465 return std::make_pair(IslandSym, 0);
470 // Note that the address does not necessarily have to reside inside
471 // a section, it could be an absolute address too.
472 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
473 if (Section && Section->isText()) {
474 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
475 if (Address != BF.getAddress()) {
476 // The address could potentially escape. Mark it as another entry
477 // point into the function.
478 if (opts::Verbosity >= 1) {
479 this->outs() << "BOLT-INFO: potentially escaped address 0x"
480 << Twine::utohexstr(Address) << " in function " << BF
481 << '\n';
483 BF.HasInternalLabelReference = true;
484 return std::make_pair(
485 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
487 } else {
488 addInterproceduralReference(&BF, Address);
492 // With relocations, catch jump table references outside of the basic block
493 // containing the indirect jump.
494 if (HasRelocations) {
495 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
496 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
497 const MCSymbol *Symbol =
498 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
500 return std::make_pair(Symbol, 0);
504 if (BinaryData *BD = getBinaryDataContainingAddress(Address))
505 return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
507 // TODO: use DWARF info to get size/alignment here?
508 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
509 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
510 return std::make_pair(TargetSymbol, 0);
513 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
514 BinaryFunction &BF) {
515 if (!isX86())
516 return MemoryContentsType::UNKNOWN;
518 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
519 if (!Section) {
520 // No section - possibly an absolute address. Since we don't allow
521 // internal function addresses to escape the function scope - we
522 // consider it a tail call.
523 if (opts::Verbosity > 1) {
524 this->errs() << "BOLT-WARNING: no section for address 0x"
525 << Twine::utohexstr(Address) << " referenced from function "
526 << BF << '\n';
528 return MemoryContentsType::UNKNOWN;
531 if (Section->isVirtual()) {
532 // The contents are filled at runtime.
533 return MemoryContentsType::UNKNOWN;
536 // No support for jump tables in code yet.
537 if (Section->isText())
538 return MemoryContentsType::UNKNOWN;
540 // Start with checking for PIC jump table. We expect non-PIC jump tables
541 // to have high 32 bits set to 0.
542 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
543 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
545 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
546 return MemoryContentsType::POSSIBLE_JUMP_TABLE;
548 return MemoryContentsType::UNKNOWN;
551 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
552 const JumpTable::JumpTableType Type,
553 const BinaryFunction &BF,
554 const uint64_t NextJTAddress,
555 JumpTable::AddressesType *EntriesAsAddress,
556 bool *HasEntryInFragment) const {
557 // Target address of __builtin_unreachable.
558 const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();
560 // Is one of the targets __builtin_unreachable?
561 bool HasUnreachable = false;
563 // Does one of the entries match function start address?
564 bool HasStartAsEntry = false;
566 // Number of targets other than __builtin_unreachable.
567 uint64_t NumRealEntries = 0;
569 // Size of the jump table without trailing __builtin_unreachable entries.
570 size_t TrimmedSize = 0;
572 auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
573 if (!EntriesAsAddress)
574 return;
575 EntriesAsAddress->emplace_back(EntryAddress);
576 if (!Unreachable)
577 TrimmedSize = EntriesAsAddress->size();
580 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
581 if (!Section)
582 return false;
584 // The upper bound is defined by containing object, section limits, and
585 // the next jump table in memory.
586 uint64_t UpperBound = Section->getEndAddress();
587 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
588 if (JumpTableBD && JumpTableBD->getSize()) {
589 assert(JumpTableBD->getEndAddress() <= UpperBound &&
590 "data object cannot cross a section boundary");
591 UpperBound = JumpTableBD->getEndAddress();
593 if (NextJTAddress)
594 UpperBound = std::min(NextJTAddress, UpperBound);
596 LLVM_DEBUG({
597 using JTT = JumpTable::JumpTableType;
598 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
599 Address, BF.getPrintName(),
600 Type == JTT::JTT_PIC ? "PIC" : "Normal");
602 const uint64_t EntrySize = getJumpTableEntrySize(Type);
603 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
604 EntryAddress += EntrySize) {
605 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress)
606 << " -> ");
607 // Check if there's a proper relocation against the jump table entry.
608 if (HasRelocations) {
609 if (Type == JumpTable::JTT_PIC &&
610 !DataPCRelocations.count(EntryAddress)) {
611 LLVM_DEBUG(
612 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
613 break;
615 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
616 LLVM_DEBUG(
617 dbgs()
618 << "FAIL: JTT_NORMAL table, no relocation for this address\n");
619 break;
623 const uint64_t Value =
624 (Type == JumpTable::JTT_PIC)
625 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
626 : *getPointerAtAddress(EntryAddress);
628 // __builtin_unreachable() case.
629 if (Value == UnreachableAddress) {
630 addEntryAddress(Value, /*Unreachable*/ true);
631 HasUnreachable = true;
632 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
633 continue;
636 // Function start is another special case. It is allowed in the jump table,
637 // but we need at least one another regular entry to distinguish the table
638 // from, e.g. a function pointer array.
639 if (Value == BF.getAddress()) {
640 HasStartAsEntry = true;
641 addEntryAddress(Value);
642 continue;
645 // Function or one of its fragments.
646 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
647 const bool DoesBelongToFunction =
648 BF.containsAddress(Value) ||
649 (TargetBF && TargetBF->isParentOrChildOf(BF));
650 if (!DoesBelongToFunction) {
651 LLVM_DEBUG({
652 if (!BF.containsAddress(Value)) {
653 dbgs() << "FAIL: function doesn't contain this address\n";
654 if (TargetBF) {
655 dbgs() << " ! function containing this address: "
656 << TargetBF->getPrintName() << '\n';
657 if (TargetBF->isFragment()) {
658 dbgs() << " ! is a fragment";
659 for (BinaryFunction *Parent : TargetBF->ParentFragments)
660 dbgs() << ", parent: " << Parent->getPrintName();
661 dbgs() << '\n';
666 break;
669 // Check there's an instruction at this offset.
670 if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
671 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
672 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
673 break;
676 ++NumRealEntries;
677 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
679 if (TargetBF != &BF && HasEntryInFragment)
680 *HasEntryInFragment = true;
681 addEntryAddress(Value);
684 // Trim direct/normal jump table to exclude trailing unreachable entries that
685 // can collide with a function address.
686 if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
687 TrimmedSize != EntriesAsAddress->size() &&
688 getBinaryFunctionAtAddress(UnreachableAddress))
689 EntriesAsAddress->resize(TrimmedSize);
691 // It's a jump table if the number of real entries is more than 1, or there's
692 // one real entry and one or more special targets. If there are only multiple
693 // special targets, then it's not a jump table.
694 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
697 void BinaryContext::populateJumpTables() {
698 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
699 << '\n');
700 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
701 ++JTI) {
702 JumpTable *JT = JTI->second;
704 bool NonSimpleParent = false;
705 for (BinaryFunction *BF : JT->Parents)
706 NonSimpleParent |= !BF->isSimple();
707 if (NonSimpleParent)
708 continue;
710 uint64_t NextJTAddress = 0;
711 auto NextJTI = std::next(JTI);
712 if (NextJTI != JTE)
713 NextJTAddress = NextJTI->second->getAddress();
715 const bool Success =
716 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
717 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
718 if (!Success) {
719 LLVM_DEBUG({
720 dbgs() << "failed to analyze ";
721 JT->print(dbgs());
722 if (NextJTI != JTE) {
723 dbgs() << "next ";
724 NextJTI->second->print(dbgs());
727 llvm_unreachable("jump table heuristic failure");
729 for (BinaryFunction *Frag : JT->Parents) {
730 if (JT->IsSplit)
731 Frag->setHasIndirectTargetToSplitFragment(true);
732 for (uint64_t EntryAddress : JT->EntriesAsAddress)
733 // if target is builtin_unreachable
734 if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
735 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
736 Frag->getSize());
737 } else if (EntryAddress >= Frag->getAddress() &&
738 EntryAddress < Frag->getAddress() + Frag->getSize()) {
739 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
743 // In strict mode, erase PC-relative relocation record. Later we check that
744 // all such records are erased and thus have been accounted for.
745 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
746 for (uint64_t Address = JT->getAddress();
747 Address < JT->getAddress() + JT->getSize();
748 Address += JT->EntrySize) {
749 DataPCRelocations.erase(DataPCRelocations.find(Address));
753 // Mark to skip the function and all its fragments.
754 for (BinaryFunction *Frag : JT->Parents)
755 if (Frag->hasIndirectTargetToSplitFragment())
756 addFragmentsToSkip(Frag);
759 if (opts::StrictMode && DataPCRelocations.size()) {
760 LLVM_DEBUG({
761 dbgs() << DataPCRelocations.size()
762 << " unclaimed PC-relative relocations left in data:\n";
763 for (uint64_t Reloc : DataPCRelocations)
764 dbgs() << Twine::utohexstr(Reloc) << '\n';
766 assert(0 && "unclaimed PC-relative relocations left in data\n");
768 clearList(DataPCRelocations);
771 void BinaryContext::skipMarkedFragments() {
772 std::vector<BinaryFunction *> FragmentQueue;
773 // Copy the functions to FragmentQueue.
774 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
775 auto addToWorklist = [&](BinaryFunction *Function) -> void {
776 if (FragmentsToSkip.count(Function))
777 return;
778 FragmentQueue.push_back(Function);
779 addFragmentsToSkip(Function);
781 // Functions containing split jump tables need to be skipped with all
782 // fragments (transitively).
783 for (size_t I = 0; I != FragmentQueue.size(); I++) {
784 BinaryFunction *BF = FragmentQueue[I];
785 assert(FragmentsToSkip.count(BF) &&
786 "internal error in traversing function fragments");
787 if (opts::Verbosity >= 1)
788 this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
789 BF->setSimple(false);
790 BF->setHasIndirectTargetToSplitFragment(true);
792 llvm::for_each(BF->Fragments, addToWorklist);
793 llvm::for_each(BF->ParentFragments, addToWorklist);
795 if (!FragmentsToSkip.empty())
796 this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size()
797 << " function" << (FragmentsToSkip.size() == 1 ? "" : "s")
798 << " due to cold fragments\n";
801 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
802 uint64_t Size,
803 uint16_t Alignment,
804 unsigned Flags) {
805 auto Itr = BinaryDataMap.find(Address);
806 if (Itr != BinaryDataMap.end()) {
807 assert(Itr->second->getSize() == Size || !Size);
808 return Itr->second->getSymbol();
811 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
812 assert(!GlobalSymbols.count(Name) && "created name is not unique");
813 return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
816 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
817 return Ctx->getOrCreateSymbol(Name);
820 BinaryFunction *BinaryContext::createBinaryFunction(
821 const std::string &Name, BinarySection &Section, uint64_t Address,
822 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
823 auto Result = BinaryFunctions.emplace(
824 Address, BinaryFunction(Name, Section, Address, Size, *this));
825 assert(Result.second == true && "unexpected duplicate function");
826 BinaryFunction *BF = &Result.first->second;
827 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
828 Alignment);
829 setSymbolToFunctionMap(BF->getSymbol(), BF);
830 return BF;
833 const MCSymbol *
834 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
835 JumpTable::JumpTableType Type) {
836 // Two fragments of same function access same jump table
837 if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
838 assert(JT->Type == Type && "jump table types have to match");
839 assert(Address == JT->getAddress() && "unexpected non-empty jump table");
841 // Prevent associating a jump table to a specific fragment twice.
842 // This simple check arises from the assumption: no more than 2 fragments.
843 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
844 assert(JT->Parents[0]->isParentOrChildOf(Function) &&
845 "cannot re-use jump table of a different function");
846 // Duplicate the entry for the parent function for easy access
847 JT->Parents.push_back(&Function);
848 if (opts::Verbosity > 2) {
849 this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
850 << JT->Parents[0]->getPrintName() << "; "
851 << Function.getPrintName() << "\n";
852 JT->print(this->outs());
854 Function.JumpTables.emplace(Address, JT);
855 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
856 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
859 bool IsJumpTableParent = false;
860 (void)IsJumpTableParent;
861 for (BinaryFunction *Frag : JT->Parents)
862 if (Frag == &Function)
863 IsJumpTableParent = true;
864 assert(IsJumpTableParent &&
865 "cannot re-use jump table of a different function");
866 return JT->getFirstLabel();
869 // Re-use the existing symbol if possible.
870 MCSymbol *JTLabel = nullptr;
871 if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
872 if (!isInternalSymbolName(Object->getSymbol()->getName()))
873 JTLabel = Object->getSymbol();
876 const uint64_t EntrySize = getJumpTableEntrySize(Type);
877 if (!JTLabel) {
878 const std::string JumpTableName = generateJumpTableName(Function, Address);
879 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
882 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
883 << " in function " << Function << '\n');
885 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
886 JumpTable::LabelMapType{{0, JTLabel}},
887 *getSectionForAddress(Address));
888 JT->Parents.push_back(&Function);
889 if (opts::Verbosity > 2)
890 JT->print(this->outs());
891 JumpTables.emplace(Address, JT);
893 // Duplicate the entry for the parent function for easy access.
894 Function.JumpTables.emplace(Address, JT);
895 return JTLabel;
898 std::pair<uint64_t, const MCSymbol *>
899 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
900 const MCSymbol *OldLabel) {
901 auto L = scopeLock();
902 unsigned Offset = 0;
903 bool Found = false;
904 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
905 if (Elmt.second != OldLabel)
906 continue;
907 Offset = Elmt.first;
908 Found = true;
909 break;
911 assert(Found && "Label not found");
912 (void)Found;
913 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
914 JumpTable *NewJT =
915 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
916 JumpTable::LabelMapType{{Offset, NewLabel}},
917 *getSectionForAddress(JT->getAddress()));
918 NewJT->Parents = JT->Parents;
919 NewJT->Entries = JT->Entries;
920 NewJT->Counts = JT->Counts;
921 uint64_t JumpTableID = ++DuplicatedJumpTables;
922 // Invert it to differentiate from regular jump tables whose IDs are their
923 // addresses in the input binary memory space
924 JumpTableID = ~JumpTableID;
925 JumpTables.emplace(JumpTableID, NewJT);
926 Function.JumpTables.emplace(JumpTableID, NewJT);
927 return std::make_pair(JumpTableID, NewLabel);
930 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
931 uint64_t Address) {
932 size_t Id;
933 uint64_t Offset = 0;
934 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
935 Offset = Address - JT->getAddress();
936 auto JTLabelsIt = JT->Labels.find(Offset);
937 if (JTLabelsIt != JT->Labels.end())
938 return std::string(JTLabelsIt->second->getName());
940 auto JTIdsIt = JumpTableIds.find(JT->getAddress());
941 assert(JTIdsIt != JumpTableIds.end());
942 Id = JTIdsIt->second;
943 } else {
944 Id = JumpTableIds[Address] = BF.JumpTables.size();
946 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
947 (Offset ? ("." + std::to_string(Offset)) : ""));
950 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
951 // FIXME: aarch64 support is missing.
952 if (!isX86())
953 return true;
955 if (BF.getSize() == BF.getMaxSize())
956 return true;
958 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
959 assert(FunctionData && "cannot get function as data");
961 uint64_t Offset = BF.getSize();
962 MCInst Instr;
963 uint64_t InstrSize = 0;
964 uint64_t InstrAddress = BF.getAddress() + Offset;
965 using std::placeholders::_1;
967 // Skip instructions that satisfy the predicate condition.
968 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
969 const uint64_t StartOffset = Offset;
970 for (; Offset < BF.getMaxSize();
971 Offset += InstrSize, InstrAddress += InstrSize) {
972 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
973 InstrAddress, nulls()))
974 break;
975 if (!Predicate(Instr))
976 break;
979 return Offset - StartOffset;
982 // Skip a sequence of zero bytes.
983 auto skipZeros = [&]() {
984 const uint64_t StartOffset = Offset;
985 for (; Offset < BF.getMaxSize(); ++Offset)
986 if ((*FunctionData)[Offset] != 0)
987 break;
989 return Offset - StartOffset;
992 // Accept the whole padding area filled with breakpoints.
993 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
994 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
995 return true;
997 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
999 // Some functions have a jump to the next function or to the padding area
1000 // inserted after the body.
1001 auto isSkipJump = [&](const MCInst &Instr) {
1002 uint64_t TargetAddress = 0;
1003 if (MIB->isUnconditionalBranch(Instr) &&
1004 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
1005 if (TargetAddress >= InstrAddress + InstrSize &&
1006 TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
1007 return true;
1010 return false;
1013 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
1014 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
1015 skipZeros())
1018 if (Offset == BF.getMaxSize())
1019 return true;
1021 if (opts::Verbosity >= 1) {
1022 this->errs() << "BOLT-WARNING: bad padding at address 0x"
1023 << Twine::utohexstr(BF.getAddress() + BF.getSize())
1024 << " starting at offset " << (Offset - BF.getSize())
1025 << " in function " << BF << '\n'
1026 << FunctionData->slice(BF.getSize(),
1027 BF.getMaxSize() - BF.getSize())
1028 << '\n';
1031 return false;
1034 void BinaryContext::adjustCodePadding() {
1035 for (auto &BFI : BinaryFunctions) {
1036 BinaryFunction &BF = BFI.second;
1037 if (!shouldEmit(BF))
1038 continue;
1040 if (!hasValidCodePadding(BF)) {
1041 if (HasRelocations) {
1042 if (opts::Verbosity >= 1) {
1043 this->outs() << "BOLT-INFO: function " << BF
1044 << " has invalid padding. Ignoring the function.\n";
1046 BF.setIgnored();
1047 } else {
1048 BF.setMaxSize(BF.getSize());
1054 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
1055 uint64_t Size,
1056 uint16_t Alignment,
1057 unsigned Flags) {
1058 // Register the name with MCContext.
1059 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1061 auto GAI = BinaryDataMap.find(Address);
1062 BinaryData *BD;
1063 if (GAI == BinaryDataMap.end()) {
1064 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1065 BinarySection &Section =
1066 SectionOrErr ? SectionOrErr.get() : absoluteSection();
1067 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1068 Section, Flags);
1069 GAI = BinaryDataMap.emplace(Address, BD).first;
1070 GlobalSymbols[Name] = BD;
1071 updateObjectNesting(GAI);
1072 } else {
1073 BD = GAI->second;
1074 if (!BD->hasName(Name)) {
1075 GlobalSymbols[Name] = BD;
1076 BD->Symbols.push_back(Symbol);
1080 return Symbol;
1083 const BinaryData *
1084 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1085 auto NI = BinaryDataMap.lower_bound(Address);
1086 auto End = BinaryDataMap.end();
1087 if ((NI != End && Address == NI->first) ||
1088 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1089 if (NI->second->containsAddress(Address))
1090 return NI->second;
1092 // If this is a sub-symbol, see if a parent data contains the address.
1093 const BinaryData *BD = NI->second->getParent();
1094 while (BD) {
1095 if (BD->containsAddress(Address))
1096 return BD;
1097 BD = BD->getParent();
1100 return nullptr;
1103 BinaryData *BinaryContext::getGOTSymbol() {
1104 // First tries to find a global symbol with that name
1105 BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1106 if (GOTSymBD)
1107 return GOTSymBD;
1109 // This symbol might be hidden from run-time link, so fetch the local
1110 // definition if available.
1111 GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1112 if (!GOTSymBD)
1113 return nullptr;
1115 // If the local symbol is not unique, fail
1116 unsigned Index = 2;
1117 SmallString<30> Storage;
1118 while (const BinaryData *BD =
1119 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1120 .concat(Twine(Index++))
1121 .toStringRef(Storage)))
1122 if (BD->getAddress() != GOTSymBD->getAddress())
1123 return nullptr;
1125 return GOTSymBD;
1128 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1129 auto NI = BinaryDataMap.find(Address);
1130 assert(NI != BinaryDataMap.end());
1131 if (NI == BinaryDataMap.end())
1132 return false;
1133 // TODO: it's possible that a jump table starts at the same address
1134 // as a larger blob of private data. When we set the size of the
1135 // jump table, it might be smaller than the total blob size. In this
1136 // case we just leave the original size since (currently) it won't really
1137 // affect anything.
1138 assert((!NI->second->Size || NI->second->Size == Size ||
1139 (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1140 "can't change the size of a symbol that has already had its "
1141 "size set");
1142 if (!NI->second->Size) {
1143 NI->second->Size = Size;
1144 updateObjectNesting(NI);
1145 return true;
1147 return false;
1150 void BinaryContext::generateSymbolHashes() {
1151 auto isPadding = [](const BinaryData &BD) {
1152 StringRef Contents = BD.getSection().getContents();
1153 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1154 return (BD.getName().starts_with("HOLEat") ||
1155 SymData.find_first_not_of(0) == StringRef::npos);
1158 uint64_t NumCollisions = 0;
1159 for (auto &Entry : BinaryDataMap) {
1160 BinaryData &BD = *Entry.second;
1161 StringRef Name = BD.getName();
1163 if (!isInternalSymbolName(Name))
1164 continue;
1166 // First check if a non-anonymous alias exists and move it to the front.
1167 if (BD.getSymbols().size() > 1) {
1168 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1169 return !isInternalSymbolName(Symbol->getName());
1171 if (Itr != BD.getSymbols().end()) {
1172 size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1173 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1174 continue;
1178 // We have to skip 0 size symbols since they will all collide.
1179 if (BD.getSize() == 0) {
1180 continue;
1183 const uint64_t Hash = BD.getSection().hash(BD);
1184 const size_t Idx = Name.find("0x");
1185 std::string NewName =
1186 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1187 if (getBinaryDataByName(NewName)) {
1188 // Ignore collisions for symbols that appear to be padding
1189 // (i.e. all zeros or a "hole")
1190 if (!isPadding(BD)) {
1191 if (opts::Verbosity) {
1192 this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1193 << " with new name (" << NewName << "), skipping.\n";
1195 ++NumCollisions;
1197 continue;
1199 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1200 GlobalSymbols[NewName] = &BD;
1202 if (NumCollisions) {
1203 this->errs() << "BOLT-WARNING: " << NumCollisions
1204 << " collisions detected while hashing binary objects";
1205 if (!opts::Verbosity)
1206 this->errs() << ". Use -v=1 to see the list.";
1207 this->errs() << '\n';
1211 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1212 BinaryFunction &Function) const {
1213 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1214 if (TargetFunction.isChildOf(Function))
1215 return true;
1216 TargetFunction.addParentFragment(Function);
1217 Function.addFragment(TargetFunction);
1218 if (!HasRelocations) {
1219 TargetFunction.setSimple(false);
1220 Function.setSimple(false);
1222 if (opts::Verbosity >= 1) {
1223 this->outs() << "BOLT-INFO: marking " << TargetFunction
1224 << " as a fragment of " << Function << '\n';
1226 return true;
1229 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1230 MCInst &LoadLowBits,
1231 MCInst &LoadHiBits,
1232 uint64_t Target) {
1233 const MCSymbol *TargetSymbol;
1234 uint64_t Addend = 0;
1235 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1236 /*IsPCRel*/ true);
1237 int64_t Val;
1238 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1239 ELF::R_AARCH64_ADR_PREL_PG_HI21);
1240 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1241 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1244 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1245 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1246 if (TargetFunction)
1247 return false;
1249 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1250 assert(Section && "cannot get section for referenced address");
1251 if (!Section->isText())
1252 return false;
1254 bool Ret = false;
1255 StringRef SectionContents = Section->getContents();
1256 uint64_t Offset = Address - Section->getAddress();
1257 const uint64_t MaxSize = SectionContents.size() - Offset;
1258 const uint8_t *Bytes =
1259 reinterpret_cast<const uint8_t *>(SectionContents.data());
1260 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1262 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1263 MCInst &Instruction, uint64_t Offset,
1264 uint64_t AbsoluteInstrAddr,
1265 uint64_t TotalSize) -> bool {
1266 MCInst *TargetHiBits, *TargetLowBits;
1267 uint64_t TargetAddress, Count;
1268 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1269 AbsoluteInstrAddr, Instruction, TargetHiBits,
1270 TargetLowBits, TargetAddress);
1271 if (!Count)
1272 return false;
1274 if (MatchOnly)
1275 return true;
1277 // NOTE The target symbol was created during disassemble's
1278 // handleExternalReference
1279 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1280 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1281 *Section, Address, TotalSize);
1282 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1283 TargetAddress);
1284 MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1285 Veneer->addInstruction(Offset, std::move(Instruction));
1286 --Count;
1287 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1288 MIB->addAnnotation(It->second, "AArch64Veneer", true);
1289 Veneer->addInstruction(It->first, std::move(It->second));
1292 Veneer->getOrCreateLocalLabel(Address);
1293 Veneer->setMaxSize(TotalSize);
1294 Veneer->updateState(BinaryFunction::State::Disassembled);
1295 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1296 << "\n");
1297 return true;
1300 uint64_t Size = 0, TotalSize = 0;
1301 BinaryFunction::InstrMapType VeneerInstructions;
1302 for (Offset = 0; Offset < MaxSize; Offset += Size) {
1303 MCInst Instruction;
1304 const uint64_t AbsoluteInstrAddr = Address + Offset;
1305 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1306 AbsoluteInstrAddr, nulls()))
1307 break;
1309 TotalSize += Size;
1310 if (MIB->isBranch(Instruction)) {
1311 Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1312 AbsoluteInstrAddr, TotalSize);
1313 break;
1316 VeneerInstructions.emplace(Offset, std::move(Instruction));
1319 return Ret;
1322 void BinaryContext::processInterproceduralReferences() {
1323 for (const std::pair<BinaryFunction *, uint64_t> &It :
1324 InterproceduralReferences) {
1325 BinaryFunction &Function = *It.first;
1326 uint64_t Address = It.second;
1327 // Process interprocedural references from ignored functions in BAT mode
1328 // (non-simple in non-relocation mode) to properly register entry points
1329 if (!Address || (Function.isIgnored() && !HasBATSection))
1330 continue;
1332 BinaryFunction *TargetFunction =
1333 getBinaryFunctionContainingAddress(Address);
1334 if (&Function == TargetFunction)
1335 continue;
1337 if (TargetFunction) {
1338 if (TargetFunction->isFragment() &&
1339 !TargetFunction->isChildOf(Function)) {
1340 this->errs()
1341 << "BOLT-WARNING: interprocedural reference between unrelated "
1342 "fragments: "
1343 << Function.getPrintName() << " and "
1344 << TargetFunction->getPrintName() << '\n';
1346 if (uint64_t Offset = Address - TargetFunction->getAddress())
1347 TargetFunction->addEntryPointAtOffset(Offset);
1349 continue;
1352 // Check if address falls in function padding space - this could be
1353 // unmarked data in code. In this case adjust the padding space size.
1354 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1355 assert(Section && "cannot get section for referenced address");
1357 if (!Section->isText())
1358 continue;
1360 // PLT requires special handling and could be ignored in this context.
1361 StringRef SectionName = Section->getName();
1362 if (SectionName == ".plt" || SectionName == ".plt.got")
1363 continue;
1365 // Check if it is aarch64 veneer written at Address
1366 if (isAArch64() && handleAArch64Veneer(Address))
1367 continue;
1369 if (opts::processAllFunctions()) {
1370 this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1371 << "object in code at address 0x"
1372 << Twine::utohexstr(Address) << " belonging to section "
1373 << SectionName << " in current mode\n";
1374 exit(1);
1377 TargetFunction = getBinaryFunctionContainingAddress(Address,
1378 /*CheckPastEnd=*/false,
1379 /*UseMaxSize=*/true);
1380 // We are not going to overwrite non-simple functions, but for simple
1381 // ones - adjust the padding size.
1382 if (TargetFunction && TargetFunction->isSimple()) {
1383 this->errs()
1384 << "BOLT-WARNING: function " << *TargetFunction
1385 << " has an object detected in a padding region at address 0x"
1386 << Twine::utohexstr(Address) << '\n';
1387 TargetFunction->setMaxSize(TargetFunction->getSize());
1391 InterproceduralReferences.clear();
1394 void BinaryContext::postProcessSymbolTable() {
1395 fixBinaryDataHoles();
1396 bool Valid = true;
1397 for (auto &Entry : BinaryDataMap) {
1398 BinaryData *BD = Entry.second;
1399 if ((BD->getName().starts_with("SYMBOLat") ||
1400 BD->getName().starts_with("DATAat")) &&
1401 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1402 BD->getSection()) {
1403 this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
1404 << "\n";
1405 Valid = false;
1408 assert(Valid);
1409 (void)Valid;
1410 generateSymbolHashes();
1413 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1414 BinaryFunction &ParentBF) {
1415 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1416 "cannot merge functions with multiple entry points");
1418 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1419 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1420 SymbolToFunctionMapMutex, std::defer_lock);
1422 const StringRef ChildName = ChildBF.getOneName();
1424 // Move symbols over and update bookkeeping info.
1425 for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1426 ParentBF.getSymbols().push_back(Symbol);
1427 WriteSymbolMapLock.lock();
1428 SymbolToFunctionMap[Symbol] = &ParentBF;
1429 WriteSymbolMapLock.unlock();
1430 // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1432 ChildBF.getSymbols().clear();
1434 // Move other names the child function is known under.
1435 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1436 ChildBF.Aliases.clear();
1438 if (HasRelocations) {
1439 // Merge execution counts of ChildBF into those of ParentBF.
1440 // Without relocations, we cannot reliably merge profiles as both functions
1441 // continue to exist and either one can be executed.
1442 ChildBF.mergeProfileDataInto(ParentBF);
1444 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1445 std::defer_lock);
1446 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1447 std::defer_lock);
1448 // Remove ChildBF from the global set of functions in relocs mode.
1449 ReadBfsLock.lock();
1450 auto FI = BinaryFunctions.find(ChildBF.getAddress());
1451 ReadBfsLock.unlock();
1453 assert(FI != BinaryFunctions.end() && "function not found");
1454 assert(&ChildBF == &FI->second && "function mismatch");
1456 WriteBfsLock.lock();
1457 ChildBF.clearDisasmState();
1458 FI = BinaryFunctions.erase(FI);
1459 WriteBfsLock.unlock();
1461 } else {
1462 // In non-relocation mode we keep the function, but rename it.
1463 std::string NewName = "__ICF_" + ChildName.str();
1465 WriteCtxLock.lock();
1466 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1467 WriteCtxLock.unlock();
1469 ChildBF.setFolded(&ParentBF);
1472 ParentBF.setHasFunctionsFoldedInto();
1475 void BinaryContext::fixBinaryDataHoles() {
1476 assert(validateObjectNesting() && "object nesting inconsistency detected");
1478 for (BinarySection &Section : allocatableSections()) {
1479 std::vector<std::pair<uint64_t, uint64_t>> Holes;
1481 auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1482 BinaryData *BD = Itr->second;
1483 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1484 (BD->getName().starts_with("SYMBOLat0x") ||
1485 BD->getName().starts_with("DATAat0x") ||
1486 BD->getName().starts_with("ANONYMOUS")));
1487 return !isHole && BD->getSection() == Section && !BD->getParent();
1490 auto BDStart = BinaryDataMap.begin();
1491 auto BDEnd = BinaryDataMap.end();
1492 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1493 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1495 uint64_t EndAddress = Section.getAddress();
1497 while (Itr != End) {
1498 if (Itr->second->getAddress() > EndAddress) {
1499 uint64_t Gap = Itr->second->getAddress() - EndAddress;
1500 Holes.emplace_back(EndAddress, Gap);
1502 EndAddress = Itr->second->getEndAddress();
1503 ++Itr;
1506 if (EndAddress < Section.getEndAddress())
1507 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1509 // If there is already a symbol at the start of the hole, grow that symbol
1510 // to cover the rest. Otherwise, create a new symbol to cover the hole.
1511 for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1512 BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1513 if (BD) {
1514 // BD->getSection() can be != Section if there are sections that
1515 // overlap. In this case it is probably safe to just skip the holes
1516 // since the overlapping section will not(?) have any symbols in it.
1517 if (BD->getSection() == Section)
1518 setBinaryDataSize(Hole.first, Hole.second);
1519 } else {
1520 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1525 assert(validateObjectNesting() && "object nesting inconsistency detected");
1526 assert(validateHoles() && "top level hole detected in object map");
1529 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1530 const BinarySection *CurrentSection = nullptr;
1531 bool FirstSection = true;
1533 for (auto &Entry : BinaryDataMap) {
1534 const BinaryData *BD = Entry.second;
1535 const BinarySection &Section = BD->getSection();
1536 if (FirstSection || Section != *CurrentSection) {
1537 uint64_t Address, Size;
1538 StringRef Name = Section.getName();
1539 if (Section) {
1540 Address = Section.getAddress();
1541 Size = Section.getSize();
1542 } else {
1543 Address = BD->getAddress();
1544 Size = BD->getSize();
1546 OS << "BOLT-INFO: Section " << Name << ", "
1547 << "0x" + Twine::utohexstr(Address) << ":"
1548 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1549 CurrentSection = &Section;
1550 FirstSection = false;
1553 OS << "BOLT-INFO: ";
1554 const BinaryData *P = BD->getParent();
1555 while (P) {
1556 OS << " ";
1557 P = P->getParent();
1559 OS << *BD << "\n";
1563 Expected<unsigned> BinaryContext::getDwarfFile(
1564 StringRef Directory, StringRef FileName, unsigned FileNumber,
1565 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1566 unsigned CUID, unsigned DWARFVersion) {
1567 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1568 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1569 FileNumber);
1572 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1573 const uint32_t SrcCUID,
1574 unsigned FileIndex) {
1575 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1576 const DWARFDebugLine::LineTable *LineTable =
1577 DwCtx->getLineTableForUnit(SrcUnit);
1578 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1579 LineTable->Prologue.FileNames;
1580 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1581 // means empty dir.
1582 assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1583 "FileIndex out of range for the compilation unit.");
1584 StringRef Dir = "";
1585 if (FileNames[FileIndex - 1].DirIdx != 0) {
1586 if (std::optional<const char *> DirName = dwarf::toString(
1587 LineTable->Prologue
1588 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1589 Dir = *DirName;
1592 StringRef FileName = "";
1593 if (std::optional<const char *> FName =
1594 dwarf::toString(FileNames[FileIndex - 1].Name))
1595 FileName = *FName;
1596 assert(FileName != "");
1597 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1598 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1599 DestCUID, DstUnit->getVersion()));
1602 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1603 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1604 llvm::transform(llvm::make_second_range(BinaryFunctions),
1605 SortedFunctions.begin(),
1606 [](BinaryFunction &BF) { return &BF; });
1608 llvm::stable_sort(SortedFunctions,
1609 [](const BinaryFunction *A, const BinaryFunction *B) {
1610 if (A->hasValidIndex() && B->hasValidIndex()) {
1611 return A->getIndex() < B->getIndex();
1613 return A->hasValidIndex();
1615 return SortedFunctions;
1618 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1619 std::vector<BinaryFunction *> AllFunctions;
1620 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1621 llvm::transform(llvm::make_second_range(BinaryFunctions),
1622 std::back_inserter(AllFunctions),
1623 [](BinaryFunction &BF) { return &BF; });
1624 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1626 return AllFunctions;
1629 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1630 auto Iter = DWOCUs.find(DWOId);
1631 if (Iter == DWOCUs.end())
1632 return std::nullopt;
1634 return Iter->second;
1637 DWARFContext *BinaryContext::getDWOContext() const {
1638 if (DWOCUs.empty())
1639 return nullptr;
1640 return &DWOCUs.begin()->second->getContext();
1643 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1644 void BinaryContext::preprocessDWODebugInfo() {
1645 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1646 DWARFUnit *const DwarfUnit = CU.get();
1647 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1648 std::string DWOName = dwarf::toString(
1649 DwarfUnit->getUnitDIE().find(
1650 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1651 "");
1652 SmallString<16> AbsolutePath;
1653 if (!opts::CompDirOverride.empty()) {
1654 sys::path::append(AbsolutePath, opts::CompDirOverride);
1655 sys::path::append(AbsolutePath, DWOName);
1657 DWARFUnit *DWOCU =
1658 DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit();
1659 if (!DWOCU->isDWOUnit()) {
1660 this->outs()
1661 << "BOLT-WARNING: Debug Fission: DWO debug information for "
1662 << DWOName
1663 << " was not retrieved and won't be updated. Please check "
1664 "relative path.\n";
1665 continue;
1667 DWOCUs[*DWOId] = DWOCU;
1670 if (!DWOCUs.empty())
1671 this->outs() << "BOLT-INFO: processing split DWARF\n";
1674 void BinaryContext::preprocessDebugInfo() {
1675 struct CURange {
1676 uint64_t LowPC;
1677 uint64_t HighPC;
1678 DWARFUnit *Unit;
1680 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1683 // Building a map of address ranges to CUs similar to .debug_aranges and use
1684 // it to assign CU to functions.
1685 std::vector<CURange> AllRanges;
1686 AllRanges.reserve(DwCtx->getNumCompileUnits());
1687 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1688 Expected<DWARFAddressRangesVector> RangesOrError =
1689 CU->getUnitDIE().getAddressRanges();
1690 if (!RangesOrError) {
1691 consumeError(RangesOrError.takeError());
1692 continue;
1694 for (DWARFAddressRange &Range : *RangesOrError) {
1695 // Parts of the debug info could be invalidated due to corresponding code
1696 // being removed from the binary by the linker. Hence we check if the
1697 // address is a valid one.
1698 if (containsAddress(Range.LowPC))
1699 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1702 ContainsDwarf5 |= CU->getVersion() >= 5;
1703 ContainsDwarfLegacy |= CU->getVersion() < 5;
1706 llvm::sort(AllRanges);
1707 for (auto &KV : BinaryFunctions) {
1708 const uint64_t FunctionAddress = KV.first;
1709 BinaryFunction &Function = KV.second;
1711 auto It = llvm::partition_point(
1712 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1713 if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1714 Function.setDWARFUnit(It->Unit);
1717 // Discover units with debug info that needs to be updated.
1718 for (const auto &KV : BinaryFunctions) {
1719 const BinaryFunction &BF = KV.second;
1720 if (shouldEmit(BF) && BF.getDWARFUnit())
1721 ProcessedCUs.insert(BF.getDWARFUnit());
1724 // Clear debug info for functions from units that we are not going to process.
1725 for (auto &KV : BinaryFunctions) {
1726 BinaryFunction &BF = KV.second;
1727 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1728 BF.setDWARFUnit(nullptr);
1731 if (opts::Verbosity >= 1) {
1732 this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1733 << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1736 preprocessDWODebugInfo();
1738 // Populate MCContext with DWARF files from all units.
1739 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1740 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1741 const uint64_t CUID = CU->getOffset();
1742 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1743 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1744 GlobalPrefix + "line_table_start" + Twine(CUID)));
1746 if (!ProcessedCUs.count(CU.get()))
1747 continue;
1749 const DWARFDebugLine::LineTable *LineTable =
1750 DwCtx->getLineTableForUnit(CU.get());
1751 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1752 LineTable->Prologue.FileNames;
1754 uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1755 if (DwarfVersion >= 5) {
1756 std::optional<MD5::MD5Result> Checksum;
1757 if (LineTable->Prologue.ContentTypes.HasMD5)
1758 Checksum = LineTable->Prologue.FileNames[0].Checksum;
1759 std::optional<const char *> Name =
1760 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1761 if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1762 auto Iter = DWOCUs.find(*DWOID);
1763 assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1764 Name = dwarf::toString(
1765 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1767 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1768 std::nullopt);
1771 BinaryLineTable.setDwarfVersion(DwarfVersion);
1773 // Assign a unique label to every line table, one per CU.
1774 // Make sure empty debug line tables are registered too.
1775 if (FileNames.empty()) {
1776 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1777 CUID, DwarfVersion));
1778 continue;
1780 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1781 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1782 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1783 // means empty dir.
1784 StringRef Dir = "";
1785 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1786 if (std::optional<const char *> DirName = dwarf::toString(
1787 LineTable->Prologue
1788 .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1789 Dir = *DirName;
1790 StringRef FileName = "";
1791 if (std::optional<const char *> FName =
1792 dwarf::toString(FileNames[I].Name))
1793 FileName = *FName;
1794 assert(FileName != "");
1795 std::optional<MD5::MD5Result> Checksum;
1796 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1797 Checksum = LineTable->Prologue.FileNames[I].Checksum;
1798 cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1799 DwarfVersion));
1804 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1805 if (Function.isPseudo())
1806 return false;
1808 if (opts::processAllFunctions())
1809 return true;
1811 if (Function.isIgnored())
1812 return false;
1814 // In relocation mode we will emit non-simple functions with CFG.
1815 // If the function does not have a CFG it should be marked as ignored.
1816 return HasRelocations || Function.isSimple();
1819 void BinaryContext::dump(const MCInst &Inst) const {
1820 if (LLVM_UNLIKELY(!InstPrinter)) {
1821 dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1822 return;
1824 InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
1825 dbgs() << "\n";
1828 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1829 uint32_t Operation = Inst.getOperation();
1830 switch (Operation) {
1831 case MCCFIInstruction::OpSameValue:
1832 OS << "OpSameValue Reg" << Inst.getRegister();
1833 break;
1834 case MCCFIInstruction::OpRememberState:
1835 OS << "OpRememberState";
1836 break;
1837 case MCCFIInstruction::OpRestoreState:
1838 OS << "OpRestoreState";
1839 break;
1840 case MCCFIInstruction::OpOffset:
1841 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1842 break;
1843 case MCCFIInstruction::OpDefCfaRegister:
1844 OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1845 break;
1846 case MCCFIInstruction::OpDefCfaOffset:
1847 OS << "OpDefCfaOffset " << Inst.getOffset();
1848 break;
1849 case MCCFIInstruction::OpDefCfa:
1850 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1851 break;
1852 case MCCFIInstruction::OpRelOffset:
1853 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1854 break;
1855 case MCCFIInstruction::OpAdjustCfaOffset:
1856 OS << "OfAdjustCfaOffset " << Inst.getOffset();
1857 break;
1858 case MCCFIInstruction::OpEscape:
1859 OS << "OpEscape";
1860 break;
1861 case MCCFIInstruction::OpRestore:
1862 OS << "OpRestore Reg" << Inst.getRegister();
1863 break;
1864 case MCCFIInstruction::OpUndefined:
1865 OS << "OpUndefined Reg" << Inst.getRegister();
1866 break;
1867 case MCCFIInstruction::OpRegister:
1868 OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1869 << Inst.getRegister2();
1870 break;
1871 case MCCFIInstruction::OpWindowSave:
1872 OS << "OpWindowSave";
1873 break;
1874 case MCCFIInstruction::OpGnuArgsSize:
1875 OS << "OpGnuArgsSize";
1876 break;
1877 default:
1878 OS << "Op#" << Operation;
1879 break;
1883 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1884 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1885 // in the code section (see IHI0056B). $x identifies a symbol starting code or
1886 // the end of a data chunk inside code, $d identifies start of data.
1887 if (isX86() || ELFSymbolRef(Symbol).getSize())
1888 return MarkerSymType::NONE;
1890 Expected<StringRef> NameOrError = Symbol.getName();
1891 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1893 if (!TypeOrError || !NameOrError)
1894 return MarkerSymType::NONE;
1896 if (*TypeOrError != SymbolRef::ST_Unknown)
1897 return MarkerSymType::NONE;
1899 if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
1900 return MarkerSymType::CODE;
1902 // $x<ISA>
1903 if (isRISCV() && NameOrError->starts_with("$x"))
1904 return MarkerSymType::CODE;
1906 if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
1907 return MarkerSymType::DATA;
1909 return MarkerSymType::NONE;
1912 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1913 return getMarkerType(Symbol) != MarkerSymType::NONE;
1916 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1917 const BinaryFunction *Function,
1918 DWARFContext *DwCtx) {
1919 DebugLineTableRowRef RowRef =
1920 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1921 if (RowRef == DebugLineTableRowRef::NULL_ROW)
1922 return;
1924 const DWARFDebugLine::LineTable *LineTable;
1925 if (Function && Function->getDWARFUnit() &&
1926 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1927 LineTable = Function->getDWARFLineTable();
1928 } else {
1929 LineTable = DwCtx->getLineTableForUnit(
1930 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1932 assert(LineTable && "line table expected for instruction with debug info");
1934 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1935 StringRef FileName = "";
1936 if (std::optional<const char *> FName =
1937 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1938 FileName = *FName;
1939 OS << " # debug line " << FileName << ":" << Row.Line;
1940 if (Row.Column)
1941 OS << ":" << Row.Column;
1942 if (Row.Discriminator)
1943 OS << " discriminator:" << Row.Discriminator;
1946 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1947 uint64_t Offset,
1948 const BinaryFunction *Function,
1949 bool PrintMCInst, bool PrintMemData,
1950 bool PrintRelocations,
1951 StringRef Endl) const {
1952 OS << format(" %08" PRIx64 ": ", Offset);
1953 if (MIB->isCFI(Instruction)) {
1954 uint32_t Offset = Instruction.getOperand(0).getImm();
1955 OS << "\t!CFI\t$" << Offset << "\t; ";
1956 if (Function)
1957 printCFI(OS, *Function->getCFIFor(Instruction));
1958 OS << Endl;
1959 return;
1961 if (std::optional<uint32_t> DynamicID =
1962 MIB->getDynamicBranchID(Instruction)) {
1963 OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName()
1964 << " # ID: " << DynamicID;
1965 } else {
1966 InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1968 if (MIB->isCall(Instruction)) {
1969 if (MIB->isTailCall(Instruction))
1970 OS << " # TAILCALL ";
1971 if (MIB->isInvoke(Instruction)) {
1972 const std::optional<MCPlus::MCLandingPad> EHInfo =
1973 MIB->getEHInfo(Instruction);
1974 OS << " # handler: ";
1975 if (EHInfo->first)
1976 OS << *EHInfo->first;
1977 else
1978 OS << '0';
1979 OS << "; action: " << EHInfo->second;
1980 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1981 if (GnuArgsSize >= 0)
1982 OS << "; GNU_args_size = " << GnuArgsSize;
1984 } else if (MIB->isIndirectBranch(Instruction)) {
1985 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1986 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1987 } else {
1988 OS << " # UNKNOWN CONTROL FLOW";
1991 if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1992 OS << " # Offset: " << *Offset;
1993 if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
1994 OS << " # Size: " << *Size;
1995 if (MCSymbol *Label = MIB->getInstLabel(Instruction))
1996 OS << " # Label: " << *Label;
1998 MIB->printAnnotations(Instruction, OS);
2000 if (opts::PrintDebugInfo)
2001 printDebugInfo(OS, Instruction, Function, DwCtx.get());
2003 if ((opts::PrintRelocations || PrintRelocations) && Function) {
2004 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
2005 Function->printRelocations(OS, Offset, Size);
2008 OS << Endl;
2010 if (PrintMCInst) {
2011 Instruction.dump_pretty(OS, InstPrinter.get());
2012 OS << Endl;
2016 std::optional<uint64_t>
2017 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
2018 uint64_t FileOffset) const {
2019 // Find a segment with a matching file offset.
2020 for (auto &KV : SegmentMapInfo) {
2021 const SegmentInfo &SegInfo = KV.second;
2022 // FileOffset is got from perf event,
2023 // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
2024 // If the pagesize is not equal to SegInfo.Alignment.
2025 // FileOffset and SegInfo.FileOffset should be aligned first,
2026 // and then judge whether they are equal.
2027 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
2028 alignDown(FileOffset, SegInfo.Alignment)) {
2029 // The function's offset from base address in VAS is aligned by pagesize
2030 // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2031 // However, The ELF document says that SegInfo.FileOffset should equal
2032 // to SegInfo.Address, modulo the pagesize.
2033 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2035 // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2036 // alignDown(SegInfo.Address, pagesize)
2037 // = SegInfo.Address - (SegInfo.Address % pagesize)
2038 // = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2039 // = SegInfo.Address - SegInfo.FileOffset +
2040 // alignDown(SegInfo.FileOffset, pagesize)
2041 // = SegInfo.Address - SegInfo.FileOffset + FileOffset
2042 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
2046 return std::nullopt;
2049 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
2050 auto SI = AddressToSection.upper_bound(Address);
2051 if (SI != AddressToSection.begin()) {
2052 --SI;
2053 uint64_t UpperBound = SI->first + SI->second->getSize();
2054 if (!SI->second->getSize())
2055 UpperBound += 1;
2056 if (UpperBound > Address)
2057 return *SI->second;
2059 return std::make_error_code(std::errc::bad_address);
2062 ErrorOr<StringRef>
2063 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
2064 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
2065 return Section->getName();
2066 return std::make_error_code(std::errc::bad_address);
2069 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
2070 auto Res = Sections.insert(Section);
2071 (void)Res;
2072 assert(Res.second && "can't register the same section twice.");
2074 // Only register allocatable sections in the AddressToSection map.
2075 if (Section->isAllocatable() && Section->getAddress())
2076 AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
2077 NameToSection.insert(
2078 std::make_pair(std::string(Section->getName()), Section));
2079 if (Section->hasSectionRef())
2080 SectionRefToBinarySection.insert(
2081 std::make_pair(Section->getSectionRef(), Section));
2083 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2084 return *Section;
2087 BinarySection &BinaryContext::registerSection(SectionRef Section) {
2088 return registerSection(new BinarySection(*this, Section));
2091 BinarySection &
2092 BinaryContext::registerSection(const Twine &SectionName,
2093 const BinarySection &OriginalSection) {
2094 return registerSection(
2095 new BinarySection(*this, SectionName, OriginalSection));
2098 BinarySection &
2099 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2100 unsigned ELFFlags, uint8_t *Data,
2101 uint64_t Size, unsigned Alignment) {
2102 auto NamedSections = getSectionByName(Name);
2103 if (NamedSections.begin() != NamedSections.end()) {
2104 assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2105 "can only update unique sections");
2106 BinarySection *Section = NamedSections.begin()->second;
2108 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2109 const bool Flag = Section->isAllocatable();
2110 (void)Flag;
2111 Section->update(Data, Size, Alignment, ELFType, ELFFlags);
2112 LLVM_DEBUG(dbgs() << *Section << "\n");
2113 // FIXME: Fix section flags/attributes for MachO.
2114 if (isELF())
2115 assert(Flag == Section->isAllocatable() &&
2116 "can't change section allocation status");
2117 return *Section;
2120 return registerSection(
2121 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2124 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2125 auto NameRange = NameToSection.equal_range(Section.getName().str());
2126 while (NameRange.first != NameRange.second) {
2127 if (NameRange.first->second == &Section) {
2128 NameToSection.erase(NameRange.first);
2129 break;
2131 ++NameRange.first;
2135 void BinaryContext::deregisterUnusedSections() {
2136 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
2137 for (auto SI = Sections.begin(); SI != Sections.end();) {
2138 BinarySection *Section = *SI;
2139 // We check getOutputData() instead of getOutputSize() because sometimes
2140 // zero-sized .text.cold sections are allocated.
2141 if (Section->hasSectionRef() || Section->getOutputData() ||
2142 (AbsSection && Section == &AbsSection.get())) {
2143 ++SI;
2144 continue;
2147 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2148 << '\n';);
2149 deregisterSectionName(*Section);
2150 SI = Sections.erase(SI);
2151 delete Section;
2155 bool BinaryContext::deregisterSection(BinarySection &Section) {
2156 BinarySection *SectionPtr = &Section;
2157 auto Itr = Sections.find(SectionPtr);
2158 if (Itr != Sections.end()) {
2159 auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2160 while (Range.first != Range.second) {
2161 if (Range.first->second == SectionPtr) {
2162 AddressToSection.erase(Range.first);
2163 break;
2165 ++Range.first;
2168 deregisterSectionName(*SectionPtr);
2169 Sections.erase(Itr);
2170 delete SectionPtr;
2171 return true;
2173 return false;
2176 void BinaryContext::renameSection(BinarySection &Section,
2177 const Twine &NewName) {
2178 auto Itr = Sections.find(&Section);
2179 assert(Itr != Sections.end() && "Section must exist to be renamed.");
2180 Sections.erase(Itr);
2182 deregisterSectionName(Section);
2184 Section.Name = NewName.str();
2185 Section.setOutputName(Section.Name);
2187 NameToSection.insert(std::make_pair(Section.Name, &Section));
2189 // Reinsert with the new name.
2190 Sections.insert(&Section);
2193 void BinaryContext::printSections(raw_ostream &OS) const {
2194 for (BinarySection *const &Section : Sections)
2195 OS << "BOLT-INFO: " << *Section << "\n";
2198 BinarySection &BinaryContext::absoluteSection() {
2199 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2200 return *Section;
2201 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2204 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2205 size_t Size) const {
2206 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2207 if (!Section)
2208 return std::make_error_code(std::errc::bad_address);
2210 if (Section->isVirtual())
2211 return 0;
2213 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2214 AsmInfo->getCodePointerSize());
2215 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2216 return DE.getUnsigned(&ValueOffset, Size);
2219 ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2220 size_t Size) const {
2221 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2222 if (!Section)
2223 return std::make_error_code(std::errc::bad_address);
2225 if (Section->isVirtual())
2226 return 0;
2228 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2229 AsmInfo->getCodePointerSize());
2230 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2231 return DE.getSigned(&ValueOffset, Size);
2234 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2235 uint64_t Type, uint64_t Addend,
2236 uint64_t Value) {
2237 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2238 assert(Section && "cannot find section for address");
2239 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2240 Value);
2243 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2244 uint64_t Type, uint64_t Addend,
2245 uint64_t Value) {
2246 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2247 assert(Section && "cannot find section for address");
2248 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2249 Addend, Value);
2252 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2253 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2254 assert(Section && "cannot find section for address");
2255 return Section->removeRelocationAt(Address - Section->getAddress());
2258 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2259 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2260 if (!Section)
2261 return nullptr;
2263 return Section->getRelocationAt(Address - Section->getAddress());
2266 const Relocation *
2267 BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2268 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2269 if (!Section)
2270 return nullptr;
2272 return Section->getDynamicRelocationAt(Address - Section->getAddress());
2275 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2276 const uint64_t Address) {
2277 auto setImmovable = [&](BinaryData &BD) {
2278 BinaryData *Root = BD.getAtomicRoot();
2279 LLVM_DEBUG(if (Root->isMoveable()) {
2280 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2281 << "due to ambiguous relocation referencing 0x"
2282 << Twine::utohexstr(Address) << '\n';
2284 Root->setIsMoveable(false);
2287 if (Address == BD.getAddress()) {
2288 setImmovable(BD);
2290 // Set previous symbol as immovable
2291 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2292 if (Prev && Prev->getEndAddress() == BD.getAddress())
2293 setImmovable(*Prev);
2296 if (Address == BD.getEndAddress()) {
2297 setImmovable(BD);
2299 // Set next symbol as immovable
2300 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2301 if (Next && Next->getAddress() == BD.getEndAddress())
2302 setImmovable(*Next);
2306 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2307 uint64_t *EntryDesc) {
2308 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2309 auto BFI = SymbolToFunctionMap.find(Symbol);
2310 if (BFI == SymbolToFunctionMap.end())
2311 return nullptr;
2313 BinaryFunction *BF = BFI->second;
2314 if (EntryDesc)
2315 *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2317 return BF;
2320 std::string
2321 BinaryContext::generateBugReportMessage(StringRef Message,
2322 const BinaryFunction &Function) const {
2323 std::string Msg;
2324 raw_string_ostream SS(Msg);
2325 SS << "=======================================\n";
2326 SS << "BOLT is unable to proceed because it couldn't properly understand "
2327 "this function.\n";
2328 SS << "If you are running the most recent version of BOLT, you may "
2329 "want to "
2330 "report this and paste this dump.\nPlease check that there is no "
2331 "sensitive contents being shared in this dump.\n";
2332 SS << "\nOffending function: " << Function.getPrintName() << "\n\n";
2333 ScopedPrinter SP(SS);
2334 SP.printBinaryBlock("Function contents", *Function.getData());
2335 SS << "\n";
2336 const_cast<BinaryFunction &>(Function).print(SS, "");
2337 SS << "ERROR: " << Message;
2338 SS << "\n=======================================\n";
2339 return Msg;
2342 BinaryFunction *
2343 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2344 bool IsSimple) {
2345 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2346 BinaryFunction *BF = InjectedBinaryFunctions.back();
2347 setSymbolToFunctionMap(BF->getSymbol(), BF);
2348 BF->CurrentState = BinaryFunction::State::CFG;
2349 return BF;
2352 std::pair<size_t, size_t>
2353 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2354 // Adjust branch instruction to match the current layout.
2355 if (FixBranches)
2356 BF.fixBranches();
2358 // Create local MC context to isolate the effect of ephemeral code emission.
2359 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2360 MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2361 MCAsmBackend *MAB =
2362 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2364 SmallString<256> Code;
2365 raw_svector_ostream VecOS(Code);
2367 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2368 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2369 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2370 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI));
2372 Streamer->initSections(false, *STI);
2374 MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2375 Section->setHasInstructions(true);
2377 // Create symbols in the LocalCtx so that they get destroyed with it.
2378 MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2379 MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2381 Streamer->switchSection(Section);
2382 Streamer->emitLabel(StartLabel);
2383 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2384 /*EmitCodeOnly=*/true);
2385 Streamer->emitLabel(EndLabel);
2387 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2388 SmallVector<LabelRange> SplitLabels;
2389 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2390 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2391 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2392 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2394 MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2395 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2396 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2397 SplitSection->setHasInstructions(true);
2398 Streamer->switchSection(SplitSection);
2400 Streamer->emitLabel(SplitStartLabel);
2401 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2402 Streamer->emitLabel(SplitEndLabel);
2405 MCAssembler &Assembler =
2406 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2407 Assembler.layout();
2409 // Obtain fragment sizes.
2410 std::vector<uint64_t> FragmentSizes;
2411 // Main fragment size.
2412 const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) -
2413 Assembler.getSymbolOffset(*StartLabel);
2414 FragmentSizes.push_back(HotSize);
2415 // Split fragment sizes.
2416 uint64_t ColdSize = 0;
2417 for (const auto &Labels : SplitLabels) {
2418 uint64_t Size = Assembler.getSymbolOffset(*Labels.second) -
2419 Assembler.getSymbolOffset(*Labels.first);
2420 FragmentSizes.push_back(Size);
2421 ColdSize += Size;
2424 // Populate new start and end offsets of each basic block.
2425 uint64_t FragmentIndex = 0;
2426 for (FunctionFragment &FF : BF.getLayout().fragments()) {
2427 BinaryBasicBlock *PrevBB = nullptr;
2428 for (BinaryBasicBlock *BB : FF) {
2429 const uint64_t BBStartOffset =
2430 Assembler.getSymbolOffset(*(BB->getLabel()));
2431 BB->setOutputStartAddress(BBStartOffset);
2432 if (PrevBB)
2433 PrevBB->setOutputEndAddress(BBStartOffset);
2434 PrevBB = BB;
2436 if (PrevBB)
2437 PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
2438 FragmentIndex++;
2441 // Clean-up the effect of the code emission.
2442 for (const MCSymbol &Symbol : Assembler.symbols()) {
2443 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2444 MutableSymbol->setUndefined();
2445 MutableSymbol->setIsRegistered(false);
2448 return std::make_pair(HotSize, ColdSize);
2451 bool BinaryContext::validateInstructionEncoding(
2452 ArrayRef<uint8_t> InputSequence) const {
2453 MCInst Inst;
2454 uint64_t InstSize;
2455 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2456 assert(InstSize == InputSequence.size() &&
2457 "Disassembled instruction size does not match the sequence.");
2459 SmallString<256> Code;
2460 SmallVector<MCFixup, 4> Fixups;
2462 MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2463 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2464 if (InputSequence != OutputSequence) {
2465 if (opts::Verbosity > 1) {
2466 this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2467 << " input: " << InputSequence << '\n'
2468 << " output: " << OutputSequence << '\n';
2470 return false;
2473 return true;
2476 uint64_t BinaryContext::getHotThreshold() const {
2477 static uint64_t Threshold = 0;
2478 if (Threshold == 0) {
2479 Threshold = std::max(
2480 (uint64_t)opts::ExecutionCountThreshold,
2481 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2483 return Threshold;
2486 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2487 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2488 auto FI = BinaryFunctions.upper_bound(Address);
2489 if (FI == BinaryFunctions.begin())
2490 return nullptr;
2491 --FI;
2493 const uint64_t UsedSize =
2494 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2496 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2497 return nullptr;
2499 return &FI->second;
2502 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2503 // First, try to find a function starting at the given address. If the
2504 // function was folded, this will get us the original folded function if it
2505 // wasn't removed from the list, e.g. in non-relocation mode.
2506 auto BFI = BinaryFunctions.find(Address);
2507 if (BFI != BinaryFunctions.end())
2508 return &BFI->second;
2510 // We might have folded the function matching the object at the given
2511 // address. In such case, we look for a function matching the symbol
2512 // registered at the original address. The new function (the one that the
2513 // original was folded into) will hold the symbol.
2514 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2515 uint64_t EntryID = 0;
2516 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2517 if (BF && EntryID == 0)
2518 return BF;
2520 return nullptr;
2523 /// Deregister JumpTable registered at a given \p Address and delete it.
2524 void BinaryContext::deleteJumpTable(uint64_t Address) {
2525 assert(JumpTables.count(Address) && "Must have a jump table at address");
2526 JumpTable *JT = JumpTables.at(Address);
2527 for (BinaryFunction *Parent : JT->Parents)
2528 Parent->JumpTables.erase(Address);
2529 JumpTables.erase(Address);
2530 delete JT;
2533 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2534 const DWARFAddressRangesVector &InputRanges) const {
2535 DebugAddressRangesVector OutputRanges;
2537 for (const DWARFAddressRange Range : InputRanges) {
2538 auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2539 while (BFI != BinaryFunctions.end()) {
2540 const BinaryFunction &Function = BFI->second;
2541 if (Function.getAddress() >= Range.HighPC)
2542 break;
2543 const DebugAddressRangesVector FunctionRanges =
2544 Function.getOutputAddressRanges();
2545 llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2546 std::advance(BFI, 1);
2550 return OutputRanges;
2553 } // namespace bolt
2554 } // namespace llvm