[nfc][Driver] Remove {{(.exe)?}} from sanitizer test (#121160)
[llvm-project.git] / bolt / lib / Core / BinaryContext.cpp
blobf5e11358daaa32b4ff521246b7cdace01709da38
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAssembler.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
26 #include "llvm/MC/MCInstPrinter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionELF.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/Regex.h"
37 #include <algorithm>
38 #include <functional>
39 #include <iterator>
40 #include <unordered_set>
42 using namespace llvm;
44 #undef DEBUG_TYPE
45 #define DEBUG_TYPE "bolt"
47 namespace opts {
49 cl::opt<bool> NoHugePages("no-huge-pages",
50 cl::desc("use regular size pages for code alignment"),
51 cl::Hidden, cl::cat(BoltCategory));
53 static cl::opt<bool>
54 PrintDebugInfo("print-debug-info",
55 cl::desc("print debug info when printing functions"),
56 cl::Hidden,
57 cl::ZeroOrMore,
58 cl::cat(BoltCategory));
60 cl::opt<bool> PrintRelocations(
61 "print-relocations",
62 cl::desc("print relocations when printing functions/objects"), cl::Hidden,
63 cl::cat(BoltCategory));
65 static cl::opt<bool>
66 PrintMemData("print-mem-data",
67 cl::desc("print memory data annotations when printing functions"),
68 cl::Hidden,
69 cl::ZeroOrMore,
70 cl::cat(BoltCategory));
72 cl::opt<std::string> CompDirOverride(
73 "comp-dir-override",
74 cl::desc("overrides DW_AT_comp_dir, and provides an alternative base "
75 "location, which is used with DW_AT_dwo_name to construct a path "
76 "to *.dwo files."),
77 cl::Hidden, cl::init(""), cl::cat(BoltCategory));
78 } // namespace opts
80 namespace llvm {
81 namespace bolt {
83 char BOLTError::ID = 0;
85 BOLTError::BOLTError(bool IsFatal, const Twine &S)
86 : IsFatal(IsFatal), Msg(S.str()) {}
88 void BOLTError::log(raw_ostream &OS) const {
89 if (IsFatal)
90 OS << "FATAL ";
91 StringRef ErrMsg = StringRef(Msg);
92 // Prepend our error prefix if it is missing
93 if (ErrMsg.empty()) {
94 OS << "BOLT-ERROR\n";
95 } else {
96 if (!ErrMsg.starts_with("BOLT-ERROR"))
97 OS << "BOLT-ERROR: ";
98 OS << ErrMsg << "\n";
102 std::error_code BOLTError::convertToErrorCode() const {
103 return inconvertibleErrorCode();
106 Error createNonFatalBOLTError(const Twine &S) {
107 return make_error<BOLTError>(/*IsFatal*/ false, S);
110 Error createFatalBOLTError(const Twine &S) {
111 return make_error<BOLTError>(/*IsFatal*/ true, S);
114 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) {
115 handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) {
116 if (!E.getMessage().empty())
117 E.log(this->errs());
118 if (E.isFatal())
119 exit(1);
123 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
124 std::unique_ptr<DWARFContext> DwCtx,
125 std::unique_ptr<Triple> TheTriple,
126 std::shared_ptr<orc::SymbolStringPool> SSP,
127 const Target *TheTarget, std::string TripleName,
128 std::unique_ptr<MCCodeEmitter> MCE,
129 std::unique_ptr<MCObjectFileInfo> MOFI,
130 std::unique_ptr<const MCAsmInfo> AsmInfo,
131 std::unique_ptr<const MCInstrInfo> MII,
132 std::unique_ptr<const MCSubtargetInfo> STI,
133 std::unique_ptr<MCInstPrinter> InstPrinter,
134 std::unique_ptr<const MCInstrAnalysis> MIA,
135 std::unique_ptr<MCPlusBuilder> MIB,
136 std::unique_ptr<const MCRegisterInfo> MRI,
137 std::unique_ptr<MCDisassembler> DisAsm,
138 JournalingStreams Logger)
139 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
140 TheTriple(std::move(TheTriple)), SSP(std::move(SSP)),
141 TheTarget(TheTarget), TripleName(TripleName), MCE(std::move(MCE)),
142 MOFI(std::move(MOFI)), AsmInfo(std::move(AsmInfo)), MII(std::move(MII)),
143 STI(std::move(STI)), InstPrinter(std::move(InstPrinter)),
144 MIA(std::move(MIA)), MIB(std::move(MIB)), MRI(std::move(MRI)),
145 DisAsm(std::move(DisAsm)), Logger(Logger), InitialDynoStats(isAArch64()) {
146 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
147 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
150 BinaryContext::~BinaryContext() {
151 for (BinarySection *Section : Sections)
152 delete Section;
153 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
154 delete InjectedFunction;
155 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
156 delete JTI.second;
157 clearBinaryData();
160 /// Create BinaryContext for a given architecture \p ArchName and
161 /// triple \p TripleName.
162 Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
163 Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP,
164 StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC,
165 std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
166 StringRef ArchName = "";
167 std::string FeaturesStr = "";
168 switch (TheTriple.getArch()) {
169 case llvm::Triple::x86_64:
170 if (Features)
171 return createFatalBOLTError(
172 "x86_64 target does not use SubtargetFeatures");
173 ArchName = "x86-64";
174 FeaturesStr = "+nopl";
175 break;
176 case llvm::Triple::aarch64:
177 if (Features)
178 return createFatalBOLTError(
179 "AArch64 target does not use SubtargetFeatures");
180 ArchName = "aarch64";
181 FeaturesStr = "+all";
182 break;
183 case llvm::Triple::riscv64: {
184 ArchName = "riscv64";
185 if (!Features)
186 return createFatalBOLTError("RISCV target needs SubtargetFeatures");
187 // We rely on relaxation for some transformations (e.g., promoting all calls
188 // to PseudoCALL and then making JITLink relax them). Since the relax
189 // feature is not stored in the object file, we manually enable it.
190 Features->AddFeature("relax");
191 FeaturesStr = Features->getString();
192 break;
194 default:
195 return createStringError(std::errc::not_supported,
196 "BOLT-ERROR: Unrecognized machine in ELF file");
199 const std::string TripleName = TheTriple.str();
201 std::string Error;
202 const Target *TheTarget =
203 TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error);
204 if (!TheTarget)
205 return createStringError(make_error_code(std::errc::not_supported),
206 Twine("BOLT-ERROR: ", Error));
208 std::unique_ptr<const MCRegisterInfo> MRI(
209 TheTarget->createMCRegInfo(TripleName));
210 if (!MRI)
211 return createStringError(
212 make_error_code(std::errc::not_supported),
213 Twine("BOLT-ERROR: no register info for target ", TripleName));
215 // Set up disassembler.
216 std::unique_ptr<MCAsmInfo> AsmInfo(
217 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
218 if (!AsmInfo)
219 return createStringError(
220 make_error_code(std::errc::not_supported),
221 Twine("BOLT-ERROR: no assembly info for target ", TripleName));
222 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
223 // we want to emit such names as using @PLT without double quotes to convey
224 // variant kind to the assembler. BOLT doesn't rely on the linker so we can
225 // override the default AsmInfo behavior to emit names the way we want.
226 AsmInfo->setAllowAtInName(true);
228 std::unique_ptr<const MCSubtargetInfo> STI(
229 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
230 if (!STI)
231 return createStringError(
232 make_error_code(std::errc::not_supported),
233 Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
235 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
236 if (!MII)
237 return createStringError(
238 make_error_code(std::errc::not_supported),
239 Twine("BOLT-ERROR: no instruction info for target ", TripleName));
241 std::unique_ptr<MCContext> Ctx(
242 new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
243 std::unique_ptr<MCObjectFileInfo> MOFI(
244 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
245 Ctx->setObjectFileInfo(MOFI.get());
246 // We do not support X86 Large code model. Change this in the future.
247 bool Large = false;
248 if (TheTriple.getArch() == llvm::Triple::aarch64)
249 Large = true;
250 unsigned LSDAEncoding =
251 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
252 if (IsPIC) {
253 LSDAEncoding = dwarf::DW_EH_PE_pcrel |
254 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
257 std::unique_ptr<MCDisassembler> DisAsm(
258 TheTarget->createMCDisassembler(*STI, *Ctx));
260 if (!DisAsm)
261 return createStringError(
262 make_error_code(std::errc::not_supported),
263 Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
265 std::unique_ptr<const MCInstrAnalysis> MIA(
266 TheTarget->createMCInstrAnalysis(MII.get()));
267 if (!MIA)
268 return createStringError(
269 make_error_code(std::errc::not_supported),
270 Twine("BOLT-ERROR: failed to create instruction analysis for target ",
271 TripleName));
273 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
274 std::unique_ptr<MCInstPrinter> InstructionPrinter(
275 TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo,
276 *MII, *MRI));
277 if (!InstructionPrinter)
278 return createStringError(
279 make_error_code(std::errc::not_supported),
280 Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
281 InstructionPrinter->setPrintImmHex(true);
283 std::unique_ptr<MCCodeEmitter> MCE(
284 TheTarget->createMCCodeEmitter(*MII, *Ctx));
286 auto BC = std::make_unique<BinaryContext>(
287 std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple),
288 std::move(SSP), TheTarget, std::string(TripleName), std::move(MCE),
289 std::move(MOFI), std::move(AsmInfo), std::move(MII), std::move(STI),
290 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
291 std::move(DisAsm), Logger);
293 BC->LSDAEncoding = LSDAEncoding;
295 BC->MAB = std::unique_ptr<MCAsmBackend>(
296 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
298 BC->setFilename(InputFileName);
300 BC->HasFixedLoadAddress = !IsPIC;
302 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
303 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
305 if (!BC->SymbolicDisAsm)
306 return createStringError(
307 make_error_code(std::errc::not_supported),
308 Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
310 return std::move(BC);
313 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
314 if (opts::HotText &&
315 (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
316 return true;
318 if (opts::HotData &&
319 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
320 return true;
322 if (SymbolName == "_end")
323 return true;
325 return false;
328 std::unique_ptr<MCObjectWriter>
329 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
330 return MAB->createObjectWriter(OS);
333 bool BinaryContext::validateObjectNesting() const {
334 auto Itr = BinaryDataMap.begin();
335 auto End = BinaryDataMap.end();
336 bool Valid = true;
337 while (Itr != End) {
338 auto Next = std::next(Itr);
339 while (Next != End &&
340 Itr->second->getSection() == Next->second->getSection() &&
341 Itr->second->containsRange(Next->second->getAddress(),
342 Next->second->getSize())) {
343 if (Next->second->Parent != Itr->second) {
344 this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
345 << "BOLT-WARNING: " << *Itr->second << "\n"
346 << "BOLT-WARNING: " << *Next->second << "\n";
347 Valid = false;
349 ++Next;
351 Itr = Next;
353 return Valid;
356 bool BinaryContext::validateHoles() const {
357 bool Valid = true;
358 for (BinarySection &Section : sections()) {
359 for (const Relocation &Rel : Section.relocations()) {
360 uint64_t RelAddr = Rel.Offset + Section.getAddress();
361 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
362 if (!BD) {
363 this->errs()
364 << "BOLT-WARNING: no BinaryData found for relocation at address"
365 << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName()
366 << "\n";
367 Valid = false;
368 } else if (!BD->getAtomicRoot()) {
369 this->errs()
370 << "BOLT-WARNING: no atomic BinaryData found for relocation at "
371 << "address 0x" << Twine::utohexstr(RelAddr) << " in "
372 << Section.getName() << "\n";
373 Valid = false;
377 return Valid;
380 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
381 const uint64_t Address = GAI->second->getAddress();
382 const uint64_t Size = GAI->second->getSize();
384 auto fixParents = [&](BinaryDataMapType::iterator Itr,
385 BinaryData *NewParent) {
386 BinaryData *OldParent = Itr->second->Parent;
387 Itr->second->Parent = NewParent;
388 ++Itr;
389 while (Itr != BinaryDataMap.end() && OldParent &&
390 Itr->second->Parent == OldParent) {
391 Itr->second->Parent = NewParent;
392 ++Itr;
396 // Check if the previous symbol contains the newly added symbol.
397 if (GAI != BinaryDataMap.begin()) {
398 BinaryData *Prev = std::prev(GAI)->second;
399 while (Prev) {
400 if (Prev->getSection() == GAI->second->getSection() &&
401 Prev->containsRange(Address, Size)) {
402 fixParents(GAI, Prev);
403 } else {
404 fixParents(GAI, nullptr);
406 Prev = Prev->Parent;
410 // Check if the newly added symbol contains any subsequent symbols.
411 if (Size != 0) {
412 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
413 auto Itr = std::next(GAI);
414 while (
415 Itr != BinaryDataMap.end() &&
416 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
417 Itr->second->Parent = BD;
418 ++Itr;
423 iterator_range<BinaryContext::binary_data_iterator>
424 BinaryContext::getSubBinaryData(BinaryData *BD) {
425 auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
426 auto End = Start;
427 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
428 ++End;
429 return make_range(Start, End);
432 std::pair<const MCSymbol *, uint64_t>
433 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
434 bool IsPCRel) {
435 if (isAArch64()) {
436 // Check if this is an access to a constant island and create bookkeeping
437 // to keep track of it and emit it later as part of this function.
438 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
439 return std::make_pair(IslandSym, 0);
441 // Detect custom code written in assembly that refers to arbitrary
442 // constant islands from other functions. Write this reference so we
443 // can pull this constant island and emit it as part of this function
444 // too.
445 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
447 if (IslandIter != AddressToConstantIslandMap.begin() &&
448 (IslandIter == AddressToConstantIslandMap.end() ||
449 IslandIter->first > Address))
450 --IslandIter;
452 if (IslandIter != AddressToConstantIslandMap.end()) {
453 // Fall-back to referencing the original constant island in the presence
454 // of dynamic relocs, as we currently do not support cloning them.
455 // Notice: we might fail to link because of this, if the original constant
456 // island we are referring would be emitted too far away.
457 if (IslandIter->second->hasDynamicRelocationAtIsland()) {
458 MCSymbol *IslandSym =
459 IslandIter->second->getOrCreateIslandAccess(Address);
460 if (IslandSym)
461 return std::make_pair(IslandSym, 0);
462 } else if (MCSymbol *IslandSym =
463 IslandIter->second->getOrCreateProxyIslandAccess(Address,
464 BF)) {
465 BF.createIslandDependency(IslandSym, IslandIter->second);
466 return std::make_pair(IslandSym, 0);
471 // Note that the address does not necessarily have to reside inside
472 // a section, it could be an absolute address too.
473 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
474 if (Section && Section->isText()) {
475 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
476 if (Address != BF.getAddress()) {
477 // The address could potentially escape. Mark it as another entry
478 // point into the function.
479 if (opts::Verbosity >= 1) {
480 this->outs() << "BOLT-INFO: potentially escaped address 0x"
481 << Twine::utohexstr(Address) << " in function " << BF
482 << '\n';
484 BF.HasInternalLabelReference = true;
485 return std::make_pair(
486 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
488 } else {
489 addInterproceduralReference(&BF, Address);
493 // With relocations, catch jump table references outside of the basic block
494 // containing the indirect jump.
495 if (HasRelocations) {
496 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
497 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
498 const MCSymbol *Symbol =
499 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
501 return std::make_pair(Symbol, 0);
505 if (BinaryData *BD = getBinaryDataContainingAddress(Address))
506 return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
508 // TODO: use DWARF info to get size/alignment here?
509 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
510 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
511 return std::make_pair(TargetSymbol, 0);
514 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
515 BinaryFunction &BF) {
516 if (!isX86())
517 return MemoryContentsType::UNKNOWN;
519 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
520 if (!Section) {
521 // No section - possibly an absolute address. Since we don't allow
522 // internal function addresses to escape the function scope - we
523 // consider it a tail call.
524 if (opts::Verbosity > 1) {
525 this->errs() << "BOLT-WARNING: no section for address 0x"
526 << Twine::utohexstr(Address) << " referenced from function "
527 << BF << '\n';
529 return MemoryContentsType::UNKNOWN;
532 if (Section->isVirtual()) {
533 // The contents are filled at runtime.
534 return MemoryContentsType::UNKNOWN;
537 // No support for jump tables in code yet.
538 if (Section->isText())
539 return MemoryContentsType::UNKNOWN;
541 // Start with checking for PIC jump table. We expect non-PIC jump tables
542 // to have high 32 bits set to 0.
543 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
544 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
546 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
547 return MemoryContentsType::POSSIBLE_JUMP_TABLE;
549 return MemoryContentsType::UNKNOWN;
552 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
553 const JumpTable::JumpTableType Type,
554 const BinaryFunction &BF,
555 const uint64_t NextJTAddress,
556 JumpTable::AddressesType *EntriesAsAddress,
557 bool *HasEntryInFragment) const {
558 // Target address of __builtin_unreachable.
559 const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();
561 // Is one of the targets __builtin_unreachable?
562 bool HasUnreachable = false;
564 // Does one of the entries match function start address?
565 bool HasStartAsEntry = false;
567 // Number of targets other than __builtin_unreachable.
568 uint64_t NumRealEntries = 0;
570 // Size of the jump table without trailing __builtin_unreachable entries.
571 size_t TrimmedSize = 0;
573 auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
574 if (!EntriesAsAddress)
575 return;
576 EntriesAsAddress->emplace_back(EntryAddress);
577 if (!Unreachable)
578 TrimmedSize = EntriesAsAddress->size();
581 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
582 if (!Section)
583 return false;
585 // The upper bound is defined by containing object, section limits, and
586 // the next jump table in memory.
587 uint64_t UpperBound = Section->getEndAddress();
588 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
589 if (JumpTableBD && JumpTableBD->getSize()) {
590 assert(JumpTableBD->getEndAddress() <= UpperBound &&
591 "data object cannot cross a section boundary");
592 UpperBound = JumpTableBD->getEndAddress();
594 if (NextJTAddress)
595 UpperBound = std::min(NextJTAddress, UpperBound);
597 LLVM_DEBUG({
598 using JTT = JumpTable::JumpTableType;
599 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
600 Address, BF.getPrintName(),
601 Type == JTT::JTT_PIC ? "PIC" : "Normal");
603 const uint64_t EntrySize = getJumpTableEntrySize(Type);
604 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
605 EntryAddress += EntrySize) {
606 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress)
607 << " -> ");
608 // Check if there's a proper relocation against the jump table entry.
609 if (HasRelocations) {
610 if (Type == JumpTable::JTT_PIC &&
611 !DataPCRelocations.count(EntryAddress)) {
612 LLVM_DEBUG(
613 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
614 break;
616 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
617 LLVM_DEBUG(
618 dbgs()
619 << "FAIL: JTT_NORMAL table, no relocation for this address\n");
620 break;
624 const uint64_t Value =
625 (Type == JumpTable::JTT_PIC)
626 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
627 : *getPointerAtAddress(EntryAddress);
629 // __builtin_unreachable() case.
630 if (Value == UnreachableAddress) {
631 addEntryAddress(Value, /*Unreachable*/ true);
632 HasUnreachable = true;
633 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
634 continue;
637 // Function start is another special case. It is allowed in the jump table,
638 // but we need at least one another regular entry to distinguish the table
639 // from, e.g. a function pointer array.
640 if (Value == BF.getAddress()) {
641 HasStartAsEntry = true;
642 addEntryAddress(Value);
643 continue;
646 // Function or one of its fragments.
647 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
648 const bool DoesBelongToFunction =
649 BF.containsAddress(Value) ||
650 (TargetBF && areRelatedFragments(TargetBF, &BF));
651 if (!DoesBelongToFunction) {
652 LLVM_DEBUG({
653 if (!BF.containsAddress(Value)) {
654 dbgs() << "FAIL: function doesn't contain this address\n";
655 if (TargetBF) {
656 dbgs() << " ! function containing this address: "
657 << TargetBF->getPrintName() << '\n';
658 if (TargetBF->isFragment()) {
659 dbgs() << " ! is a fragment";
660 for (BinaryFunction *Parent : TargetBF->ParentFragments)
661 dbgs() << ", parent: " << Parent->getPrintName();
662 dbgs() << '\n';
667 break;
670 // Check there's an instruction at this offset.
671 if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
672 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
673 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
674 break;
677 ++NumRealEntries;
678 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
680 if (TargetBF != &BF && HasEntryInFragment)
681 *HasEntryInFragment = true;
682 addEntryAddress(Value);
685 // Trim direct/normal jump table to exclude trailing unreachable entries that
686 // can collide with a function address.
687 if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
688 TrimmedSize != EntriesAsAddress->size() &&
689 getBinaryFunctionAtAddress(UnreachableAddress))
690 EntriesAsAddress->resize(TrimmedSize);
692 // It's a jump table if the number of real entries is more than 1, or there's
693 // one real entry and one or more special targets. If there are only multiple
694 // special targets, then it's not a jump table.
695 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
698 void BinaryContext::populateJumpTables() {
699 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
700 << '\n');
701 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
702 ++JTI) {
703 JumpTable *JT = JTI->second;
705 bool NonSimpleParent = false;
706 for (BinaryFunction *BF : JT->Parents)
707 NonSimpleParent |= !BF->isSimple();
708 if (NonSimpleParent)
709 continue;
711 uint64_t NextJTAddress = 0;
712 auto NextJTI = std::next(JTI);
713 if (NextJTI != JTE)
714 NextJTAddress = NextJTI->second->getAddress();
716 const bool Success =
717 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
718 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
719 if (!Success) {
720 LLVM_DEBUG({
721 dbgs() << "failed to analyze ";
722 JT->print(dbgs());
723 if (NextJTI != JTE) {
724 dbgs() << "next ";
725 NextJTI->second->print(dbgs());
728 llvm_unreachable("jump table heuristic failure");
730 for (BinaryFunction *Frag : JT->Parents) {
731 if (JT->IsSplit)
732 Frag->setHasIndirectTargetToSplitFragment(true);
733 for (uint64_t EntryAddress : JT->EntriesAsAddress)
734 // if target is builtin_unreachable
735 if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
736 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
737 Frag->getSize());
738 } else if (EntryAddress >= Frag->getAddress() &&
739 EntryAddress < Frag->getAddress() + Frag->getSize()) {
740 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
744 // In strict mode, erase PC-relative relocation record. Later we check that
745 // all such records are erased and thus have been accounted for.
746 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
747 for (uint64_t Address = JT->getAddress();
748 Address < JT->getAddress() + JT->getSize();
749 Address += JT->EntrySize) {
750 DataPCRelocations.erase(DataPCRelocations.find(Address));
754 // Mark to skip the function and all its fragments.
755 for (BinaryFunction *Frag : JT->Parents)
756 if (Frag->hasIndirectTargetToSplitFragment())
757 addFragmentsToSkip(Frag);
760 if (opts::StrictMode && DataPCRelocations.size()) {
761 LLVM_DEBUG({
762 dbgs() << DataPCRelocations.size()
763 << " unclaimed PC-relative relocations left in data:\n";
764 for (uint64_t Reloc : DataPCRelocations)
765 dbgs() << Twine::utohexstr(Reloc) << '\n';
767 assert(0 && "unclaimed PC-relative relocations left in data\n");
769 clearList(DataPCRelocations);
772 void BinaryContext::skipMarkedFragments() {
773 std::vector<BinaryFunction *> FragmentQueue;
774 // Copy the functions to FragmentQueue.
775 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
776 auto addToWorklist = [&](BinaryFunction *Function) -> void {
777 if (FragmentsToSkip.count(Function))
778 return;
779 FragmentQueue.push_back(Function);
780 addFragmentsToSkip(Function);
782 // Functions containing split jump tables need to be skipped with all
783 // fragments (transitively).
784 for (size_t I = 0; I != FragmentQueue.size(); I++) {
785 BinaryFunction *BF = FragmentQueue[I];
786 assert(FragmentsToSkip.count(BF) &&
787 "internal error in traversing function fragments");
788 if (opts::Verbosity >= 1)
789 this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
790 BF->setSimple(false);
791 BF->setHasIndirectTargetToSplitFragment(true);
793 llvm::for_each(BF->Fragments, addToWorklist);
794 llvm::for_each(BF->ParentFragments, addToWorklist);
796 if (!FragmentsToSkip.empty())
797 this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size()
798 << " function" << (FragmentsToSkip.size() == 1 ? "" : "s")
799 << " due to cold fragments\n";
802 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
803 uint64_t Size,
804 uint16_t Alignment,
805 unsigned Flags) {
806 auto Itr = BinaryDataMap.find(Address);
807 if (Itr != BinaryDataMap.end()) {
808 assert(Itr->second->getSize() == Size || !Size);
809 return Itr->second->getSymbol();
812 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
813 assert(!GlobalSymbols.count(Name) && "created name is not unique");
814 return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
817 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
818 return Ctx->getOrCreateSymbol(Name);
821 BinaryFunction *BinaryContext::createBinaryFunction(
822 const std::string &Name, BinarySection &Section, uint64_t Address,
823 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
824 auto Result = BinaryFunctions.emplace(
825 Address, BinaryFunction(Name, Section, Address, Size, *this));
826 assert(Result.second == true && "unexpected duplicate function");
827 BinaryFunction *BF = &Result.first->second;
828 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
829 Alignment);
830 setSymbolToFunctionMap(BF->getSymbol(), BF);
831 return BF;
834 const MCSymbol *
835 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
836 JumpTable::JumpTableType Type) {
837 // Two fragments of same function access same jump table
838 if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
839 assert(JT->Type == Type && "jump table types have to match");
840 assert(Address == JT->getAddress() && "unexpected non-empty jump table");
842 // Prevent associating a jump table to a specific fragment twice.
843 if (!llvm::is_contained(JT->Parents, &Function)) {
844 assert(llvm::all_of(JT->Parents,
845 [&](const BinaryFunction *BF) {
846 return areRelatedFragments(&Function, BF);
847 }) &&
848 "cannot re-use jump table of a different function");
849 // Duplicate the entry for the parent function for easy access
850 JT->Parents.push_back(&Function);
851 if (opts::Verbosity > 2) {
852 this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
853 << JT->Parents[0]->getPrintName() << "; "
854 << Function.getPrintName() << "\n";
855 JT->print(this->outs());
857 Function.JumpTables.emplace(Address, JT);
858 for (BinaryFunction *Parent : JT->Parents)
859 Parent->setHasIndirectTargetToSplitFragment(true);
862 bool IsJumpTableParent = false;
863 (void)IsJumpTableParent;
864 for (BinaryFunction *Frag : JT->Parents)
865 if (Frag == &Function)
866 IsJumpTableParent = true;
867 assert(IsJumpTableParent &&
868 "cannot re-use jump table of a different function");
869 return JT->getFirstLabel();
872 // Re-use the existing symbol if possible.
873 MCSymbol *JTLabel = nullptr;
874 if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
875 if (!isInternalSymbolName(Object->getSymbol()->getName()))
876 JTLabel = Object->getSymbol();
879 const uint64_t EntrySize = getJumpTableEntrySize(Type);
880 if (!JTLabel) {
881 const std::string JumpTableName = generateJumpTableName(Function, Address);
882 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
885 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
886 << " in function " << Function << '\n');
888 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
889 JumpTable::LabelMapType{{0, JTLabel}},
890 *getSectionForAddress(Address));
891 JT->Parents.push_back(&Function);
892 if (opts::Verbosity > 2)
893 JT->print(this->outs());
894 JumpTables.emplace(Address, JT);
896 // Duplicate the entry for the parent function for easy access.
897 Function.JumpTables.emplace(Address, JT);
898 return JTLabel;
901 std::pair<uint64_t, const MCSymbol *>
902 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
903 const MCSymbol *OldLabel) {
904 auto L = scopeLock();
905 unsigned Offset = 0;
906 bool Found = false;
907 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
908 if (Elmt.second != OldLabel)
909 continue;
910 Offset = Elmt.first;
911 Found = true;
912 break;
914 assert(Found && "Label not found");
915 (void)Found;
916 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
917 JumpTable *NewJT =
918 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
919 JumpTable::LabelMapType{{Offset, NewLabel}},
920 *getSectionForAddress(JT->getAddress()));
921 NewJT->Parents = JT->Parents;
922 NewJT->Entries = JT->Entries;
923 NewJT->Counts = JT->Counts;
924 uint64_t JumpTableID = ++DuplicatedJumpTables;
925 // Invert it to differentiate from regular jump tables whose IDs are their
926 // addresses in the input binary memory space
927 JumpTableID = ~JumpTableID;
928 JumpTables.emplace(JumpTableID, NewJT);
929 Function.JumpTables.emplace(JumpTableID, NewJT);
930 return std::make_pair(JumpTableID, NewLabel);
933 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
934 uint64_t Address) {
935 size_t Id;
936 uint64_t Offset = 0;
937 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
938 Offset = Address - JT->getAddress();
939 auto JTLabelsIt = JT->Labels.find(Offset);
940 if (JTLabelsIt != JT->Labels.end())
941 return std::string(JTLabelsIt->second->getName());
943 auto JTIdsIt = JumpTableIds.find(JT->getAddress());
944 assert(JTIdsIt != JumpTableIds.end());
945 Id = JTIdsIt->second;
946 } else {
947 Id = JumpTableIds[Address] = BF.JumpTables.size();
949 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
950 (Offset ? ("." + std::to_string(Offset)) : ""));
/// Check whether the bytes between the end of \p BF's body (getSize()) and its
/// maximum size (getMaxSize()) form an acceptable padding sequence.
///
/// Returns true when the target is not x86, when there are no padding bytes,
/// when the whole area decodes as breakpoints, or when it is consumed entirely
/// by an interleaving of no-ops, forward "skip" jumps and zero bytes.
/// Otherwise returns false, printing a warning when opts::Verbosity >= 1.
953 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
954 // FIXME: aarch64 support is missing.
955 if (!isX86())
956 return true;
// Nothing to validate when the body already fills the maximum size.
958 if (BF.getSize() == BF.getMaxSize())
959 return true;
961 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
962 assert(FunctionData && "cannot get function as data");
// Scan state captured by reference by the lambdas below: the current offset
// into the function data and the most recently decoded instruction.
964 uint64_t Offset = BF.getSize();
965 MCInst Instr;
966 uint64_t InstrSize = 0;
967 uint64_t InstrAddress = BF.getAddress() + Offset;
968 using std::placeholders::_1;
970 // Skip instructions that satisfy the predicate condition.
// Stops at the first undecodable or non-matching instruction and returns the
// number of bytes advanced (0 if nothing was skipped).
971 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
972 const uint64_t StartOffset = Offset;
973 for (; Offset < BF.getMaxSize();
974 Offset += InstrSize, InstrAddress += InstrSize) {
975 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
976 InstrAddress, nulls()))
977 break;
978 if (!Predicate(Instr))
979 break;
982 return Offset - StartOffset;
985 // Skip a sequence of zero bytes.
// Only Offset is advanced here; returns the number of zero bytes skipped.
986 auto skipZeros = [&]() {
987 const uint64_t StartOffset = Offset;
988 for (; Offset < BF.getMaxSize(); ++Offset)
989 if ((*FunctionData)[Offset] != 0)
990 break;
992 return Offset - StartOffset;
995 // Accept the whole padding area filled with breakpoints.
996 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
997 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
998 return true;
1000 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
1002 // Some functions have a jump to the next function or to the padding area
1003 // inserted after the body.
// A "skip jump" is an unconditional branch whose evaluated target lies at or
// after the end of the branch itself and no further than the end of the
// function's maximum extent.
1004 auto isSkipJump = [&](const MCInst &Instr) {
1005 uint64_t TargetAddress = 0;
1006 if (MIB->isUnconditionalBranch(Instr) &&
1007 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
1008 if (TargetAddress >= InstrAddress + InstrSize &&
1009 TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
1010 return true;
1013 return false;
1016 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
// The loop keeps going for as long as any of the three skippers made progress.
1017 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
1018 skipZeros())
// Padding is valid only if the scan consumed everything up to the max size.
1021 if (Offset == BF.getMaxSize())
1022 return true;
1024 if (opts::Verbosity >= 1) {
1025 this->errs() << "BOLT-WARNING: bad padding at address 0x"
1026 << Twine::utohexstr(BF.getAddress() + BF.getSize())
1027 << " starting at offset " << (Offset - BF.getSize())
1028 << " in function " << BF << '\n'
1029 << FunctionData->slice(BF.getSize(),
1030 BF.getMaxSize() - BF.getSize())
1031 << '\n';
1034 return false;
1037 void BinaryContext::adjustCodePadding() {
1038 for (auto &BFI : BinaryFunctions) {
1039 BinaryFunction &BF = BFI.second;
1040 if (!shouldEmit(BF))
1041 continue;
1043 if (!hasValidCodePadding(BF)) {
1044 if (HasRelocations) {
1045 if (opts::Verbosity >= 1) {
1046 this->outs() << "BOLT-INFO: function " << BF
1047 << " has invalid padding. Ignoring the function.\n";
1049 BF.setIgnored();
1050 } else {
1051 BF.setMaxSize(BF.getSize());
1057 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
1058 uint64_t Size,
1059 uint16_t Alignment,
1060 unsigned Flags) {
1061 // Register the name with MCContext.
1062 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1064 auto GAI = BinaryDataMap.find(Address);
1065 BinaryData *BD;
1066 if (GAI == BinaryDataMap.end()) {
1067 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1068 BinarySection &Section =
1069 SectionOrErr ? SectionOrErr.get() : absoluteSection();
1070 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1071 Section, Flags);
1072 GAI = BinaryDataMap.emplace(Address, BD).first;
1073 GlobalSymbols[Name] = BD;
1074 updateObjectNesting(GAI);
1075 } else {
1076 BD = GAI->second;
1077 if (!BD->hasName(Name)) {
1078 GlobalSymbols[Name] = BD;
1079 BD->updateSize(Size);
1080 BD->Symbols.push_back(Symbol);
1084 return Symbol;
1087 const BinaryData *
1088 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1089 auto NI = BinaryDataMap.lower_bound(Address);
1090 auto End = BinaryDataMap.end();
1091 if ((NI != End && Address == NI->first) ||
1092 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1093 if (NI->second->containsAddress(Address))
1094 return NI->second;
1096 // If this is a sub-symbol, see if a parent data contains the address.
1097 const BinaryData *BD = NI->second->getParent();
1098 while (BD) {
1099 if (BD->containsAddress(Address))
1100 return BD;
1101 BD = BD->getParent();
1104 return nullptr;
1107 BinaryData *BinaryContext::getGOTSymbol() {
1108 // First tries to find a global symbol with that name
1109 BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1110 if (GOTSymBD)
1111 return GOTSymBD;
1113 // This symbol might be hidden from run-time link, so fetch the local
1114 // definition if available.
1115 GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1116 if (!GOTSymBD)
1117 return nullptr;
1119 // If the local symbol is not unique, fail
1120 unsigned Index = 2;
1121 SmallString<30> Storage;
1122 while (const BinaryData *BD =
1123 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1124 .concat(Twine(Index++))
1125 .toStringRef(Storage)))
1126 if (BD->getAddress() != GOTSymBD->getAddress())
1127 return nullptr;
1129 return GOTSymBD;
1132 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1133 auto NI = BinaryDataMap.find(Address);
1134 assert(NI != BinaryDataMap.end());
1135 if (NI == BinaryDataMap.end())
1136 return false;
1137 // TODO: it's possible that a jump table starts at the same address
1138 // as a larger blob of private data. When we set the size of the
1139 // jump table, it might be smaller than the total blob size. In this
1140 // case we just leave the original size since (currently) it won't really
1141 // affect anything.
1142 assert((!NI->second->Size || NI->second->Size == Size ||
1143 (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1144 "can't change the size of a symbol that has already had its "
1145 "size set");
1146 if (!NI->second->Size) {
1147 NI->second->Size = Size;
1148 updateObjectNesting(NI);
1149 return true;
1151 return false;
/// Give data objects that only have internal (auto-generated) names a name
/// derived from a hash of the object.
///
/// For each object in BinaryDataMap whose primary name is internal: if it has
/// a non-internal alias, that alias is swapped to the front and nothing else is
/// done; zero-sized objects are skipped; otherwise a new name made of the text
/// preceding "0x" in the current name, "_" and the hex section hash is
/// prepended to the object's symbols and registered in GlobalSymbols. A new
/// name that already exists is counted as a collision unless the object looks
/// like padding, and the object is left unchanged.
1154 void BinaryContext::generateSymbolHashes() {
// An object is treated as padding when it is named "HOLEat..." or when its
// bytes in the section contents are all zero.
1155 auto isPadding = [](const BinaryData &BD) {
1156 StringRef Contents = BD.getSection().getContents();
1157 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1158 return (BD.getName().starts_with("HOLEat") ||
1159 SymData.find_first_not_of(0) == StringRef::npos);
1162 uint64_t NumCollisions = 0;
1163 for (auto &Entry : BinaryDataMap) {
1164 BinaryData &BD = *Entry.second;
1165 StringRef Name = BD.getName();
// Objects that already have a non-internal primary name need no hash name.
1167 if (!isInternalSymbolName(Name))
1168 continue;
1170 // First check if a non-anonymous alias exists and move it to the front.
1171 if (BD.getSymbols().size() > 1) {
1172 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1173 return !isInternalSymbolName(Symbol->getName());
1175 if (Itr != BD.getSymbols().end()) {
1176 size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1177 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1178 continue;
1182 // We have to skip 0 size symbols since they will all collide.
1183 if (BD.getSize() == 0) {
1184 continue;
1187 const uint64_t Hash = BD.getSection().hash(BD);
// If "0x" does not occur in Name, Idx is npos and substr() keeps the full name.
1188 const size_t Idx = Name.find("0x");
1189 std::string NewName =
1190 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1191 if (getBinaryDataByName(NewName)) {
1192 // Ignore collisions for symbols that appear to be padding
1193 // (i.e. all zeros or a "hole")
1194 if (!isPadding(BD)) {
1195 if (opts::Verbosity) {
1196 this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1197 << " with new name (" << NewName << "), skipping.\n";
1199 ++NumCollisions;
1201 continue;
// No collision: make the hash-based name the object's first symbol.
1203 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1204 GlobalSymbols[NewName] = &BD;
// Summarize collisions once; the per-object list is only printed when
// verbosity is enabled.
1206 if (NumCollisions) {
1207 this->errs() << "BOLT-WARNING: " << NumCollisions
1208 << " collisions detected while hashing binary objects";
1209 if (!opts::Verbosity)
1210 this->errs() << ". Use -v=1 to see the list.";
1211 this->errs() << '\n';
1215 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1216 BinaryFunction &Function) {
1217 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1218 if (TargetFunction.isChildOf(Function))
1219 return true;
1220 TargetFunction.addParentFragment(Function);
1221 Function.addFragment(TargetFunction);
1222 FragmentClasses.unionSets(&TargetFunction, &Function);
1223 if (!HasRelocations) {
1224 TargetFunction.setSimple(false);
1225 Function.setSimple(false);
1227 if (opts::Verbosity >= 1) {
1228 this->outs() << "BOLT-INFO: marking " << TargetFunction
1229 << " as a fragment of " << Function << '\n';
1231 return true;
1234 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1235 MCInst &LoadLowBits,
1236 MCInst &LoadHiBits,
1237 uint64_t Target) {
1238 const MCSymbol *TargetSymbol;
1239 uint64_t Addend = 0;
1240 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1241 /*IsPCRel*/ true);
1242 int64_t Val;
1243 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1244 ELF::R_AARCH64_ADR_PREL_PG_HI21);
1245 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1246 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
/// Try to recognize a linker veneer at \p Address, which must not belong to
/// any known function and must lie in a text section.
///
/// Instructions are disassembled from \p Address until the first branch, and
/// the collected sequence is handed to MCPlusBuilder::matchLinkerVeneer(). On
/// a match with \p MatchOnly unset, a new binary function is created for the
/// veneer, its matched instructions are annotated "AArch64Veneer", and the
/// function is moved to the Disassembled state.
///
/// \returns true if a veneer was matched, false otherwise.
1249 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1250 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
// An address inside an existing function is never treated as a veneer.
1251 if (TargetFunction)
1252 return false;
1254 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1255 assert(Section && "cannot get section for referenced address");
1256 if (!Section->isText())
1257 return false;
1259 bool Ret = false;
1260 StringRef SectionContents = Section->getContents();
1261 uint64_t Offset = Address - Section->getAddress();
1262 const uint64_t MaxSize = SectionContents.size() - Offset;
1263 const uint8_t *Bytes =
1264 reinterpret_cast<const uint8_t *>(SectionContents.data());
// Data covers the section bytes from Address to the end of the section.
1265 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
// Runs the veneer matcher on the instructions collected so far plus the
// terminating branch \p Instruction and, unless MatchOnly is set, materializes
// the veneer as a function. Returns true on a match.
1267 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1268 MCInst &Instruction, uint64_t Offset,
1269 uint64_t AbsoluteInstrAddr,
1270 uint64_t TotalSize) -> bool {
1271 MCInst *TargetHiBits, *TargetLowBits;
1272 uint64_t TargetAddress, Count;
1273 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1274 AbsoluteInstrAddr, Instruction, TargetHiBits,
1275 TargetLowBits, TargetAddress);
1276 if (!Count)
1277 return false;
1279 if (MatchOnly)
1280 return true;
1282 // NOTE The target symbol was created during disassemble's
1283 // handleExternalReference
1284 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1285 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1286 *Section, Address, TotalSize);
1287 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1288 TargetAddress);
1289 MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1290 Veneer->addInstruction(Offset, std::move(Instruction));
// The branch has been added above; the remaining Count - 1 instructions are
// taken from the end of the collected map, walking backwards.
1291 --Count;
1292 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1293 MIB->addAnnotation(It->second, "AArch64Veneer", true);
1294 Veneer->addInstruction(It->first, std::move(It->second));
1297 Veneer->getOrCreateLocalLabel(Address);
1298 Veneer->setMaxSize(TotalSize);
1299 Veneer->updateState(BinaryFunction::State::Disassembled);
1300 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x"
1301 << Twine::utohexstr(Address) << "\n");
1302 return true;
1305 uint64_t Size = 0, TotalSize = 0;
1306 BinaryFunction::InstrMapType VeneerInstructions;
// From here on Offset is relative to Address (i.e. an index into Data).
1307 for (Offset = 0; Offset < MaxSize; Offset += Size) {
1308 MCInst Instruction;
1309 const uint64_t AbsoluteInstrAddr = Address + Offset;
1310 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1311 AbsoluteInstrAddr, nulls()))
1312 break;
1314 TotalSize += Size;
// The first branch ends the candidate sequence, whether or not it matches.
1315 if (MIB->isBranch(Instruction)) {
1316 Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1317 AbsoluteInstrAddr, TotalSize);
1318 break;
1321 VeneerInstructions.emplace(Offset, std::move(Instruction));
1324 return Ret;
/// Resolve every recorded (function, address) interprocedural reference and
/// then clear the list.
///
/// A reference landing inside another known function adds an entry point at
/// the corresponding non-zero offset, after warning if the target is a
/// fragment unrelated to the source. A reference landing in a text section
/// outside any function is ignored for PLT sections, handled as a veneer on
/// AArch64 when it matches, is a fatal error when all functions must be
/// processed, and otherwise shrinks the padding of a simple function whose
/// maximum extent covers the address.
1327 void BinaryContext::processInterproceduralReferences() {
1328 for (const std::pair<BinaryFunction *, uint64_t> &It :
1329 InterproceduralReferences) {
1330 BinaryFunction &Function = *It.first;
1331 uint64_t Address = It.second;
1332 // Process interprocedural references from ignored functions in BAT mode
1333 // (non-simple in non-relocation mode) to properly register entry points
1334 if (!Address || (Function.isIgnored() && !HasBATSection))
1335 continue;
1337 BinaryFunction *TargetFunction =
1338 getBinaryFunctionContainingAddress(Address);
// A reference that stays within the source function needs no processing.
1339 if (&Function == TargetFunction)
1340 continue;
1342 if (TargetFunction) {
1343 if (TargetFunction->isFragment() &&
1344 !areRelatedFragments(TargetFunction, &Function)) {
1345 this->errs()
1346 << "BOLT-WARNING: interprocedural reference between unrelated "
1347 "fragments: "
1348 << Function.getPrintName() << " and "
1349 << TargetFunction->getPrintName() << '\n';
// Offset 0 is the function start, so only non-zero offsets add an entry point.
1351 if (uint64_t Offset = Address - TargetFunction->getAddress())
1352 TargetFunction->addEntryPointAtOffset(Offset);
1354 continue;
1357 // Check if address falls in function padding space - this could be
1358 // unmarked data in code. In this case adjust the padding space size.
1359 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1360 assert(Section && "cannot get section for referenced address");
1362 if (!Section->isText())
1363 continue;
1365 // PLT requires special handling and could be ignored in this context.
1366 StringRef SectionName = Section->getName();
1367 if (SectionName == ".plt" || SectionName == ".plt.got")
1368 continue;
1370 // Check if it is aarch64 veneer written at Address
1371 if (isAArch64() && handleAArch64Veneer(Address))
1372 continue;
// When every function has to be processed, an unmarked object in code is fatal.
1374 if (opts::processAllFunctions()) {
1375 this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1376 << "object in code at address 0x"
1377 << Twine::utohexstr(Address) << " belonging to section "
1378 << SectionName << " in current mode\n";
1379 exit(1);
// Repeat the lookup, this time using each function's maximum size so that an
// address inside a padding region is attributed to its function.
1382 TargetFunction = getBinaryFunctionContainingAddress(Address,
1383 /*CheckPastEnd=*/false,
1384 /*UseMaxSize=*/true);
1385 // We are not going to overwrite non-simple functions, but for simple
1386 // ones - adjust the padding size.
1387 if (TargetFunction && TargetFunction->isSimple()) {
1388 this->errs()
1389 << "BOLT-WARNING: function " << *TargetFunction
1390 << " has an object detected in a padding region at address 0x"
1391 << Twine::utohexstr(Address) << '\n';
1392 TargetFunction->setMaxSize(TargetFunction->getSize());
1396 InterproceduralReferences.clear();
1399 void BinaryContext::postProcessSymbolTable() {
1400 fixBinaryDataHoles();
1401 bool Valid = true;
1402 for (auto &Entry : BinaryDataMap) {
1403 BinaryData *BD = Entry.second;
1404 if ((BD->getName().starts_with("SYMBOLat") ||
1405 BD->getName().starts_with("DATAat")) &&
1406 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1407 BD->getSection()) {
1408 this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
1409 << "\n";
1410 Valid = false;
1413 assert(Valid);
1414 (void)Valid;
1415 generateSymbolHashes();
/// Fold \p ChildBF into \p ParentBF. Neither function may have multiple entry
/// points.
///
/// All symbols and aliases of the child are transferred to the parent and the
/// symbol-to-function map is updated accordingly. In relocation mode the
/// child's profile is merged into the parent and the child is erased from
/// BinaryFunctions. In non-relocation mode the child is kept, given a new
/// "__ICF_"-prefixed symbol and marked as folded into the parent. Finally the
/// parent is flagged as having functions folded into it.
1418 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1419 BinaryFunction &ParentBF) {
1420 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1421 "cannot merge functions with multiple entry points");
// Both locks are created unlocked (std::defer_lock) and are only held around
// the individual updates below.
1423 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1424 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1425 SymbolToFunctionMapMutex, std::defer_lock);
// Captured before the child's symbol list is cleared below.
1427 const StringRef ChildName = ChildBF.getOneName();
1429 // Move symbols over and update bookkeeping info.
1430 for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1431 ParentBF.getSymbols().push_back(Symbol);
1432 WriteSymbolMapLock.lock();
1433 SymbolToFunctionMap[Symbol] = &ParentBF;
1434 WriteSymbolMapLock.unlock();
1435 // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1437 ChildBF.getSymbols().clear();
1439 // Move other names the child function is known under.
1440 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1441 ChildBF.Aliases.clear();
1443 if (HasRelocations) {
1444 // Merge execution counts of ChildBF into those of ParentBF.
1445 // Without relocations, we cannot reliably merge profiles as both functions
1446 // continue to exist and either one can be executed.
1447 ChildBF.mergeProfileDataInto(ParentBF);
// The lookup takes a shared lock and the erase an exclusive lock on the same
// mutex; the lock is released in between.
1449 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1450 std::defer_lock);
1451 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1452 std::defer_lock);
1453 // Remove ChildBF from the global set of functions in relocs mode.
1454 ReadBfsLock.lock();
1455 auto FI = BinaryFunctions.find(ChildBF.getAddress());
1456 ReadBfsLock.unlock();
1458 assert(FI != BinaryFunctions.end() && "function not found");
1459 assert(&ChildBF == &FI->second && "function mismatch");
1461 WriteBfsLock.lock();
1462 ChildBF.clearDisasmState();
1463 FI = BinaryFunctions.erase(FI);
1464 WriteBfsLock.unlock();
1466 } else {
1467 // In non-relocation mode we keep the function, but rename it.
1468 std::string NewName = "__ICF_" + ChildName.str();
1470 WriteCtxLock.lock();
1471 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1472 WriteCtxLock.unlock();
1474 ChildBF.setFolded(&ParentBF);
1477 ParentBF.setHasFunctionsFoldedInto();
/// Make sure every byte of every allocatable section is covered by a
/// top-level data object.
///
/// For each section the top-level, non-hole objects are walked in address
/// order and the uncovered gaps between them (and up to the section end) are
/// collected. Each gap is then covered either by setting the size of an
/// object already registered at the gap's start (when it belongs to the same
/// section) or by creating a new "HOLEat" symbol.
1480 void BinaryContext::fixBinaryDataHoles() {
1481 assert(validateObjectNesting() && "object nesting inconsistency detected");
1483 for (BinarySection &Section : allocatableSections()) {
// Each hole is stored as a (start address, size) pair.
1484 std::vector<std::pair<uint64_t, uint64_t>> Holes;
// Filter selecting top-level objects of this section, excluding zero-sized
// auto-named placeholders, which are themselves treated as holes.
1486 auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1487 BinaryData *BD = Itr->second;
1488 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1489 (BD->getName().starts_with("SYMBOLat0x") ||
1490 BD->getName().starts_with("DATAat0x") ||
1491 BD->getName().starts_with("ANONYMOUS")));
1492 return !isHole && BD->getSection() == Section && !BD->getParent();
1495 auto BDStart = BinaryDataMap.begin();
1496 auto BDEnd = BinaryDataMap.end();
1497 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1498 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
// EndAddress tracks the end address of the most recently visited object,
// starting from the beginning of the section.
1500 uint64_t EndAddress = Section.getAddress();
1502 while (Itr != End) {
1503 if (Itr->second->getAddress() > EndAddress) {
1504 uint64_t Gap = Itr->second->getAddress() - EndAddress;
1505 Holes.emplace_back(EndAddress, Gap);
1507 EndAddress = Itr->second->getEndAddress();
1508 ++Itr;
// Trailing gap between the last object and the end of the section.
1511 if (EndAddress < Section.getEndAddress())
1512 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1514 // If there is already a symbol at the start of the hole, grow that symbol
1515 // to cover the rest. Otherwise, create a new symbol to cover the hole.
1516 for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1517 BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1518 if (BD) {
1519 // BD->getSection() can be != Section if there are sections that
1520 // overlap. In this case it is probably safe to just skip the holes
1521 // since the overlapping section will not(?) have any symbols in it.
1522 if (BD->getSection() == Section)
1523 setBinaryDataSize(Hole.first, Hole.second);
1524 } else {
1525 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1530 assert(validateObjectNesting() && "object nesting inconsistency detected");
1531 assert(validateHoles() && "top level hole detected in object map");
1534 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1535 const BinarySection *CurrentSection = nullptr;
1536 bool FirstSection = true;
1538 for (auto &Entry : BinaryDataMap) {
1539 const BinaryData *BD = Entry.second;
1540 const BinarySection &Section = BD->getSection();
1541 if (FirstSection || Section != *CurrentSection) {
1542 uint64_t Address, Size;
1543 StringRef Name = Section.getName();
1544 if (Section) {
1545 Address = Section.getAddress();
1546 Size = Section.getSize();
1547 } else {
1548 Address = BD->getAddress();
1549 Size = BD->getSize();
1551 OS << "BOLT-INFO: Section " << Name << ", "
1552 << "0x" + Twine::utohexstr(Address) << ":"
1553 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1554 CurrentSection = &Section;
1555 FirstSection = false;
1558 OS << "BOLT-INFO: ";
1559 const BinaryData *P = BD->getParent();
1560 while (P) {
1561 OS << " ";
1562 P = P->getParent();
1564 OS << *BD << "\n";
1568 Expected<unsigned> BinaryContext::getDwarfFile(
1569 StringRef Directory, StringRef FileName, unsigned FileNumber,
1570 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1571 unsigned CUID, unsigned DWARFVersion) {
1572 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1573 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1574 FileNumber);
1577 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1578 const uint32_t SrcCUID,
1579 unsigned FileIndex) {
1580 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1581 const DWARFDebugLine::LineTable *LineTable =
1582 DwCtx->getLineTableForUnit(SrcUnit);
1583 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1584 LineTable->Prologue.FileNames;
1585 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1586 // means empty dir.
1587 assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1588 "FileIndex out of range for the compilation unit.");
1589 StringRef Dir = "";
1590 if (FileNames[FileIndex - 1].DirIdx != 0) {
1591 if (std::optional<const char *> DirName = dwarf::toString(
1592 LineTable->Prologue
1593 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1594 Dir = *DirName;
1597 StringRef FileName = "";
1598 if (std::optional<const char *> FName =
1599 dwarf::toString(FileNames[FileIndex - 1].Name))
1600 FileName = *FName;
1601 assert(FileName != "");
1602 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1603 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1604 DestCUID, DstUnit->getVersion()));
1607 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1608 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1609 llvm::transform(llvm::make_second_range(BinaryFunctions),
1610 SortedFunctions.begin(),
1611 [](BinaryFunction &BF) { return &BF; });
1613 llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex);
1614 return SortedFunctions;
1617 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1618 std::vector<BinaryFunction *> AllFunctions;
1619 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1620 llvm::transform(llvm::make_second_range(BinaryFunctions),
1621 std::back_inserter(AllFunctions),
1622 [](BinaryFunction &BF) { return &BF; });
1623 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1625 return AllFunctions;
1628 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1629 auto Iter = DWOCUs.find(DWOId);
1630 if (Iter == DWOCUs.end())
1631 return std::nullopt;
1633 return Iter->second;
1636 DWARFContext *BinaryContext::getDWOContext() const {
1637 if (DWOCUs.empty())
1638 return nullptr;
1639 return &DWOCUs.begin()->second->getContext();
1642 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1643 void BinaryContext::preprocessDWODebugInfo() {
1644 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1645 DWARFUnit *const DwarfUnit = CU.get();
1646 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1647 std::string DWOName = dwarf::toString(
1648 DwarfUnit->getUnitDIE().find(
1649 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1650 "");
1651 SmallString<16> AbsolutePath;
1652 if (!opts::CompDirOverride.empty()) {
1653 sys::path::append(AbsolutePath, opts::CompDirOverride);
1654 sys::path::append(AbsolutePath, DWOName);
1656 DWARFUnit *DWOCU =
1657 DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit();
1658 if (!DWOCU->isDWOUnit()) {
1659 this->outs()
1660 << "BOLT-WARNING: Debug Fission: DWO debug information for "
1661 << DWOName
1662 << " was not retrieved and won't be updated. Please check "
1663 "relative path.\n";
1664 continue;
1666 DWOCUs[*DWOId] = DWOCU;
1669 if (!DWOCUs.empty())
1670 this->outs() << "BOLT-INFO: processing split DWARF\n";
/// Prepare DWARF information for rewriting.
///
/// Steps, in order: build a sorted list of address ranges per compile unit
/// and use it to assign a DWARF unit to each binary function; collect the
/// units that have at least one function to be emitted (ProcessedCUs) and
/// detach the DWARF unit from functions of all other units; preprocess split
/// DWARF; finally, for every compile unit create a line table label and, for
/// processed units, register the root file (DWARF v5+) and all file names
/// from the input line table.
1673 void BinaryContext::preprocessDebugInfo() {
// Address range [LowPC, HighPC) of a compile unit, ordered by LowPC.
1674 struct CURange {
1675 uint64_t LowPC;
1676 uint64_t HighPC;
1677 DWARFUnit *Unit;
1679 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1682 // Building a map of address ranges to CUs similar to .debug_aranges and use
1683 // it to assign CU to functions.
1684 std::vector<CURange> AllRanges;
1685 AllRanges.reserve(DwCtx->getNumCompileUnits());
1686 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1687 Expected<DWARFAddressRangesVector> RangesOrError =
1688 CU->getUnitDIE().getAddressRanges();
// Units whose ranges cannot be read are skipped; the error is discarded.
1689 if (!RangesOrError) {
1690 consumeError(RangesOrError.takeError());
1691 continue;
1693 for (DWARFAddressRange &Range : *RangesOrError) {
1694 // Parts of the debug info could be invalidated due to corresponding code
1695 // being removed from the binary by the linker. Hence we check if the
1696 // address is a valid one.
1697 if (containsAddress(Range.LowPC))
1698 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
// Record which DWARF versions are present in the input.
1701 ContainsDwarf5 |= CU->getVersion() >= 5;
1702 ContainsDwarfLegacy |= CU->getVersion() < 5;
1705 llvm::sort(AllRanges);
1706 for (auto &KV : BinaryFunctions) {
1707 const uint64_t FunctionAddress = KV.first;
1708 BinaryFunction &Function = KV.second;
// Find the first range whose HighPC is above the function address; it matches
// if its LowPC is not above the function address.
1710 auto It = llvm::partition_point(
1711 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1712 if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1713 Function.setDWARFUnit(It->Unit);
1716 // Discover units with debug info that needs to be updated.
1717 for (const auto &KV : BinaryFunctions) {
1718 const BinaryFunction &BF = KV.second;
1719 if (shouldEmit(BF) && BF.getDWARFUnit())
1720 ProcessedCUs.insert(BF.getDWARFUnit());
1723 // Clear debug info for functions from units that we are not going to process.
1724 for (auto &KV : BinaryFunctions) {
1725 BinaryFunction &BF = KV.second;
1726 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1727 BF.setDWARFUnit(nullptr);
1730 if (opts::Verbosity >= 1) {
1731 this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1732 << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1735 preprocessDWODebugInfo();
1737 // Populate MCContext with DWARF files from all units.
1738 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1739 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
// The unit's offset serves as its ID for line table lookup.
1740 const uint64_t CUID = CU->getOffset();
1741 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1742 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1743 GlobalPrefix + "line_table_start" + Twine(CUID)));
// Every unit gets a label, but only processed units get their files added.
1745 if (!ProcessedCUs.count(CU.get()))
1746 continue;
1748 const DWARFDebugLine::LineTable *LineTable =
1749 DwCtx->getLineTableForUnit(CU.get());
1750 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1751 LineTable->Prologue.FileNames;
1753 uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1754 if (DwarfVersion >= 5) {
1755 std::optional<MD5::MD5Result> Checksum;
// NOTE(review): FileNames[0] is read before the FileNames.empty() check
// below - confirm that a v5 table with HasMD5 set always has an entry.
1756 if (LineTable->Prologue.ContentTypes.HasMD5)
1757 Checksum = LineTable->Prologue.FileNames[0].Checksum;
1758 std::optional<const char *> Name =
1759 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
// For a skeleton unit, take the name from the corresponding DWO unit instead.
1760 if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1761 auto Iter = DWOCUs.find(*DWOID);
1762 assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1763 Name = dwarf::toString(
1764 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1766 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1767 std::nullopt);
1770 BinaryLineTable.setDwarfVersion(DwarfVersion);
1772 // Assign a unique label to every line table, one per CU.
1773 // Make sure empty debug line tables are registered too.
1774 if (FileNames.empty()) {
1775 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1776 CUID, DwarfVersion));
1777 continue;
// Before DWARF v5 a non-zero directory index is 1-based, so 1 is subtracted
// when indexing IncludeDirectories; from v5 on the index is used as is.
1779 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1780 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1781 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1782 // means empty dir.
1783 StringRef Dir = "";
1784 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1785 if (std::optional<const char *> DirName = dwarf::toString(
1786 LineTable->Prologue
1787 .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1788 Dir = *DirName;
1789 StringRef FileName = "";
1790 if (std::optional<const char *> FName =
1791 dwarf::toString(FileNames[I].Name))
1792 FileName = *FName;
1793 assert(FileName != "");
1794 std::optional<MD5::MD5Result> Checksum;
1795 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1796 Checksum = LineTable->Prologue.FileNames[I].Checksum;
1797 cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1798 DwarfVersion));
1803 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1804 if (Function.isPseudo())
1805 return false;
1807 if (opts::processAllFunctions())
1808 return true;
1810 if (Function.isIgnored())
1811 return false;
1813 // In relocation mode we will emit non-simple functions with CFG.
1814 // If the function does not have a CFG it should be marked as ignored.
1815 return HasRelocations || Function.isSimple();
1818 void BinaryContext::dump(const MCInst &Inst) const {
1819 if (LLVM_UNLIKELY(!InstPrinter)) {
1820 dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1821 return;
1823 InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
1824 dbgs() << "\n";
1827 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1828 uint32_t Operation = Inst.getOperation();
1829 switch (Operation) {
1830 case MCCFIInstruction::OpSameValue:
1831 OS << "OpSameValue Reg" << Inst.getRegister();
1832 break;
1833 case MCCFIInstruction::OpRememberState:
1834 OS << "OpRememberState";
1835 break;
1836 case MCCFIInstruction::OpRestoreState:
1837 OS << "OpRestoreState";
1838 break;
1839 case MCCFIInstruction::OpOffset:
1840 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1841 break;
1842 case MCCFIInstruction::OpDefCfaRegister:
1843 OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1844 break;
1845 case MCCFIInstruction::OpDefCfaOffset:
1846 OS << "OpDefCfaOffset " << Inst.getOffset();
1847 break;
1848 case MCCFIInstruction::OpDefCfa:
1849 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1850 break;
1851 case MCCFIInstruction::OpRelOffset:
1852 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1853 break;
1854 case MCCFIInstruction::OpAdjustCfaOffset:
1855 OS << "OfAdjustCfaOffset " << Inst.getOffset();
1856 break;
1857 case MCCFIInstruction::OpEscape:
1858 OS << "OpEscape";
1859 break;
1860 case MCCFIInstruction::OpRestore:
1861 OS << "OpRestore Reg" << Inst.getRegister();
1862 break;
1863 case MCCFIInstruction::OpUndefined:
1864 OS << "OpUndefined Reg" << Inst.getRegister();
1865 break;
1866 case MCCFIInstruction::OpRegister:
1867 OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1868 << Inst.getRegister2();
1869 break;
1870 case MCCFIInstruction::OpWindowSave:
1871 OS << "OpWindowSave";
1872 break;
1873 case MCCFIInstruction::OpGnuArgsSize:
1874 OS << "OpGnuArgsSize";
1875 break;
1876 default:
1877 OS << "Op#" << Operation;
1878 break;
1882 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1883 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1884 // in the code section (see IHI0056B). $x identifies a symbol starting code or
1885 // the end of a data chunk inside code, $d identifies start of data.
1886 if (isX86() || ELFSymbolRef(Symbol).getSize())
1887 return MarkerSymType::NONE;
1889 Expected<StringRef> NameOrError = Symbol.getName();
1890 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1892 if (!TypeOrError || !NameOrError)
1893 return MarkerSymType::NONE;
1895 if (*TypeOrError != SymbolRef::ST_Unknown)
1896 return MarkerSymType::NONE;
1898 if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
1899 return MarkerSymType::CODE;
1901 // $x<ISA>
1902 if (isRISCV() && NameOrError->starts_with("$x"))
1903 return MarkerSymType::CODE;
1905 if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
1906 return MarkerSymType::DATA;
1908 return MarkerSymType::NONE;
1911 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1912 return getMarkerType(Symbol) != MarkerSymType::NONE;
1915 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1916 const BinaryFunction *Function,
1917 DWARFContext *DwCtx) {
1918 DebugLineTableRowRef RowRef =
1919 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1920 if (RowRef == DebugLineTableRowRef::NULL_ROW)
1921 return;
1923 const DWARFDebugLine::LineTable *LineTable;
1924 if (Function && Function->getDWARFUnit() &&
1925 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1926 LineTable = Function->getDWARFLineTable();
1927 } else {
1928 LineTable = DwCtx->getLineTableForUnit(
1929 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1931 assert(LineTable && "line table expected for instruction with debug info");
1933 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1934 StringRef FileName = "";
1935 if (std::optional<const char *> FName =
1936 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1937 FileName = *FName;
1938 OS << " # debug line " << FileName << ":" << Row.Line;
1939 if (Row.Column)
1940 OS << ":" << Row.Column;
1941 if (Row.Discriminator)
1942 OS << " discriminator:" << Row.Discriminator;
1945 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1946 uint64_t Offset,
1947 const BinaryFunction *Function,
1948 bool PrintMCInst, bool PrintMemData,
1949 bool PrintRelocations,
1950 StringRef Endl) const {
1951 OS << format(" %08" PRIx64 ": ", Offset);
1952 if (MIB->isCFI(Instruction)) {
1953 uint32_t Offset = Instruction.getOperand(0).getImm();
1954 OS << "\t!CFI\t$" << Offset << "\t; ";
1955 if (Function)
1956 printCFI(OS, *Function->getCFIFor(Instruction));
1957 OS << Endl;
1958 return;
1960 if (std::optional<uint32_t> DynamicID =
1961 MIB->getDynamicBranchID(Instruction)) {
1962 OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName()
1963 << " # ID: " << DynamicID;
1964 } else {
1965 // If there are annotations on the instruction, the MCInstPrinter will fail
1966 // to print the preferred alias as it only does so when the number of
1967 // operands is as expected. See
1968 // https://github.com/llvm/llvm-project/blob/782f1a0d895646c364a53f9dcdd6d4ec1f3e5ea0/llvm/lib/MC/MCInstPrinter.cpp#L142
1969 // Therefore, create a temporary copy of the Inst from which the annotations
1970 // are removed, and print that Inst.
1971 MCInst InstNoAnnot = Instruction;
1972 MIB->stripAnnotations(InstNoAnnot);
1973 InstPrinter->printInst(&InstNoAnnot, 0, "", *STI, OS);
1975 if (MIB->isCall(Instruction)) {
1976 if (MIB->isTailCall(Instruction))
1977 OS << " # TAILCALL ";
1978 if (MIB->isInvoke(Instruction)) {
1979 const std::optional<MCPlus::MCLandingPad> EHInfo =
1980 MIB->getEHInfo(Instruction);
1981 OS << " # handler: ";
1982 if (EHInfo->first)
1983 OS << *EHInfo->first;
1984 else
1985 OS << '0';
1986 OS << "; action: " << EHInfo->second;
1987 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1988 if (GnuArgsSize >= 0)
1989 OS << "; GNU_args_size = " << GnuArgsSize;
1991 } else if (MIB->isIndirectBranch(Instruction)) {
1992 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1993 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1994 } else {
1995 OS << " # UNKNOWN CONTROL FLOW";
1998 if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1999 OS << " # Offset: " << *Offset;
2000 if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
2001 OS << " # Size: " << *Size;
2002 if (MCSymbol *Label = MIB->getInstLabel(Instruction))
2003 OS << " # Label: " << *Label;
2005 MIB->printAnnotations(Instruction, OS);
2007 if (opts::PrintDebugInfo)
2008 printDebugInfo(OS, Instruction, Function, DwCtx.get());
2010 if ((opts::PrintRelocations || PrintRelocations) && Function) {
2011 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
2012 Function->printRelocations(OS, Offset, Size);
2015 OS << Endl;
2017 if (PrintMCInst) {
2018 Instruction.dump_pretty(OS, InstPrinter.get());
2019 OS << Endl;
2023 std::optional<uint64_t>
2024 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
2025 uint64_t FileOffset) const {
2026 // Find a segment with a matching file offset.
2027 for (auto &KV : SegmentMapInfo) {
2028 const SegmentInfo &SegInfo = KV.second;
2029 // Only consider executable segments.
2030 if (!SegInfo.IsExecutable)
2031 continue;
2032 // FileOffset is got from perf event,
2033 // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
2034 // If the pagesize is not equal to SegInfo.Alignment.
2035 // FileOffset and SegInfo.FileOffset should be aligned first,
2036 // and then judge whether they are equal.
2037 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
2038 alignDown(FileOffset, SegInfo.Alignment)) {
2039 // The function's offset from base address in VAS is aligned by pagesize
2040 // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2041 // However, The ELF document says that SegInfo.FileOffset should equal
2042 // to SegInfo.Address, modulo the pagesize.
2043 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2045 // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2046 // alignDown(SegInfo.Address, pagesize)
2047 // = SegInfo.Address - (SegInfo.Address % pagesize)
2048 // = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2049 // = SegInfo.Address - SegInfo.FileOffset +
2050 // alignDown(SegInfo.FileOffset, pagesize)
2051 // = SegInfo.Address - SegInfo.FileOffset + FileOffset
2052 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
2056 return std::nullopt;
2059 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
2060 auto SI = AddressToSection.upper_bound(Address);
2061 if (SI != AddressToSection.begin()) {
2062 --SI;
2063 uint64_t UpperBound = SI->first + SI->second->getSize();
2064 if (!SI->second->getSize())
2065 UpperBound += 1;
2066 if (UpperBound > Address)
2067 return *SI->second;
2069 return std::make_error_code(std::errc::bad_address);
2072 ErrorOr<StringRef>
2073 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
2074 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
2075 return Section->getName();
2076 return std::make_error_code(std::errc::bad_address);
2079 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
2080 auto Res = Sections.insert(Section);
2081 (void)Res;
2082 assert(Res.second && "can't register the same section twice.");
2084 // Only register allocatable sections in the AddressToSection map.
2085 if (Section->isAllocatable() && Section->getAddress())
2086 AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
2087 NameToSection.insert(
2088 std::make_pair(std::string(Section->getName()), Section));
2089 if (Section->hasSectionRef())
2090 SectionRefToBinarySection.insert(
2091 std::make_pair(Section->getSectionRef(), Section));
2093 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2094 return *Section;
2097 BinarySection &BinaryContext::registerSection(SectionRef Section) {
2098 return registerSection(new BinarySection(*this, Section));
2101 BinarySection &
2102 BinaryContext::registerSection(const Twine &SectionName,
2103 const BinarySection &OriginalSection) {
2104 return registerSection(
2105 new BinarySection(*this, SectionName, OriginalSection));
2108 BinarySection &
2109 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2110 unsigned ELFFlags, uint8_t *Data,
2111 uint64_t Size, unsigned Alignment) {
2112 auto NamedSections = getSectionByName(Name);
2113 if (NamedSections.begin() != NamedSections.end()) {
2114 assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2115 "can only update unique sections");
2116 BinarySection *Section = NamedSections.begin()->second;
2118 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2119 const bool Flag = Section->isAllocatable();
2120 (void)Flag;
2121 Section->update(Data, Size, Alignment, ELFType, ELFFlags);
2122 LLVM_DEBUG(dbgs() << *Section << "\n");
2123 // FIXME: Fix section flags/attributes for MachO.
2124 if (isELF())
2125 assert(Flag == Section->isAllocatable() &&
2126 "can't change section allocation status");
2127 return *Section;
2130 return registerSection(
2131 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2134 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2135 auto NameRange = NameToSection.equal_range(Section.getName().str());
2136 while (NameRange.first != NameRange.second) {
2137 if (NameRange.first->second == &Section) {
2138 NameToSection.erase(NameRange.first);
2139 break;
2141 ++NameRange.first;
2145 void BinaryContext::deregisterUnusedSections() {
2146 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
2147 for (auto SI = Sections.begin(); SI != Sections.end();) {
2148 BinarySection *Section = *SI;
2149 // We check getOutputData() instead of getOutputSize() because sometimes
2150 // zero-sized .text.cold sections are allocated.
2151 if (Section->hasSectionRef() || Section->getOutputData() ||
2152 (AbsSection && Section == &AbsSection.get())) {
2153 ++SI;
2154 continue;
2157 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2158 << '\n';);
2159 deregisterSectionName(*Section);
2160 SI = Sections.erase(SI);
2161 delete Section;
2165 bool BinaryContext::deregisterSection(BinarySection &Section) {
2166 BinarySection *SectionPtr = &Section;
2167 auto Itr = Sections.find(SectionPtr);
2168 if (Itr != Sections.end()) {
2169 auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2170 while (Range.first != Range.second) {
2171 if (Range.first->second == SectionPtr) {
2172 AddressToSection.erase(Range.first);
2173 break;
2175 ++Range.first;
2178 deregisterSectionName(*SectionPtr);
2179 Sections.erase(Itr);
2180 delete SectionPtr;
2181 return true;
2183 return false;
2186 void BinaryContext::renameSection(BinarySection &Section,
2187 const Twine &NewName) {
2188 auto Itr = Sections.find(&Section);
2189 assert(Itr != Sections.end() && "Section must exist to be renamed.");
2190 Sections.erase(Itr);
2192 deregisterSectionName(Section);
2194 Section.Name = NewName.str();
2195 Section.setOutputName(Section.Name);
2197 NameToSection.insert(std::make_pair(Section.Name, &Section));
2199 // Reinsert with the new name.
2200 Sections.insert(&Section);
2203 void BinaryContext::printSections(raw_ostream &OS) const {
2204 for (BinarySection *const &Section : Sections)
2205 OS << "BOLT-INFO: " << *Section << "\n";
2208 BinarySection &BinaryContext::absoluteSection() {
2209 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2210 return *Section;
2211 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2214 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2215 size_t Size) const {
2216 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2217 if (!Section)
2218 return std::make_error_code(std::errc::bad_address);
2220 if (Section->isVirtual())
2221 return 0;
2223 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2224 AsmInfo->getCodePointerSize());
2225 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2226 return DE.getUnsigned(&ValueOffset, Size);
2229 ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2230 size_t Size) const {
2231 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2232 if (!Section)
2233 return std::make_error_code(std::errc::bad_address);
2235 if (Section->isVirtual())
2236 return 0;
2238 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2239 AsmInfo->getCodePointerSize());
2240 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2241 return DE.getSigned(&ValueOffset, Size);
2244 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2245 uint64_t Type, uint64_t Addend,
2246 uint64_t Value) {
2247 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2248 assert(Section && "cannot find section for address");
2249 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2250 Value);
2253 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2254 uint64_t Type, uint64_t Addend,
2255 uint64_t Value) {
2256 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2257 assert(Section && "cannot find section for address");
2258 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2259 Addend, Value);
2262 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2263 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2264 assert(Section && "cannot find section for address");
2265 return Section->removeRelocationAt(Address - Section->getAddress());
2268 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2269 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2270 if (!Section)
2271 return nullptr;
2273 return Section->getRelocationAt(Address - Section->getAddress());
2276 const Relocation *
2277 BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2278 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2279 if (!Section)
2280 return nullptr;
2282 return Section->getDynamicRelocationAt(Address - Section->getAddress());
2285 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2286 const uint64_t Address) {
2287 auto setImmovable = [&](BinaryData &BD) {
2288 BinaryData *Root = BD.getAtomicRoot();
2289 LLVM_DEBUG(if (Root->isMoveable()) {
2290 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2291 << "due to ambiguous relocation referencing 0x"
2292 << Twine::utohexstr(Address) << '\n';
2294 Root->setIsMoveable(false);
2297 if (Address == BD.getAddress()) {
2298 setImmovable(BD);
2300 // Set previous symbol as immovable
2301 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2302 if (Prev && Prev->getEndAddress() == BD.getAddress())
2303 setImmovable(*Prev);
2306 if (Address == BD.getEndAddress()) {
2307 setImmovable(BD);
2309 // Set next symbol as immovable
2310 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2311 if (Next && Next->getAddress() == BD.getEndAddress())
2312 setImmovable(*Next);
2316 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2317 uint64_t *EntryDesc) {
2318 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2319 auto BFI = SymbolToFunctionMap.find(Symbol);
2320 if (BFI == SymbolToFunctionMap.end())
2321 return nullptr;
2323 BinaryFunction *BF = BFI->second;
2324 if (EntryDesc)
2325 *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2327 return BF;
2330 std::string
2331 BinaryContext::generateBugReportMessage(StringRef Message,
2332 const BinaryFunction &Function) const {
2333 std::string Msg;
2334 raw_string_ostream SS(Msg);
2335 SS << "=======================================\n";
2336 SS << "BOLT is unable to proceed because it couldn't properly understand "
2337 "this function.\n";
2338 SS << "If you are running the most recent version of BOLT, you may "
2339 "want to "
2340 "report this and paste this dump.\nPlease check that there is no "
2341 "sensitive contents being shared in this dump.\n";
2342 SS << "\nOffending function: " << Function.getPrintName() << "\n\n";
2343 ScopedPrinter SP(SS);
2344 SP.printBinaryBlock("Function contents", *Function.getData());
2345 SS << "\n";
2346 const_cast<BinaryFunction &>(Function).print(SS, "");
2347 SS << "ERROR: " << Message;
2348 SS << "\n=======================================\n";
2349 return Msg;
2352 BinaryFunction *
2353 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2354 bool IsSimple) {
2355 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2356 BinaryFunction *BF = InjectedBinaryFunctions.back();
2357 setSymbolToFunctionMap(BF->getSymbol(), BF);
2358 BF->CurrentState = BinaryFunction::State::CFG;
2359 return BF;
2362 std::pair<size_t, size_t>
2363 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2364 // Adjust branch instruction to match the current layout.
2365 if (FixBranches)
2366 BF.fixBranches();
2368 // Create local MC context to isolate the effect of ephemeral code emission.
2369 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2370 MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2371 MCAsmBackend *MAB =
2372 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2374 SmallString<256> Code;
2375 raw_svector_ostream VecOS(Code);
2377 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2378 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2379 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2380 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI));
2382 Streamer->initSections(false, *STI);
2384 MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2385 Section->setHasInstructions(true);
2387 // Create symbols in the LocalCtx so that they get destroyed with it.
2388 MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2389 MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2391 Streamer->switchSection(Section);
2392 Streamer->emitLabel(StartLabel);
2393 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2394 /*EmitCodeOnly=*/true);
2395 Streamer->emitLabel(EndLabel);
2397 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2398 SmallVector<LabelRange> SplitLabels;
2399 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2400 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2401 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2402 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2404 MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2405 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2406 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2407 SplitSection->setHasInstructions(true);
2408 Streamer->switchSection(SplitSection);
2410 Streamer->emitLabel(SplitStartLabel);
2411 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2412 Streamer->emitLabel(SplitEndLabel);
2415 MCAssembler &Assembler =
2416 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2417 Assembler.layout();
2419 // Obtain fragment sizes.
2420 std::vector<uint64_t> FragmentSizes;
2421 // Main fragment size.
2422 const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) -
2423 Assembler.getSymbolOffset(*StartLabel);
2424 FragmentSizes.push_back(HotSize);
2425 // Split fragment sizes.
2426 uint64_t ColdSize = 0;
2427 for (const auto &Labels : SplitLabels) {
2428 uint64_t Size = Assembler.getSymbolOffset(*Labels.second) -
2429 Assembler.getSymbolOffset(*Labels.first);
2430 FragmentSizes.push_back(Size);
2431 ColdSize += Size;
2434 // Populate new start and end offsets of each basic block.
2435 uint64_t FragmentIndex = 0;
2436 for (FunctionFragment &FF : BF.getLayout().fragments()) {
2437 BinaryBasicBlock *PrevBB = nullptr;
2438 for (BinaryBasicBlock *BB : FF) {
2439 const uint64_t BBStartOffset =
2440 Assembler.getSymbolOffset(*(BB->getLabel()));
2441 BB->setOutputStartAddress(BBStartOffset);
2442 if (PrevBB)
2443 PrevBB->setOutputEndAddress(BBStartOffset);
2444 PrevBB = BB;
2446 if (PrevBB)
2447 PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
2448 FragmentIndex++;
2451 // Clean-up the effect of the code emission.
2452 for (const MCSymbol &Symbol : Assembler.symbols()) {
2453 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2454 MutableSymbol->setUndefined();
2455 MutableSymbol->setIsRegistered(false);
2458 return std::make_pair(HotSize, ColdSize);
2461 bool BinaryContext::validateInstructionEncoding(
2462 ArrayRef<uint8_t> InputSequence) const {
2463 MCInst Inst;
2464 uint64_t InstSize;
2465 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2466 assert(InstSize == InputSequence.size() &&
2467 "Disassembled instruction size does not match the sequence.");
2469 SmallString<256> Code;
2470 SmallVector<MCFixup, 4> Fixups;
2472 MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2473 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2474 if (InputSequence != OutputSequence) {
2475 if (opts::Verbosity > 1) {
2476 this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2477 << " input: " << InputSequence << '\n'
2478 << " output: " << OutputSequence << '\n';
2480 return false;
2483 return true;
2486 uint64_t BinaryContext::getHotThreshold() const {
2487 static uint64_t Threshold = 0;
2488 if (Threshold == 0) {
2489 Threshold = std::max(
2490 (uint64_t)opts::ExecutionCountThreshold,
2491 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2493 return Threshold;
2496 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2497 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2498 auto FI = BinaryFunctions.upper_bound(Address);
2499 if (FI == BinaryFunctions.begin())
2500 return nullptr;
2501 --FI;
2503 const uint64_t UsedSize =
2504 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2506 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2507 return nullptr;
2509 return &FI->second;
2512 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2513 // First, try to find a function starting at the given address. If the
2514 // function was folded, this will get us the original folded function if it
2515 // wasn't removed from the list, e.g. in non-relocation mode.
2516 auto BFI = BinaryFunctions.find(Address);
2517 if (BFI != BinaryFunctions.end())
2518 return &BFI->second;
2520 // We might have folded the function matching the object at the given
2521 // address. In such case, we look for a function matching the symbol
2522 // registered at the original address. The new function (the one that the
2523 // original was folded into) will hold the symbol.
2524 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2525 uint64_t EntryID = 0;
2526 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2527 if (BF && EntryID == 0)
2528 return BF;
2530 return nullptr;
2533 /// Deregister JumpTable registered at a given \p Address and delete it.
2534 void BinaryContext::deleteJumpTable(uint64_t Address) {
2535 assert(JumpTables.count(Address) && "Must have a jump table at address");
2536 JumpTable *JT = JumpTables.at(Address);
2537 for (BinaryFunction *Parent : JT->Parents)
2538 Parent->JumpTables.erase(Address);
2539 JumpTables.erase(Address);
2540 delete JT;
2543 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2544 const DWARFAddressRangesVector &InputRanges) const {
2545 DebugAddressRangesVector OutputRanges;
2547 for (const DWARFAddressRange Range : InputRanges) {
2548 auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2549 while (BFI != BinaryFunctions.end()) {
2550 const BinaryFunction &Function = BFI->second;
2551 if (Function.getAddress() >= Range.HighPC)
2552 break;
2553 const DebugAddressRangesVector FunctionRanges =
2554 Function.getOutputAddressRanges();
2555 llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2556 std::advance(BFI, 1);
2560 return OutputRanges;
2563 } // namespace bolt
2564 } // namespace llvm