[llvm-shlib] Fix the version naming style of libLLVM for Windows (#85710)
[llvm-project.git] / bolt / lib / Rewrite / RewriteInstance.cpp
blobf5a8a5b71687454fc5a433a7e6626cf18cf596ff
1 //===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Rewrite/RewriteInstance.h"
10 #include "bolt/Core/AddressMap.h"
11 #include "bolt/Core/BinaryContext.h"
12 #include "bolt/Core/BinaryEmitter.h"
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Core/DebugData.h"
15 #include "bolt/Core/Exceptions.h"
16 #include "bolt/Core/FunctionLayout.h"
17 #include "bolt/Core/MCPlusBuilder.h"
18 #include "bolt/Core/ParallelUtilities.h"
19 #include "bolt/Core/Relocation.h"
20 #include "bolt/Passes/CacheMetrics.h"
21 #include "bolt/Passes/ReorderFunctions.h"
22 #include "bolt/Profile/BoltAddressTranslation.h"
23 #include "bolt/Profile/DataAggregator.h"
24 #include "bolt/Profile/DataReader.h"
25 #include "bolt/Profile/YAMLProfileReader.h"
26 #include "bolt/Profile/YAMLProfileWriter.h"
27 #include "bolt/Rewrite/BinaryPassManager.h"
28 #include "bolt/Rewrite/DWARFRewriter.h"
29 #include "bolt/Rewrite/ExecutableFileMemoryManager.h"
30 #include "bolt/Rewrite/JITLinkLinker.h"
31 #include "bolt/Rewrite/MetadataRewriters.h"
32 #include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h"
33 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
34 #include "bolt/Utils/CommandLineOpts.h"
35 #include "bolt/Utils/Utils.h"
36 #include "llvm/ADT/AddressRanges.h"
37 #include "llvm/ADT/STLExtras.h"
38 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
39 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
40 #include "llvm/MC/MCAsmBackend.h"
41 #include "llvm/MC/MCAsmInfo.h"
42 #include "llvm/MC/MCAsmLayout.h"
43 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
44 #include "llvm/MC/MCObjectStreamer.h"
45 #include "llvm/MC/MCStreamer.h"
46 #include "llvm/MC/MCSymbol.h"
47 #include "llvm/MC/TargetRegistry.h"
48 #include "llvm/Object/ObjectFile.h"
49 #include "llvm/Support/Alignment.h"
50 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/CommandLine.h"
52 #include "llvm/Support/DataExtractor.h"
53 #include "llvm/Support/Errc.h"
54 #include "llvm/Support/Error.h"
55 #include "llvm/Support/FileSystem.h"
56 #include "llvm/Support/ManagedStatic.h"
57 #include "llvm/Support/Regex.h"
58 #include "llvm/Support/Timer.h"
59 #include "llvm/Support/ToolOutputFile.h"
60 #include "llvm/Support/raw_ostream.h"
61 #include <algorithm>
62 #include <fstream>
63 #include <memory>
64 #include <optional>
65 #include <system_error>
67 #undef DEBUG_TYPE
68 #define DEBUG_TYPE "bolt"
70 using namespace llvm;
71 using namespace object;
72 using namespace bolt;
74 extern cl::opt<uint32_t> X86AlignBranchBoundary;
75 extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;
77 namespace opts {
79 extern cl::opt<MacroFusionType> AlignMacroOpFusion;
80 extern cl::list<std::string> HotTextMoveSections;
81 extern cl::opt<bool> Hugify;
82 extern cl::opt<bool> Instrument;
83 extern cl::opt<JumpTableSupportLevel> JumpTables;
84 extern cl::list<std::string> ReorderData;
85 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
86 extern cl::opt<bool> TimeBuild;
88 cl::opt<bool> AllowStripped("allow-stripped",
89 cl::desc("allow processing of stripped binaries"),
90 cl::Hidden, cl::cat(BoltCategory));
92 static cl::opt<bool> ForceToDataRelocations(
93 "force-data-relocations",
94 cl::desc("force relocations to data sections to always be processed"),
96 cl::Hidden, cl::cat(BoltCategory));
98 cl::opt<std::string>
99 BoltID("bolt-id",
100 cl::desc("add any string to tag this execution in the "
101 "output binary via bolt info section"),
102 cl::cat(BoltCategory));
104 cl::opt<bool> DumpDotAll(
105 "dump-dot-all",
106 cl::desc("dump function CFGs to graphviz format after each stage;"
107 "enable '-print-loops' for color-coded blocks"),
108 cl::Hidden, cl::cat(BoltCategory));
110 static cl::list<std::string>
111 ForceFunctionNames("funcs",
112 cl::CommaSeparated,
113 cl::desc("limit optimizations to functions from the list"),
114 cl::value_desc("func1,func2,func3,..."),
115 cl::Hidden,
116 cl::cat(BoltCategory));
118 static cl::opt<std::string>
119 FunctionNamesFile("funcs-file",
120 cl::desc("file with list of functions to optimize"),
121 cl::Hidden,
122 cl::cat(BoltCategory));
124 static cl::list<std::string> ForceFunctionNamesNR(
125 "funcs-no-regex", cl::CommaSeparated,
126 cl::desc("limit optimizations to functions from the list (non-regex)"),
127 cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));
129 static cl::opt<std::string> FunctionNamesFileNR(
130 "funcs-file-no-regex",
131 cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden,
132 cl::cat(BoltCategory));
134 cl::opt<bool>
135 KeepTmp("keep-tmp",
136 cl::desc("preserve intermediate .o file"),
137 cl::Hidden,
138 cl::cat(BoltCategory));
140 cl::opt<bool> Lite("lite", cl::desc("skip processing of cold functions"),
141 cl::cat(BoltCategory));
143 static cl::opt<unsigned>
144 LiteThresholdPct("lite-threshold-pct",
145 cl::desc("threshold (in percent) for selecting functions to process in lite "
146 "mode. Higher threshold means fewer functions to process. E.g "
147 "threshold of 90 means only top 10 percent of functions with "
148 "profile will be processed."),
149 cl::init(0),
150 cl::ZeroOrMore,
151 cl::Hidden,
152 cl::cat(BoltOptCategory));
154 static cl::opt<unsigned> LiteThresholdCount(
155 "lite-threshold-count",
156 cl::desc("similar to '-lite-threshold-pct' but specify threshold using "
157 "absolute function call count. I.e. limit processing to functions "
158 "executed at least the specified number of times."),
159 cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
161 static cl::opt<unsigned>
162 MaxFunctions("max-funcs",
163 cl::desc("maximum number of functions to process"), cl::Hidden,
164 cl::cat(BoltCategory));
166 static cl::opt<unsigned> MaxDataRelocations(
167 "max-data-relocations",
168 cl::desc("maximum number of data relocations to process"), cl::Hidden,
169 cl::cat(BoltCategory));
171 cl::opt<bool> PrintAll("print-all",
172 cl::desc("print functions after each stage"), cl::Hidden,
173 cl::cat(BoltCategory));
175 cl::opt<bool> PrintProfile("print-profile",
176 cl::desc("print functions after attaching profile"),
177 cl::Hidden, cl::cat(BoltCategory));
179 cl::opt<bool> PrintCFG("print-cfg",
180 cl::desc("print functions after CFG construction"),
181 cl::Hidden, cl::cat(BoltCategory));
183 cl::opt<bool> PrintDisasm("print-disasm",
184 cl::desc("print function after disassembly"),
185 cl::Hidden, cl::cat(BoltCategory));
187 static cl::opt<bool>
188 PrintGlobals("print-globals",
189 cl::desc("print global symbols after disassembly"), cl::Hidden,
190 cl::cat(BoltCategory));
192 extern cl::opt<bool> PrintSections;
194 static cl::opt<bool> PrintLoopInfo("print-loops",
195 cl::desc("print loop related information"),
196 cl::Hidden, cl::cat(BoltCategory));
198 static cl::opt<cl::boolOrDefault> RelocationMode(
199 "relocs", cl::desc("use relocations in the binary (default=autodetect)"),
200 cl::cat(BoltCategory));
202 static cl::opt<std::string>
203 SaveProfile("w",
204 cl::desc("save recorded profile to a file"),
205 cl::cat(BoltOutputCategory));
207 static cl::list<std::string>
208 SkipFunctionNames("skip-funcs",
209 cl::CommaSeparated,
210 cl::desc("list of functions to skip"),
211 cl::value_desc("func1,func2,func3,..."),
212 cl::Hidden,
213 cl::cat(BoltCategory));
215 static cl::opt<std::string>
216 SkipFunctionNamesFile("skip-funcs-file",
217 cl::desc("file with list of functions to skip"),
218 cl::Hidden,
219 cl::cat(BoltCategory));
221 cl::opt<bool>
222 TrapOldCode("trap-old-code",
223 cl::desc("insert traps in old function bodies (relocation mode)"),
224 cl::Hidden,
225 cl::cat(BoltCategory));
227 static cl::opt<std::string> DWPPathName("dwp",
228 cl::desc("Path and name to DWP file."),
229 cl::Hidden, cl::init(""),
230 cl::cat(BoltCategory));
232 static cl::opt<bool>
233 UseGnuStack("use-gnu-stack",
234 cl::desc("use GNU_STACK program header for new segment (workaround for "
235 "issues with strip/objcopy)"),
236 cl::ZeroOrMore,
237 cl::cat(BoltCategory));
239 static cl::opt<bool>
240 TimeRewrite("time-rewrite",
241 cl::desc("print time spent in rewriting passes"), cl::Hidden,
242 cl::cat(BoltCategory));
244 static cl::opt<bool>
245 SequentialDisassembly("sequential-disassembly",
246 cl::desc("performs disassembly sequentially"),
247 cl::init(false),
248 cl::cat(BoltOptCategory));
250 static cl::opt<bool> WriteBoltInfoSection(
251 "bolt-info", cl::desc("write bolt info section in the output binary"),
252 cl::init(true), cl::Hidden, cl::cat(BoltOutputCategory));
254 } // namespace opts
256 // FIXME: implement a better way to mark sections for replacement.
257 constexpr const char *RewriteInstance::SectionsToOverwrite[];
258 std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = {
259 ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_line_str",
260 ".debug_loc", ".debug_loclists", ".debug_ranges", ".debug_rnglists",
261 ".gdb_index", ".debug_addr", ".debug_abbrev", ".debug_info",
262 ".debug_types", ".pseudo_probe"};
264 const char RewriteInstance::TimerGroupName[] = "rewrite";
265 const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes";
267 namespace llvm {
268 namespace bolt {
270 extern const char *BoltRevision;
272 MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
273 const MCInstrAnalysis *Analysis,
274 const MCInstrInfo *Info,
275 const MCRegisterInfo *RegInfo,
276 const MCSubtargetInfo *STI) {
277 #ifdef X86_AVAILABLE
278 if (Arch == Triple::x86_64)
279 return createX86MCPlusBuilder(Analysis, Info, RegInfo, STI);
280 #endif
282 #ifdef AARCH64_AVAILABLE
283 if (Arch == Triple::aarch64)
284 return createAArch64MCPlusBuilder(Analysis, Info, RegInfo, STI);
285 #endif
287 #ifdef RISCV_AVAILABLE
288 if (Arch == Triple::riscv64)
289 return createRISCVMCPlusBuilder(Analysis, Info, RegInfo, STI);
290 #endif
292 llvm_unreachable("architecture unsupported by MCPlusBuilder");
295 } // namespace bolt
296 } // namespace llvm
298 using ELF64LEPhdrTy = ELF64LEFile::Elf_Phdr;
300 namespace {
302 bool refersToReorderedSection(ErrorOr<BinarySection &> Section) {
303 return llvm::any_of(opts::ReorderData, [&](const std::string &SectionName) {
304 return Section && Section->getName() == SectionName;
308 } // anonymous namespace
310 Expected<std::unique_ptr<RewriteInstance>>
311 RewriteInstance::create(ELFObjectFileBase *File, const int Argc,
312 const char *const *Argv, StringRef ToolPath) {
313 Error Err = Error::success();
314 auto RI = std::make_unique<RewriteInstance>(File, Argc, Argv, ToolPath, Err);
315 if (Err)
316 return std::move(Err);
317 return std::move(RI);
320 RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc,
321 const char *const *Argv, StringRef ToolPath,
322 Error &Err)
323 : InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath),
324 SHStrTab(StringTableBuilder::ELF) {
325 ErrorAsOutParameter EAO(&Err);
326 auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
327 if (!ELF64LEFile) {
328 Err = createStringError(errc::not_supported,
329 "Only 64-bit LE ELF binaries are supported");
330 return;
333 bool IsPIC = false;
334 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
335 if (Obj.getHeader().e_type != ELF::ET_EXEC) {
336 outs() << "BOLT-INFO: shared object or position-independent executable "
337 "detected\n";
338 IsPIC = true;
341 auto BCOrErr = BinaryContext::createBinaryContext(
342 File, IsPIC,
343 DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore,
344 nullptr, opts::DWPPathName,
345 WithColor::defaultErrorHandler,
346 WithColor::defaultWarningHandler));
347 if (Error E = BCOrErr.takeError()) {
348 Err = std::move(E);
349 return;
351 BC = std::move(BCOrErr.get());
352 BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(
353 createMCPlusBuilder(BC->TheTriple->getArch(), BC->MIA.get(),
354 BC->MII.get(), BC->MRI.get(), BC->STI.get())));
356 BAT = std::make_unique<BoltAddressTranslation>();
358 if (opts::UpdateDebugSections)
359 DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC);
361 if (opts::Instrument)
362 BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
363 else if (opts::Hugify)
364 BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>());
367 RewriteInstance::~RewriteInstance() {}
369 Error RewriteInstance::setProfile(StringRef Filename) {
370 if (!sys::fs::exists(Filename))
371 return errorCodeToError(make_error_code(errc::no_such_file_or_directory));
373 if (ProfileReader) {
374 // Already exists
375 return make_error<StringError>(Twine("multiple profiles specified: ") +
376 ProfileReader->getFilename() + " and " +
377 Filename,
378 inconvertibleErrorCode());
381 // Spawn a profile reader based on file contents.
382 if (DataAggregator::checkPerfDataMagic(Filename))
383 ProfileReader = std::make_unique<DataAggregator>(Filename);
384 else if (YAMLProfileReader::isYAML(Filename))
385 ProfileReader = std::make_unique<YAMLProfileReader>(Filename);
386 else
387 ProfileReader = std::make_unique<DataReader>(Filename);
389 return Error::success();
392 /// Return true if the function \p BF should be disassembled.
393 static bool shouldDisassemble(const BinaryFunction &BF) {
394 if (BF.isPseudo())
395 return false;
397 if (opts::processAllFunctions())
398 return true;
400 return !BF.isIgnored();
403 // Return if a section stored in the image falls into a segment address space.
404 // If not, Set \p Overlap to true if there's a partial overlap.
405 template <class ELFT>
406 static bool checkOffsets(const typename ELFT::Phdr &Phdr,
407 const typename ELFT::Shdr &Sec, bool &Overlap) {
408 // SHT_NOBITS sections don't need to have an offset inside the segment.
409 if (Sec.sh_type == ELF::SHT_NOBITS)
410 return true;
412 // Only non-empty sections can be at the end of a segment.
413 uint64_t SectionSize = Sec.sh_size ? Sec.sh_size : 1ull;
414 AddressRange SectionAddressRange((uint64_t)Sec.sh_offset,
415 Sec.sh_offset + SectionSize);
416 AddressRange SegmentAddressRange(Phdr.p_offset,
417 Phdr.p_offset + Phdr.p_filesz);
418 if (SegmentAddressRange.contains(SectionAddressRange))
419 return true;
421 Overlap = SegmentAddressRange.intersects(SectionAddressRange);
422 return false;
425 // Check that an allocatable section belongs to a virtual address
426 // space of a segment.
427 template <class ELFT>
428 static bool checkVMA(const typename ELFT::Phdr &Phdr,
429 const typename ELFT::Shdr &Sec, bool &Overlap) {
430 // Only non-empty sections can be at the end of a segment.
431 uint64_t SectionSize = Sec.sh_size ? Sec.sh_size : 1ull;
432 AddressRange SectionAddressRange((uint64_t)Sec.sh_addr,
433 Sec.sh_addr + SectionSize);
434 AddressRange SegmentAddressRange(Phdr.p_vaddr, Phdr.p_vaddr + Phdr.p_memsz);
436 if (SegmentAddressRange.contains(SectionAddressRange))
437 return true;
438 Overlap = SegmentAddressRange.intersects(SectionAddressRange);
439 return false;
442 void RewriteInstance::markGnuRelroSections() {
443 using ELFT = ELF64LE;
444 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
445 auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
446 const ELFFile<ELFT> &Obj = ELF64LEFile->getELFFile();
448 auto handleSection = [&](const ELFT::Phdr &Phdr, SectionRef SecRef) {
449 BinarySection *BinarySection = BC->getSectionForSectionRef(SecRef);
450 // If the section is non-allocatable, ignore it for GNU_RELRO purposes:
451 // it can't be made read-only after runtime relocations processing.
452 if (!BinarySection || !BinarySection->isAllocatable())
453 return;
454 const ELFShdrTy *Sec = cantFail(Obj.getSection(SecRef.getIndex()));
455 bool ImageOverlap{false}, VMAOverlap{false};
456 bool ImageContains = checkOffsets<ELFT>(Phdr, *Sec, ImageOverlap);
457 bool VMAContains = checkVMA<ELFT>(Phdr, *Sec, VMAOverlap);
458 if (ImageOverlap) {
459 if (opts::Verbosity >= 1)
460 errs() << "BOLT-WARNING: GNU_RELRO segment has partial file offset "
461 << "overlap with section " << BinarySection->getName() << '\n';
462 return;
464 if (VMAOverlap) {
465 if (opts::Verbosity >= 1)
466 errs() << "BOLT-WARNING: GNU_RELRO segment has partial VMA overlap "
467 << "with section " << BinarySection->getName() << '\n';
468 return;
470 if (!ImageContains || !VMAContains)
471 return;
472 BinarySection->setRelro();
473 if (opts::Verbosity >= 1)
474 outs() << "BOLT-INFO: marking " << BinarySection->getName()
475 << " as GNU_RELRO\n";
478 for (const ELFT::Phdr &Phdr : cantFail(Obj.program_headers()))
479 if (Phdr.p_type == ELF::PT_GNU_RELRO)
480 for (SectionRef SecRef : InputFile->sections())
481 handleSection(Phdr, SecRef);
484 Error RewriteInstance::discoverStorage() {
485 NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName,
486 TimerGroupDesc, opts::TimeRewrite);
488 auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
489 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
491 BC->StartFunctionAddress = Obj.getHeader().e_entry;
493 NextAvailableAddress = 0;
494 uint64_t NextAvailableOffset = 0;
495 Expected<ELF64LE::PhdrRange> PHsOrErr = Obj.program_headers();
496 if (Error E = PHsOrErr.takeError())
497 return E;
499 ELF64LE::PhdrRange PHs = PHsOrErr.get();
500 for (const ELF64LE::Phdr &Phdr : PHs) {
501 switch (Phdr.p_type) {
502 case ELF::PT_LOAD:
503 BC->FirstAllocAddress = std::min(BC->FirstAllocAddress,
504 static_cast<uint64_t>(Phdr.p_vaddr));
505 NextAvailableAddress = std::max(NextAvailableAddress,
506 Phdr.p_vaddr + Phdr.p_memsz);
507 NextAvailableOffset = std::max(NextAvailableOffset,
508 Phdr.p_offset + Phdr.p_filesz);
510 BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr,
511 Phdr.p_memsz,
512 Phdr.p_offset,
513 Phdr.p_filesz,
514 Phdr.p_align};
515 break;
516 case ELF::PT_INTERP:
517 BC->HasInterpHeader = true;
518 break;
522 for (const SectionRef &Section : InputFile->sections()) {
523 Expected<StringRef> SectionNameOrErr = Section.getName();
524 if (Error E = SectionNameOrErr.takeError())
525 return E;
526 StringRef SectionName = SectionNameOrErr.get();
527 if (SectionName == ".text") {
528 BC->OldTextSectionAddress = Section.getAddress();
529 BC->OldTextSectionSize = Section.getSize();
531 Expected<StringRef> SectionContentsOrErr = Section.getContents();
532 if (Error E = SectionContentsOrErr.takeError())
533 return E;
534 StringRef SectionContents = SectionContentsOrErr.get();
535 BC->OldTextSectionOffset =
536 SectionContents.data() - InputFile->getData().data();
539 if (!opts::HeatmapMode &&
540 !(opts::AggregateOnly && BAT->enabledFor(InputFile)) &&
541 (SectionName.starts_with(getOrgSecPrefix()) ||
542 SectionName == getBOLTTextSectionName()))
543 return createStringError(
544 errc::function_not_supported,
545 "BOLT-ERROR: input file was processed by BOLT. Cannot re-optimize");
548 if (!NextAvailableAddress || !NextAvailableOffset)
549 return createStringError(errc::executable_format_error,
550 "no PT_LOAD pheader seen");
552 outs() << "BOLT-INFO: first alloc address is 0x"
553 << Twine::utohexstr(BC->FirstAllocAddress) << '\n';
555 FirstNonAllocatableOffset = NextAvailableOffset;
557 NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign);
558 NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign);
560 // Hugify: Additional huge page from left side due to
561 // weird ASLR mapping addresses (4KB aligned)
562 if (opts::Hugify && !BC->HasFixedLoadAddress)
563 NextAvailableAddress += BC->PageAlign;
565 if (!opts::UseGnuStack) {
566 // This is where the black magic happens. Creating PHDR table in a segment
567 // other than that containing ELF header is tricky. Some loaders and/or
568 // parts of loaders will apply e_phoff from ELF header assuming both are in
569 // the same segment, while others will do the proper calculation.
570 // We create the new PHDR table in such a way that both of the methods
571 // of loading and locating the table work. There's a slight file size
572 // overhead because of that.
574 // NB: bfd's strip command cannot do the above and will corrupt the
575 // binary during the process of stripping non-allocatable sections.
576 if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress)
577 NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress;
578 else
579 NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress;
581 assert(NextAvailableOffset ==
582 NextAvailableAddress - BC->FirstAllocAddress &&
583 "PHDR table address calculation error");
585 outs() << "BOLT-INFO: creating new program header table at address 0x"
586 << Twine::utohexstr(NextAvailableAddress) << ", offset 0x"
587 << Twine::utohexstr(NextAvailableOffset) << '\n';
589 PHDRTableAddress = NextAvailableAddress;
590 PHDRTableOffset = NextAvailableOffset;
592 // Reserve space for 3 extra pheaders.
593 unsigned Phnum = Obj.getHeader().e_phnum;
594 Phnum += 3;
596 NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy);
597 NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy);
600 // Align at cache line.
601 NextAvailableAddress = alignTo(NextAvailableAddress, 64);
602 NextAvailableOffset = alignTo(NextAvailableOffset, 64);
604 NewTextSegmentAddress = NextAvailableAddress;
605 NewTextSegmentOffset = NextAvailableOffset;
606 BC->LayoutStartAddress = NextAvailableAddress;
608 // Tools such as objcopy can strip section contents but leave header
609 // entries. Check that at least .text is mapped in the file.
610 if (!getFileOffsetForAddress(BC->OldTextSectionAddress))
611 return createStringError(errc::executable_format_error,
612 "BOLT-ERROR: input binary is not a valid ELF "
613 "executable as its text section is not "
614 "mapped to a valid segment");
615 return Error::success();
618 void RewriteInstance::parseBuildID() {
619 if (!BuildIDSection)
620 return;
622 StringRef Buf = BuildIDSection->getContents();
624 // Reading notes section (see Portable Formats Specification, Version 1.1,
625 // pg 2-5, section "Note Section").
626 DataExtractor DE =
627 DataExtractor(Buf,
628 /*IsLittleEndian=*/true, InputFile->getBytesInAddress());
629 uint64_t Offset = 0;
630 if (!DE.isValidOffset(Offset))
631 return;
632 uint32_t NameSz = DE.getU32(&Offset);
633 if (!DE.isValidOffset(Offset))
634 return;
635 uint32_t DescSz = DE.getU32(&Offset);
636 if (!DE.isValidOffset(Offset))
637 return;
638 uint32_t Type = DE.getU32(&Offset);
640 LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz
641 << "; Type = " << Type << "\n");
643 // Type 3 is a GNU build-id note section
644 if (Type != 3)
645 return;
647 StringRef Name = Buf.slice(Offset, Offset + NameSz);
648 Offset = alignTo(Offset + NameSz, 4);
649 if (Name.substr(0, 3) != "GNU")
650 return;
652 BuildID = Buf.slice(Offset, Offset + DescSz);
655 std::optional<std::string> RewriteInstance::getPrintableBuildID() const {
656 if (BuildID.empty())
657 return std::nullopt;
659 std::string Str;
660 raw_string_ostream OS(Str);
661 const unsigned char *CharIter = BuildID.bytes_begin();
662 while (CharIter != BuildID.bytes_end()) {
663 if (*CharIter < 0x10)
664 OS << "0";
665 OS << Twine::utohexstr(*CharIter);
666 ++CharIter;
668 return OS.str();
671 void RewriteInstance::patchBuildID() {
672 raw_fd_ostream &OS = Out->os();
674 if (BuildID.empty())
675 return;
677 size_t IDOffset = BuildIDSection->getContents().rfind(BuildID);
678 assert(IDOffset != StringRef::npos && "failed to patch build-id");
680 uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress());
681 if (!FileOffset) {
682 errs() << "BOLT-WARNING: Non-allocatable build-id will not be updated.\n";
683 return;
686 char LastIDByte = BuildID[BuildID.size() - 1];
687 LastIDByte ^= 1;
688 OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1);
690 outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";
693 Error RewriteInstance::run() {
694 assert(BC && "failed to create a binary context");
696 outs() << "BOLT-INFO: Target architecture: "
697 << Triple::getArchTypeName(
698 (llvm::Triple::ArchType)InputFile->getArch())
699 << "\n";
700 outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n";
702 if (Error E = discoverStorage())
703 return E;
704 if (Error E = readSpecialSections())
705 return E;
706 adjustCommandLineOptions();
707 discoverFileObjects();
709 if (opts::Instrument && !BC->IsStaticExecutable)
710 if (Error E = discoverRtFiniAddress())
711 return E;
713 preprocessProfileData();
715 // Skip disassembling if we have a translation table and we are running an
716 // aggregation job.
717 if (opts::AggregateOnly && BAT->enabledFor(InputFile)) {
718 processProfileData();
719 return Error::success();
722 selectFunctionsToProcess();
724 readDebugInfo();
726 disassembleFunctions();
728 processMetadataPreCFG();
730 buildFunctionsCFG();
732 processProfileData();
734 postProcessFunctions();
736 processMetadataPostCFG();
738 if (opts::DiffOnly)
739 return Error::success();
741 preregisterSections();
743 runOptimizationPasses();
745 emitAndLink();
747 updateMetadata();
749 if (opts::Instrument && !BC->IsStaticExecutable)
750 updateRtFiniReloc();
752 if (opts::LinuxKernelMode) {
753 errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n";
754 return Error::success();
755 } else if (opts::OutputFilename == "/dev/null") {
756 outs() << "BOLT-INFO: skipping writing final binary to disk\n";
757 return Error::success();
760 // Rewrite allocatable contents and copy non-allocatable parts with mods.
761 rewriteFile();
762 return Error::success();
765 void RewriteInstance::discoverFileObjects() {
766 NamedRegionTimer T("discoverFileObjects", "discover file objects",
767 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
769 // For local symbols we want to keep track of associated FILE symbol name for
770 // disambiguation by combined name.
771 StringRef FileSymbolName;
772 bool SeenFileName = false;
773 struct SymbolRefHash {
774 size_t operator()(SymbolRef const &S) const {
775 return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p);
778 std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName;
779 for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
780 Expected<StringRef> NameOrError = Symbol.getName();
781 if (NameOrError && NameOrError->starts_with("__asan_init")) {
782 errs() << "BOLT-ERROR: input file was compiled or linked with sanitizer "
783 "support. Cannot optimize.\n";
784 exit(1);
786 if (NameOrError && NameOrError->starts_with("__llvm_coverage_mapping")) {
787 errs() << "BOLT-ERROR: input file was compiled or linked with coverage "
788 "support. Cannot optimize.\n";
789 exit(1);
792 if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
793 continue;
795 if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
796 StringRef Name =
797 cantFail(std::move(NameOrError), "cannot get symbol name for file");
798 // Ignore Clang LTO artificial FILE symbol as it is not always generated,
799 // and this uncertainty is causing havoc in function name matching.
800 if (Name == "ld-temp.o")
801 continue;
802 FileSymbolName = Name;
803 SeenFileName = true;
804 continue;
806 if (!FileSymbolName.empty() &&
807 !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global))
808 SymbolToFileName[Symbol] = FileSymbolName;
811 // Sort symbols in the file by value. Ignore symbols from non-allocatable
812 // sections. We memoize getAddress(), as it has rather high overhead.
813 struct SymbolInfo {
814 uint64_t Address;
815 SymbolRef Symbol;
817 std::vector<SymbolInfo> SortedSymbols;
818 auto isSymbolInMemory = [this](const SymbolRef &Sym) {
819 if (cantFail(Sym.getType()) == SymbolRef::ST_File)
820 return false;
821 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute)
822 return true;
823 if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined)
824 return false;
825 BinarySection Section(*BC, *cantFail(Sym.getSection()));
826 return Section.isAllocatable();
828 for (const SymbolRef &Symbol : InputFile->symbols())
829 if (isSymbolInMemory(Symbol))
830 SortedSymbols.push_back({cantFail(Symbol.getAddress()), Symbol});
832 auto CompareSymbols = [this](const SymbolInfo &A, const SymbolInfo &B) {
833 if (A.Address != B.Address)
834 return A.Address < B.Address;
836 const bool AMarker = BC->isMarker(A.Symbol);
837 const bool BMarker = BC->isMarker(B.Symbol);
838 if (AMarker || BMarker) {
839 return AMarker && !BMarker;
842 const auto AType = cantFail(A.Symbol.getType());
843 const auto BType = cantFail(B.Symbol.getType());
844 if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function)
845 return true;
846 if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug)
847 return true;
849 return false;
851 llvm::stable_sort(SortedSymbols, CompareSymbols);
853 auto LastSymbol = SortedSymbols.end();
854 if (!SortedSymbols.empty())
855 --LastSymbol;
857 // For aarch64, the ABI defines mapping symbols so we identify data in the
858 // code section (see IHI0056B). $d identifies data contents.
859 // Compilers usually merge multiple data objects in a single $d-$x interval,
860 // but we need every data object to be marked with $d. Because of that we
861 // create a vector of MarkerSyms with all locations of data objects.
863 struct MarkerSym {
864 uint64_t Address;
865 MarkerSymType Type;
868 std::vector<MarkerSym> SortedMarkerSymbols;
869 auto addExtraDataMarkerPerSymbol = [&]() {
870 bool IsData = false;
871 uint64_t LastAddr = 0;
872 for (const auto &SymInfo : SortedSymbols) {
873 if (LastAddr == SymInfo.Address) // don't repeat markers
874 continue;
876 MarkerSymType MarkerType = BC->getMarkerType(SymInfo.Symbol);
877 if (MarkerType != MarkerSymType::NONE) {
878 SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType});
879 LastAddr = SymInfo.Address;
880 IsData = MarkerType == MarkerSymType::DATA;
881 continue;
884 if (IsData) {
885 SortedMarkerSymbols.push_back({SymInfo.Address, MarkerSymType::DATA});
886 LastAddr = SymInfo.Address;
891 if (BC->isAArch64() || BC->isRISCV()) {
892 addExtraDataMarkerPerSymbol();
893 LastSymbol = std::stable_partition(
894 SortedSymbols.begin(), SortedSymbols.end(),
895 [this](const SymbolInfo &S) { return !BC->isMarker(S.Symbol); });
896 if (!SortedSymbols.empty())
897 --LastSymbol;
900 BinaryFunction *PreviousFunction = nullptr;
901 unsigned AnonymousId = 0;
903 // Regex object for matching cold fragments.
904 const Regex ColdFragment(".*\\.cold(\\.[0-9]+)?");
906 const auto SortedSymbolsEnd =
907 LastSymbol == SortedSymbols.end() ? LastSymbol : std::next(LastSymbol);
908 for (auto Iter = SortedSymbols.begin(); Iter != SortedSymbolsEnd; ++Iter) {
909 const SymbolRef &Symbol = Iter->Symbol;
910 const uint64_t SymbolAddress = Iter->Address;
911 const auto SymbolFlags = cantFail(Symbol.getFlags());
912 const SymbolRef::Type SymbolType = cantFail(Symbol.getType());
914 if (SymbolType == SymbolRef::ST_File)
915 continue;
917 StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name");
918 if (SymbolAddress == 0) {
919 if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function)
920 errs() << "BOLT-WARNING: function with 0 address seen\n";
921 continue;
924 // Ignore input hot markers
925 if (SymName == "__hot_start" || SymName == "__hot_end")
926 continue;
928 FileSymRefs[SymbolAddress] = Symbol;
930 // Skip section symbols that will be registered by disassemblePLT().
931 if (SymbolType == SymbolRef::ST_Debug) {
932 ErrorOr<BinarySection &> BSection =
933 BC->getSectionForAddress(SymbolAddress);
934 if (BSection && getPLTSectionInfo(BSection->getName()))
935 continue;
938 /// It is possible we are seeing a globalized local. LLVM might treat it as
939 /// a local if it has a "private global" prefix, e.g. ".L". Thus we have to
940 /// change the prefix to enforce global scope of the symbol.
941 std::string Name =
942 SymName.starts_with(BC->AsmInfo->getPrivateGlobalPrefix())
943 ? "PG" + std::string(SymName)
944 : std::string(SymName);
946 // Disambiguate all local symbols before adding to symbol table.
947 // Since we don't know if we will see a global with the same name,
948 // always modify the local name.
950 // NOTE: the naming convention for local symbols should match
951 // the one we use for profile data.
952 std::string UniqueName;
953 std::string AlternativeName;
954 if (Name.empty()) {
955 UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++);
956 } else if (SymbolFlags & SymbolRef::SF_Global) {
957 if (const BinaryData *BD = BC->getBinaryDataByName(Name)) {
958 if (BD->getSize() == ELFSymbolRef(Symbol).getSize() &&
959 BD->getAddress() == SymbolAddress) {
960 if (opts::Verbosity > 1)
961 errs() << "BOLT-WARNING: ignoring duplicate global symbol " << Name
962 << "\n";
963 // Ignore duplicate entry - possibly a bug in the linker
964 continue;
966 errs() << "BOLT-ERROR: bad input binary, global symbol \"" << Name
967 << "\" is not unique\n";
968 exit(1);
970 UniqueName = Name;
971 } else {
972 // If we have a local file name, we should create 2 variants for the
973 // function name. The reason is that perf profile might have been
974 // collected on a binary that did not have the local file name (e.g. as
975 // a side effect of stripping debug info from the binary):
977 // primary: <function>/<id>
978 // alternative: <function>/<file>/<id2>
980 // The <id> field is used for disambiguation of local symbols since there
981 // could be identical function names coming from identical file names
982 // (e.g. from different directories).
983 std::string AltPrefix;
984 auto SFI = SymbolToFileName.find(Symbol);
985 if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end())
986 AltPrefix = Name + "/" + std::string(SFI->second);
988 UniqueName = NR.uniquify(Name);
989 if (!AltPrefix.empty())
990 AlternativeName = NR.uniquify(AltPrefix);
993 uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
994 uint64_t SymbolAlignment = Symbol.getAlignment();
996 auto registerName = [&](uint64_t FinalSize) {
997 // Register names even if it's not a function, e.g. for an entry point.
998 BC->registerNameAtAddress(UniqueName, SymbolAddress, FinalSize,
999 SymbolAlignment, SymbolFlags);
1000 if (!AlternativeName.empty())
1001 BC->registerNameAtAddress(AlternativeName, SymbolAddress, FinalSize,
1002 SymbolAlignment, SymbolFlags);
1005 section_iterator Section =
1006 cantFail(Symbol.getSection(), "cannot get symbol section");
1007 if (Section == InputFile->section_end()) {
1008 // Could be an absolute symbol. Used on RISC-V for __global_pointer$ so we
1009 // need to record it to handle relocations against it. For other instances
1010 // of absolute symbols, we record for pretty printing.
1011 LLVM_DEBUG(if (opts::Verbosity > 1) {
1012 dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n";
1014 registerName(SymbolSize);
1015 continue;
1018 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName
1019 << " for function\n");
1021 if (SymbolAddress == Section->getAddress() + Section->getSize()) {
1022 assert(SymbolSize == 0 &&
1023 "unexpect non-zero sized symbol at end of section");
1024 LLVM_DEBUG(
1025 dbgs()
1026 << "BOLT-DEBUG: rejecting as symbol points to end of its section\n");
1027 registerName(SymbolSize);
1028 continue;
1031 if (!Section->isText()) {
1032 assert(SymbolType != SymbolRef::ST_Function &&
1033 "unexpected function inside non-code section");
1034 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n");
1035 registerName(SymbolSize);
1036 continue;
1039 // Assembly functions could be ST_NONE with 0 size. Check that the
1040 // corresponding section is a code section and they are not inside any
1041 // other known function to consider them.
1043 // Sometimes assembly functions are not marked as functions and neither are
1044 // their local labels. The only way to tell them apart is to look at
1045 // symbol scope - global vs local.
1046 if (PreviousFunction && SymbolType != SymbolRef::ST_Function) {
1047 if (PreviousFunction->containsAddress(SymbolAddress)) {
1048 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
1049 LLVM_DEBUG(dbgs()
1050 << "BOLT-DEBUG: symbol is a function local symbol\n");
1051 } else if (SymbolAddress == PreviousFunction->getAddress() &&
1052 !SymbolSize) {
1053 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n");
1054 } else if (opts::Verbosity > 1) {
1055 errs() << "BOLT-WARNING: symbol " << UniqueName
1056 << " seen in the middle of function " << *PreviousFunction
1057 << ". Could be a new entry.\n";
1059 registerName(SymbolSize);
1060 continue;
1061 } else if (PreviousFunction->getSize() == 0 &&
1062 PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
1063 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
1064 registerName(SymbolSize);
1065 continue;
1069 if (PreviousFunction && PreviousFunction->containsAddress(SymbolAddress) &&
1070 PreviousFunction->getAddress() != SymbolAddress) {
1071 if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
1072 if (opts::Verbosity >= 1)
1073 outs() << "BOLT-INFO: skipping possibly another entry for function "
1074 << *PreviousFunction << " : " << UniqueName << '\n';
1075 registerName(SymbolSize);
1076 } else {
1077 outs() << "BOLT-INFO: using " << UniqueName << " as another entry to "
1078 << "function " << *PreviousFunction << '\n';
1080 registerName(0);
1082 PreviousFunction->addEntryPointAtOffset(SymbolAddress -
1083 PreviousFunction->getAddress());
1085 // Remove the symbol from FileSymRefs so that we can skip it from
1086 // in the future.
1087 auto SI = FileSymRefs.find(SymbolAddress);
1088 assert(SI != FileSymRefs.end() && "symbol expected to be present");
1089 assert(SI->second == Symbol && "wrong symbol found");
1090 FileSymRefs.erase(SI);
1092 continue;
1095 // Checkout for conflicts with function data from FDEs.
1096 bool IsSimple = true;
1097 auto FDEI = CFIRdWrt->getFDEs().lower_bound(SymbolAddress);
1098 if (FDEI != CFIRdWrt->getFDEs().end()) {
1099 const dwarf::FDE &FDE = *FDEI->second;
1100 if (FDEI->first != SymbolAddress) {
1101 // There's no matching starting address in FDE. Make sure the previous
1102 // FDE does not contain this address.
1103 if (FDEI != CFIRdWrt->getFDEs().begin()) {
1104 --FDEI;
1105 const dwarf::FDE &PrevFDE = *FDEI->second;
1106 uint64_t PrevStart = PrevFDE.getInitialLocation();
1107 uint64_t PrevLength = PrevFDE.getAddressRange();
1108 if (SymbolAddress > PrevStart &&
1109 SymbolAddress < PrevStart + PrevLength) {
1110 errs() << "BOLT-ERROR: function " << UniqueName
1111 << " is in conflict with FDE ["
1112 << Twine::utohexstr(PrevStart) << ", "
1113 << Twine::utohexstr(PrevStart + PrevLength)
1114 << "). Skipping.\n";
1115 IsSimple = false;
1118 } else if (FDE.getAddressRange() != SymbolSize) {
1119 if (SymbolSize) {
1120 // Function addresses match but sizes differ.
1121 errs() << "BOLT-WARNING: sizes differ for function " << UniqueName
1122 << ". FDE : " << FDE.getAddressRange()
1123 << "; symbol table : " << SymbolSize << ". Using max size.\n";
1125 SymbolSize = std::max(SymbolSize, FDE.getAddressRange());
1126 if (BC->getBinaryDataAtAddress(SymbolAddress)) {
1127 BC->setBinaryDataSize(SymbolAddress, SymbolSize);
1128 } else {
1129 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x"
1130 << Twine::utohexstr(SymbolAddress) << "\n");
1135 BinaryFunction *BF = nullptr;
1136 // Since function may not have yet obtained its real size, do a search
1137 // using the list of registered functions instead of calling
1138 // getBinaryFunctionAtAddress().
1139 auto BFI = BC->getBinaryFunctions().find(SymbolAddress);
1140 if (BFI != BC->getBinaryFunctions().end()) {
1141 BF = &BFI->second;
1142 // Duplicate the function name. Make sure everything matches before we add
1143 // an alternative name.
1144 if (SymbolSize != BF->getSize()) {
1145 if (opts::Verbosity >= 1) {
1146 if (SymbolSize && BF->getSize())
1147 errs() << "BOLT-WARNING: size mismatch for duplicate entries "
1148 << *BF << " and " << UniqueName << '\n';
1149 outs() << "BOLT-INFO: adjusting size of function " << *BF << " old "
1150 << BF->getSize() << " new " << SymbolSize << "\n";
1152 BF->setSize(std::max(SymbolSize, BF->getSize()));
1153 BC->setBinaryDataSize(SymbolAddress, BF->getSize());
1155 BF->addAlternativeName(UniqueName);
1156 } else {
1157 ErrorOr<BinarySection &> Section =
1158 BC->getSectionForAddress(SymbolAddress);
1159 // Skip symbols from invalid sections
1160 if (!Section) {
1161 errs() << "BOLT-WARNING: " << UniqueName << " (0x"
1162 << Twine::utohexstr(SymbolAddress)
1163 << ") does not have any section\n";
1164 continue;
1167 // Skip symbols from zero-sized sections.
1168 if (!Section->getSize())
1169 continue;
1171 BF = BC->createBinaryFunction(UniqueName, *Section, SymbolAddress,
1172 SymbolSize);
1173 if (!IsSimple)
1174 BF->setSimple(false);
1177 // Check if it's a cold function fragment.
1178 if (ColdFragment.match(SymName)) {
1179 static bool PrintedWarning = false;
1180 if (!PrintedWarning) {
1181 PrintedWarning = true;
1182 errs() << "BOLT-WARNING: split function detected on input : "
1183 << SymName;
1184 if (BC->HasRelocations)
1185 errs() << ". The support is limited in relocation mode\n";
1186 else
1187 errs() << '\n';
1189 BC->HasSplitFunctions = true;
1190 BF->IsFragment = true;
1193 if (!AlternativeName.empty())
1194 BF->addAlternativeName(AlternativeName);
1196 registerName(SymbolSize);
1197 PreviousFunction = BF;
1200 // Read dynamic relocation first as their presence affects the way we process
1201 // static relocations. E.g. we will ignore a static relocation at an address
1202 // that is a subject to dynamic relocation processing.
1203 processDynamicRelocations();
1205 // Process PLT section.
1206 disassemblePLT();
1208 // See if we missed any functions marked by FDE.
1209 for (const auto &FDEI : CFIRdWrt->getFDEs()) {
1210 const uint64_t Address = FDEI.first;
1211 const dwarf::FDE *FDE = FDEI.second;
1212 const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
1213 if (BF)
1214 continue;
1216 BF = BC->getBinaryFunctionContainingAddress(Address);
1217 if (BF) {
1218 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x"
1219 << Twine::utohexstr(Address + FDE->getAddressRange())
1220 << ") conflicts with function " << *BF << '\n';
1221 continue;
1224 if (opts::Verbosity >= 1)
1225 errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) << ", 0x"
1226 << Twine::utohexstr(Address + FDE->getAddressRange())
1227 << ") has no corresponding symbol table entry\n";
1229 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
1230 assert(Section && "cannot get section for address from FDE");
1231 std::string FunctionName =
1232 "__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str();
1233 BC->createBinaryFunction(FunctionName, *Section, Address,
1234 FDE->getAddressRange());
1237 BC->setHasSymbolsWithFileName(SeenFileName);
1239 // Now that all the functions were created - adjust their boundaries.
1240 adjustFunctionBoundaries();
1242 // Annotate functions with code/data markers in AArch64
1243 for (auto ISym = SortedMarkerSymbols.begin();
1244 ISym != SortedMarkerSymbols.end(); ++ISym) {
1246 auto *BF =
1247 BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
1249 if (!BF) {
1250 // Stray marker
1251 continue;
1253 const auto EntryOffset = ISym->Address - BF->getAddress();
1254 if (ISym->Type == MarkerSymType::CODE) {
1255 BF->markCodeAtOffset(EntryOffset);
1256 continue;
1258 if (ISym->Type == MarkerSymType::DATA) {
1259 BF->markDataAtOffset(EntryOffset);
1260 BC->AddressToConstantIslandMap[ISym->Address] = BF;
1261 continue;
1263 llvm_unreachable("Unknown marker");
1266 if (BC->isAArch64()) {
1267 // Check for dynamic relocations that might be contained in
1268 // constant islands.
1269 for (const BinarySection &Section : BC->allocatableSections()) {
1270 const uint64_t SectionAddress = Section.getAddress();
1271 for (const Relocation &Rel : Section.dynamicRelocations()) {
1272 const uint64_t RelAddress = SectionAddress + Rel.Offset;
1273 BinaryFunction *BF =
1274 BC->getBinaryFunctionContainingAddress(RelAddress,
1275 /*CheckPastEnd*/ false,
1276 /*UseMaxSize*/ true);
1277 if (BF) {
1278 assert(Rel.isRelative() && "Expected relative relocation for island");
1279 BF->markIslandDynamicRelocationAtAddress(RelAddress);
1285 if (!opts::LinuxKernelMode) {
1286 // Read all relocations now that we have binary functions mapped.
1287 processRelocations();
1290 registerFragments();
1293 Error RewriteInstance::discoverRtFiniAddress() {
1294 // Use DT_FINI if it's available.
1295 if (BC->FiniAddress) {
1296 BC->FiniFunctionAddress = BC->FiniAddress;
1297 return Error::success();
1300 if (!BC->FiniArrayAddress || !BC->FiniArraySize) {
1301 return createStringError(
1302 std::errc::not_supported,
1303 "Instrumentation needs either DT_FINI or DT_FINI_ARRAY");
1306 if (*BC->FiniArraySize < BC->AsmInfo->getCodePointerSize()) {
1307 return createStringError(std::errc::not_supported,
1308 "Need at least 1 DT_FINI_ARRAY slot");
1311 ErrorOr<BinarySection &> FiniArraySection =
1312 BC->getSectionForAddress(*BC->FiniArrayAddress);
1313 if (auto EC = FiniArraySection.getError())
1314 return errorCodeToError(EC);
1316 if (const Relocation *Reloc = FiniArraySection->getDynamicRelocationAt(0)) {
1317 BC->FiniFunctionAddress = Reloc->Addend;
1318 return Error::success();
1321 if (const Relocation *Reloc = FiniArraySection->getRelocationAt(0)) {
1322 BC->FiniFunctionAddress = Reloc->Value;
1323 return Error::success();
1326 return createStringError(std::errc::not_supported,
1327 "No relocation for first DT_FINI_ARRAY slot");
1330 void RewriteInstance::updateRtFiniReloc() {
1331 // Updating DT_FINI is handled by patchELFDynamic.
1332 if (BC->FiniAddress)
1333 return;
1335 const RuntimeLibrary *RT = BC->getRuntimeLibrary();
1336 if (!RT || !RT->getRuntimeFiniAddress())
1337 return;
1339 assert(BC->FiniArrayAddress && BC->FiniArraySize &&
1340 "inconsistent .fini_array state");
1342 ErrorOr<BinarySection &> FiniArraySection =
1343 BC->getSectionForAddress(*BC->FiniArrayAddress);
1344 assert(FiniArraySection && ".fini_array removed");
1346 if (std::optional<Relocation> Reloc =
1347 FiniArraySection->takeDynamicRelocationAt(0)) {
1348 assert(Reloc->Addend == BC->FiniFunctionAddress &&
1349 "inconsistent .fini_array dynamic relocation");
1350 Reloc->Addend = RT->getRuntimeFiniAddress();
1351 FiniArraySection->addDynamicRelocation(*Reloc);
1354 // Update the static relocation by adding a pending relocation which will get
1355 // patched when flushPendingRelocations is called in rewriteFile. Note that
1356 // flushPendingRelocations will calculate the value to patch as
1357 // "Symbol + Addend". Since we don't have a symbol, just set the addend to the
1358 // desired value.
1359 FiniArraySection->addPendingRelocation(Relocation{
1360 /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
1361 /*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
1364 void RewriteInstance::registerFragments() {
1365 if (!BC->HasSplitFunctions)
1366 return;
1368 for (auto &BFI : BC->getBinaryFunctions()) {
1369 BinaryFunction &Function = BFI.second;
1370 if (!Function.isFragment())
1371 continue;
1372 unsigned ParentsFound = 0;
1373 for (StringRef Name : Function.getNames()) {
1374 StringRef BaseName, Suffix;
1375 std::tie(BaseName, Suffix) = Name.split('/');
1376 const size_t ColdSuffixPos = BaseName.find(".cold");
1377 if (ColdSuffixPos == StringRef::npos)
1378 continue;
1379 // For cold function with local (foo.cold/1) symbol, prefer a parent with
1380 // local symbol as well (foo/1) over global symbol (foo).
1381 std::string ParentName = BaseName.substr(0, ColdSuffixPos).str();
1382 const BinaryData *BD = BC->getBinaryDataByName(ParentName);
1383 if (Suffix != "") {
1384 ParentName.append(Twine("/", Suffix).str());
1385 const BinaryData *BDLocal = BC->getBinaryDataByName(ParentName);
1386 if (BDLocal || !BD)
1387 BD = BDLocal;
1389 if (!BD) {
1390 if (opts::Verbosity >= 1)
1391 outs() << "BOLT-INFO: parent function not found for " << Name << "\n";
1392 continue;
1394 const uint64_t Address = BD->getAddress();
1395 BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
1396 if (!BF) {
1397 if (opts::Verbosity >= 1)
1398 outs() << formatv("BOLT-INFO: parent function not found at {0:x}\n",
1399 Address);
1400 continue;
1402 BC->registerFragment(Function, *BF);
1403 ++ParentsFound;
1405 if (!ParentsFound) {
1406 errs() << "BOLT-ERROR: parent function not found for " << Function
1407 << '\n';
1408 exit(1);
1413 void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,
1414 uint64_t EntryAddress,
1415 uint64_t EntrySize) {
1416 if (!TargetAddress)
1417 return;
1419 auto setPLTSymbol = [&](BinaryFunction *BF, StringRef Name) {
1420 const unsigned PtrSize = BC->AsmInfo->getCodePointerSize();
1421 MCSymbol *TargetSymbol = BC->registerNameAtAddress(
1422 Name.str() + "@GOT", TargetAddress, PtrSize, PtrSize);
1423 BF->setPLTSymbol(TargetSymbol);
1426 BinaryFunction *BF = BC->getBinaryFunctionAtAddress(EntryAddress);
1427 if (BF && BC->isAArch64()) {
1428 // Handle IFUNC trampoline with symbol
1429 setPLTSymbol(BF, BF->getOneName());
1430 return;
1433 const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress);
1434 if (!Rel)
1435 return;
1437 MCSymbol *Symbol = Rel->Symbol;
1438 if (!Symbol) {
1439 if (!BC->isAArch64() || !Rel->Addend || !Rel->isIRelative())
1440 return;
1442 // IFUNC trampoline without symbol
1443 BinaryFunction *TargetBF = BC->getBinaryFunctionAtAddress(Rel->Addend);
1444 if (!TargetBF) {
1445 errs()
1446 << "BOLT-WARNING: Expected BF to be presented as IFUNC resolver at "
1447 << Twine::utohexstr(Rel->Addend) << ", skipping\n";
1448 return;
1451 Symbol = TargetBF->getSymbol();
1454 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(EntryAddress);
1455 assert(Section && "cannot get section for address");
1456 if (!BF)
1457 BF = BC->createBinaryFunction(Symbol->getName().str() + "@PLT", *Section,
1458 EntryAddress, 0, EntrySize,
1459 Section->getAlignment());
1460 else
1461 BF->addAlternativeName(Symbol->getName().str() + "@PLT");
1462 setPLTSymbol(BF, Symbol->getName());
1465 void RewriteInstance::disassemblePLTSectionAArch64(BinarySection &Section) {
1466 const uint64_t SectionAddress = Section.getAddress();
1467 const uint64_t SectionSize = Section.getSize();
1468 StringRef PLTContents = Section.getContents();
1469 ArrayRef<uint8_t> PLTData(
1470 reinterpret_cast<const uint8_t *>(PLTContents.data()), SectionSize);
1472 auto disassembleInstruction = [&](uint64_t InstrOffset, MCInst &Instruction,
1473 uint64_t &InstrSize) {
1474 const uint64_t InstrAddr = SectionAddress + InstrOffset;
1475 if (!BC->DisAsm->getInstruction(Instruction, InstrSize,
1476 PLTData.slice(InstrOffset), InstrAddr,
1477 nulls())) {
1478 errs() << "BOLT-ERROR: unable to disassemble instruction in PLT section "
1479 << Section.getName() << " at offset 0x"
1480 << Twine::utohexstr(InstrOffset) << '\n';
1481 exit(1);
1485 uint64_t InstrOffset = 0;
1486 // Locate new plt entry
1487 while (InstrOffset < SectionSize) {
1488 InstructionListType Instructions;
1489 MCInst Instruction;
1490 uint64_t EntryOffset = InstrOffset;
1491 uint64_t EntrySize = 0;
1492 uint64_t InstrSize;
1493 // Loop through entry instructions
1494 while (InstrOffset < SectionSize) {
1495 disassembleInstruction(InstrOffset, Instruction, InstrSize);
1496 EntrySize += InstrSize;
1497 if (!BC->MIB->isIndirectBranch(Instruction)) {
1498 Instructions.emplace_back(Instruction);
1499 InstrOffset += InstrSize;
1500 continue;
1503 const uint64_t EntryAddress = SectionAddress + EntryOffset;
1504 const uint64_t TargetAddress = BC->MIB->analyzePLTEntry(
1505 Instruction, Instructions.begin(), Instructions.end(), EntryAddress);
1507 createPLTBinaryFunction(TargetAddress, EntryAddress, EntrySize);
1508 break;
1511 // Branch instruction
1512 InstrOffset += InstrSize;
1514 // Skip nops if any
1515 while (InstrOffset < SectionSize) {
1516 disassembleInstruction(InstrOffset, Instruction, InstrSize);
1517 if (!BC->MIB->isNoop(Instruction))
1518 break;
1520 InstrOffset += InstrSize;
1525 void RewriteInstance::disassemblePLTSectionRISCV(BinarySection &Section) {
1526 const uint64_t SectionAddress = Section.getAddress();
1527 const uint64_t SectionSize = Section.getSize();
1528 StringRef PLTContents = Section.getContents();
1529 ArrayRef<uint8_t> PLTData(
1530 reinterpret_cast<const uint8_t *>(PLTContents.data()), SectionSize);
1532 auto disassembleInstruction = [&](uint64_t InstrOffset, MCInst &Instruction,
1533 uint64_t &InstrSize) {
1534 const uint64_t InstrAddr = SectionAddress + InstrOffset;
1535 if (!BC->DisAsm->getInstruction(Instruction, InstrSize,
1536 PLTData.slice(InstrOffset), InstrAddr,
1537 nulls())) {
1538 errs() << "BOLT-ERROR: unable to disassemble instruction in PLT section "
1539 << Section.getName() << " at offset 0x"
1540 << Twine::utohexstr(InstrOffset) << '\n';
1541 exit(1);
1545 // Skip the first special entry since no relocation points to it.
1546 uint64_t InstrOffset = 32;
1548 while (InstrOffset < SectionSize) {
1549 InstructionListType Instructions;
1550 MCInst Instruction;
1551 const uint64_t EntryOffset = InstrOffset;
1552 const uint64_t EntrySize = 16;
1553 uint64_t InstrSize;
1555 while (InstrOffset < EntryOffset + EntrySize) {
1556 disassembleInstruction(InstrOffset, Instruction, InstrSize);
1557 Instructions.emplace_back(Instruction);
1558 InstrOffset += InstrSize;
1561 const uint64_t EntryAddress = SectionAddress + EntryOffset;
1562 const uint64_t TargetAddress = BC->MIB->analyzePLTEntry(
1563 Instruction, Instructions.begin(), Instructions.end(), EntryAddress);
1565 createPLTBinaryFunction(TargetAddress, EntryAddress, EntrySize);
1569 void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
1570 uint64_t EntrySize) {
1571 const uint64_t SectionAddress = Section.getAddress();
1572 const uint64_t SectionSize = Section.getSize();
1573 StringRef PLTContents = Section.getContents();
1574 ArrayRef<uint8_t> PLTData(
1575 reinterpret_cast<const uint8_t *>(PLTContents.data()), SectionSize);
1577 auto disassembleInstruction = [&](uint64_t InstrOffset, MCInst &Instruction,
1578 uint64_t &InstrSize) {
1579 const uint64_t InstrAddr = SectionAddress + InstrOffset;
1580 if (!BC->DisAsm->getInstruction(Instruction, InstrSize,
1581 PLTData.slice(InstrOffset), InstrAddr,
1582 nulls())) {
1583 errs() << "BOLT-ERROR: unable to disassemble instruction in PLT section "
1584 << Section.getName() << " at offset 0x"
1585 << Twine::utohexstr(InstrOffset) << '\n';
1586 exit(1);
1590 for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= SectionSize;
1591 EntryOffset += EntrySize) {
1592 MCInst Instruction;
1593 uint64_t InstrSize, InstrOffset = EntryOffset;
1594 while (InstrOffset < EntryOffset + EntrySize) {
1595 disassembleInstruction(InstrOffset, Instruction, InstrSize);
1596 // Check if the entry size needs adjustment.
1597 if (EntryOffset == 0 && BC->MIB->isTerminateBranch(Instruction) &&
1598 EntrySize == 8)
1599 EntrySize = 16;
1601 if (BC->MIB->isIndirectBranch(Instruction))
1602 break;
1604 InstrOffset += InstrSize;
1607 if (InstrOffset + InstrSize > EntryOffset + EntrySize)
1608 continue;
1610 uint64_t TargetAddress;
1611 if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
1612 SectionAddress + InstrOffset,
1613 InstrSize)) {
1614 errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x"
1615 << Twine::utohexstr(SectionAddress + InstrOffset) << '\n';
1616 exit(1);
1619 createPLTBinaryFunction(TargetAddress, SectionAddress + EntryOffset,
1620 EntrySize);
1624 void RewriteInstance::disassemblePLT() {
1625 auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) {
1626 if (BC->isAArch64())
1627 return disassemblePLTSectionAArch64(Section);
1628 if (BC->isRISCV())
1629 return disassemblePLTSectionRISCV(Section);
1630 return disassemblePLTSectionX86(Section, EntrySize);
1633 for (BinarySection &Section : BC->allocatableSections()) {
1634 const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName());
1635 if (!PLTSI)
1636 continue;
1638 analyzeOnePLTSection(Section, PLTSI->EntrySize);
1640 BinaryFunction *PltBF;
1641 auto BFIter = BC->getBinaryFunctions().find(Section.getAddress());
1642 if (BFIter != BC->getBinaryFunctions().end()) {
1643 PltBF = &BFIter->second;
1644 } else {
1645 // If we did not register any function at the start of the section,
1646 // then it must be a general PLT entry. Add a function at the location.
1647 PltBF = BC->createBinaryFunction(
1648 "__BOLT_PSEUDO_" + Section.getName().str(), Section,
1649 Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment());
1651 PltBF->setPseudo(true);
1655 void RewriteInstance::adjustFunctionBoundaries() {
1656 for (auto BFI = BC->getBinaryFunctions().begin(),
1657 BFE = BC->getBinaryFunctions().end();
1658 BFI != BFE; ++BFI) {
1659 BinaryFunction &Function = BFI->second;
1660 const BinaryFunction *NextFunction = nullptr;
1661 if (std::next(BFI) != BFE)
1662 NextFunction = &std::next(BFI)->second;
1664 // Check if there's a symbol or a function with a larger address in the
1665 // same section. If there is - it determines the maximum size for the
1666 // current function. Otherwise, it is the size of a containing section
1667 // the defines it.
1669 // NOTE: ignore some symbols that could be tolerated inside the body
1670 // of a function.
1671 auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress());
1672 while (NextSymRefI != FileSymRefs.end()) {
1673 SymbolRef &Symbol = NextSymRefI->second;
1674 const uint64_t SymbolAddress = NextSymRefI->first;
1675 const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
1677 if (NextFunction && SymbolAddress >= NextFunction->getAddress())
1678 break;
1680 if (!Function.isSymbolValidInScope(Symbol, SymbolSize))
1681 break;
1683 // Ignore unnamed symbols. Used, for example, by debugging info on RISC-V.
1684 if (BC->isRISCV() && cantFail(Symbol.getName()).empty()) {
1685 ++NextSymRefI;
1686 continue;
1689 // Skip basic block labels. This happens on RISC-V with linker relaxation
1690 // enabled because every branch needs a relocation and corresponding
1691 // symbol. We don't want to add such symbols as entry points.
1692 const auto PrivateLabelPrefix = BC->AsmInfo->getPrivateLabelPrefix();
1693 if (!PrivateLabelPrefix.empty() &&
1694 cantFail(Symbol.getName()).starts_with(PrivateLabelPrefix)) {
1695 ++NextSymRefI;
1696 continue;
1699 // This is potentially another entry point into the function.
1700 uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
1701 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
1702 << Function << " at offset 0x"
1703 << Twine::utohexstr(EntryOffset) << '\n');
1704 Function.addEntryPointAtOffset(EntryOffset);
1706 ++NextSymRefI;
1709 // Function runs at most till the end of the containing section.
1710 uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress();
1711 // Or till the next object marked by a symbol.
1712 if (NextSymRefI != FileSymRefs.end())
1713 NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress);
1715 // Or till the next function not marked by a symbol.
1716 if (NextFunction)
1717 NextObjectAddress =
1718 std::min(NextFunction->getAddress(), NextObjectAddress);
1720 const uint64_t MaxSize = NextObjectAddress - Function.getAddress();
1721 if (MaxSize < Function.getSize()) {
1722 errs() << "BOLT-ERROR: symbol seen in the middle of the function "
1723 << Function << ". Skipping.\n";
1724 Function.setSimple(false);
1725 Function.setMaxSize(Function.getSize());
1726 continue;
1728 Function.setMaxSize(MaxSize);
1729 if (!Function.getSize() && Function.isSimple()) {
1730 // Some assembly functions have their size set to 0, use the max
1731 // size as their real size.
1732 if (opts::Verbosity >= 1)
1733 outs() << "BOLT-INFO: setting size of function " << Function << " to "
1734 << Function.getMaxSize() << " (was 0)\n";
1735 Function.setSize(Function.getMaxSize());
1740 void RewriteInstance::relocateEHFrameSection() {
1741 assert(EHFrameSection && "Non-empty .eh_frame section expected.");
1743 BinarySection *RelocatedEHFrameSection =
1744 getSection(".relocated" + getEHFrameSectionName());
1745 assert(RelocatedEHFrameSection &&
1746 "Relocated eh_frame section should be preregistered.");
1747 DWARFDataExtractor DE(EHFrameSection->getContents(),
1748 BC->AsmInfo->isLittleEndian(),
1749 BC->AsmInfo->getCodePointerSize());
1750 auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) {
1751 if (DwarfType == dwarf::DW_EH_PE_omit)
1752 return;
1754 // Only fix references that are relative to other locations.
1755 if (!(DwarfType & dwarf::DW_EH_PE_pcrel) &&
1756 !(DwarfType & dwarf::DW_EH_PE_textrel) &&
1757 !(DwarfType & dwarf::DW_EH_PE_funcrel) &&
1758 !(DwarfType & dwarf::DW_EH_PE_datarel))
1759 return;
1761 if (!(DwarfType & dwarf::DW_EH_PE_sdata4))
1762 return;
1764 uint64_t RelType;
1765 switch (DwarfType & 0x0f) {
1766 default:
1767 llvm_unreachable("unsupported DWARF encoding type");
1768 case dwarf::DW_EH_PE_sdata4:
1769 case dwarf::DW_EH_PE_udata4:
1770 RelType = Relocation::getPC32();
1771 Offset -= 4;
1772 break;
1773 case dwarf::DW_EH_PE_sdata8:
1774 case dwarf::DW_EH_PE_udata8:
1775 RelType = Relocation::getPC64();
1776 Offset -= 8;
1777 break;
1780 // Create a relocation against an absolute value since the goal is to
1781 // preserve the contents of the section independent of the new values
1782 // of referenced symbols.
1783 RelocatedEHFrameSection->addRelocation(Offset, nullptr, RelType, Value);
1786 Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc);
1787 check_error(std::move(E), "failed to patch EH frame");
1790 Error RewriteInstance::readSpecialSections() {
1791 NamedRegionTimer T("readSpecialSections", "read special sections",
1792 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
1794 bool HasTextRelocations = false;
1795 bool HasSymbolTable = false;
1796 bool HasDebugInfo = false;
1798 // Process special sections.
1799 for (const SectionRef &Section : InputFile->sections()) {
1800 Expected<StringRef> SectionNameOrErr = Section.getName();
1801 check_error(SectionNameOrErr.takeError(), "cannot get section name");
1802 StringRef SectionName = *SectionNameOrErr;
1804 if (Error E = Section.getContents().takeError())
1805 return E;
1806 BC->registerSection(Section);
1807 LLVM_DEBUG(
1808 dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x"
1809 << Twine::utohexstr(Section.getAddress()) << ":0x"
1810 << Twine::utohexstr(Section.getAddress() + Section.getSize())
1811 << "\n");
1812 if (isDebugSection(SectionName))
1813 HasDebugInfo = true;
1814 if (isKSymtabSection(SectionName))
1815 opts::LinuxKernelMode = true;
1818 // Set IsRelro section attribute based on PT_GNU_RELRO segment.
1819 markGnuRelroSections();
1821 if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly) {
1822 errs() << "BOLT-WARNING: debug info will be stripped from the binary. "
1823 "Use -update-debug-sections to keep it.\n";
1826 HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text");
1827 HasSymbolTable = (bool)BC->getUniqueSectionByName(".symtab");
1828 EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
1829 BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id");
1831 if (ErrorOr<BinarySection &> BATSec =
1832 BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
1833 // Do not read BAT when plotting a heatmap
1834 if (!opts::HeatmapMode) {
1835 if (std::error_code EC = BAT->parse(BATSec->getContents())) {
1836 errs() << "BOLT-ERROR: failed to parse BOLT address translation "
1837 "table.\n";
1838 exit(1);
1843 if (opts::PrintSections) {
1844 outs() << "BOLT-INFO: Sections from original binary:\n";
1845 BC->printSections(outs());
1848 if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) {
1849 errs() << "BOLT-ERROR: relocations against code are missing from the input "
1850 "file. Cannot proceed in relocations mode (-relocs).\n";
1851 exit(1);
1854 BC->HasRelocations =
1855 HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE);
1857 BC->IsStripped = !HasSymbolTable;
1859 if (BC->IsStripped && !opts::AllowStripped) {
1860 errs() << "BOLT-ERROR: stripped binaries are not supported. If you know "
1861 "what you're doing, use --allow-stripped to proceed";
1862 exit(1);
1865 // Force non-relocation mode for heatmap generation
1866 if (opts::HeatmapMode)
1867 BC->HasRelocations = false;
1869 if (BC->HasRelocations)
1870 outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "")
1871 << "relocation mode\n";
1873 // Read EH frame for function boundaries info.
1874 Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame();
1875 if (!EHFrameOrError)
1876 report_error("expected valid eh_frame section", EHFrameOrError.takeError());
1877 CFIRdWrt.reset(new CFIReaderWriter(*EHFrameOrError.get()));
1879 // Parse build-id
1880 parseBuildID();
1881 if (std::optional<std::string> FileBuildID = getPrintableBuildID())
1882 BC->setFileBuildID(*FileBuildID);
1884 // Read .dynamic/PT_DYNAMIC.
1885 return readELFDynamic();
1888 void RewriteInstance::adjustCommandLineOptions() {
1889 if (BC->isAArch64() && !BC->HasRelocations)
1890 errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully "
1891 "supported\n";
1893 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
1894 RtLibrary->adjustCommandLineOptions(*BC);
1896 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
1897 outs() << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
1898 opts::AlignMacroOpFusion = MFT_NONE;
1901 if (BC->isX86() && BC->MAB->allowAutoPadding()) {
1902 if (!BC->HasRelocations) {
1903 errs() << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in "
1904 "non-relocation mode\n";
1905 exit(1);
1907 outs() << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
1908 "may take several minutes\n";
1909 opts::AlignMacroOpFusion = MFT_NONE;
1912 if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
1913 outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
1914 "mode\n";
1915 opts::AlignMacroOpFusion = MFT_NONE;
1918 if (opts::SplitEH && !BC->HasRelocations) {
1919 errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n";
1920 opts::SplitEH = false;
1923 if (opts::StrictMode && !BC->HasRelocations) {
1924 errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation "
1925 "mode\n";
1926 opts::StrictMode = false;
1929 if (BC->HasRelocations && opts::AggregateOnly &&
1930 !opts::StrictMode.getNumOccurrences()) {
1931 outs() << "BOLT-INFO: enabling strict relocation mode for aggregation "
1932 "purposes\n";
1933 opts::StrictMode = true;
1936 if (BC->isX86() && BC->HasRelocations &&
1937 opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
1938 outs() << "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
1939 "was specified\n";
1940 opts::AlignMacroOpFusion = MFT_ALL;
1943 if (!BC->HasRelocations &&
1944 opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
1945 errs() << "BOLT-ERROR: function reordering only works when "
1946 << "relocations are enabled\n";
1947 exit(1);
1950 if (opts::Instrument ||
1951 (opts::ReorderFunctions != ReorderFunctions::RT_NONE &&
1952 !opts::HotText.getNumOccurrences())) {
1953 opts::HotText = true;
1954 } else if (opts::HotText && !BC->HasRelocations) {
1955 errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n";
1956 opts::HotText = false;
1959 if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) {
1960 opts::HotTextMoveSections.addValue(".stub");
1961 opts::HotTextMoveSections.addValue(".mover");
1962 opts::HotTextMoveSections.addValue(".never_hugify");
1965 if (opts::UseOldText && !BC->OldTextSectionAddress) {
1966 errs() << "BOLT-WARNING: cannot use old .text as the section was not found"
1967 "\n";
1968 opts::UseOldText = false;
1970 if (opts::UseOldText && !BC->HasRelocations) {
1971 errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n";
1972 opts::UseOldText = false;
1975 if (!opts::AlignText.getNumOccurrences())
1976 opts::AlignText = BC->PageAlign;
1978 if (opts::AlignText < opts::AlignFunctions)
1979 opts::AlignText = (unsigned)opts::AlignFunctions;
1981 if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode &&
1982 !opts::UseOldText)
1983 opts::Lite = true;
1985 if (opts::Lite && opts::UseOldText) {
1986 errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. "
1987 "Disabling -use-old-text.\n";
1988 opts::UseOldText = false;
1991 if (opts::Lite && opts::StrictMode) {
1992 errs() << "BOLT-ERROR: -strict and -lite cannot be used at the same time\n";
1993 exit(1);
1996 if (opts::Lite)
1997 outs() << "BOLT-INFO: enabling lite mode\n";
1999 if (!opts::SaveProfile.empty() && BAT->enabledFor(InputFile)) {
2000 errs() << "BOLT-ERROR: unable to save profile in YAML format for input "
2001 "file processed by BOLT. Please remove -w option and use branch "
2002 "profile.\n";
2003 exit(1);
2007 namespace {
2008 template <typename ELFT>
2009 int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj,
2010 const RelocationRef &RelRef) {
2011 using ELFShdrTy = typename ELFT::Shdr;
2012 using Elf_Rela = typename ELFT::Rela;
2013 int64_t Addend = 0;
2014 const ELFFile<ELFT> &EF = Obj->getELFFile();
2015 DataRefImpl Rel = RelRef.getRawDataRefImpl();
2016 const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
2017 switch (RelocationSection->sh_type) {
2018 default:
2019 llvm_unreachable("unexpected relocation section type");
2020 case ELF::SHT_REL:
2021 break;
2022 case ELF::SHT_RELA: {
2023 const Elf_Rela *RelA = Obj->getRela(Rel);
2024 Addend = RelA->r_addend;
2025 break;
2029 return Addend;
2032 int64_t getRelocationAddend(const ELFObjectFileBase *Obj,
2033 const RelocationRef &Rel) {
2034 return getRelocationAddend(cast<ELF64LEObjectFile>(Obj), Rel);
2037 template <typename ELFT>
2038 uint32_t getRelocationSymbol(const ELFObjectFile<ELFT> *Obj,
2039 const RelocationRef &RelRef) {
2040 using ELFShdrTy = typename ELFT::Shdr;
2041 uint32_t Symbol = 0;
2042 const ELFFile<ELFT> &EF = Obj->getELFFile();
2043 DataRefImpl Rel = RelRef.getRawDataRefImpl();
2044 const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
2045 switch (RelocationSection->sh_type) {
2046 default:
2047 llvm_unreachable("unexpected relocation section type");
2048 case ELF::SHT_REL:
2049 Symbol = Obj->getRel(Rel)->getSymbol(EF.isMips64EL());
2050 break;
2051 case ELF::SHT_RELA:
2052 Symbol = Obj->getRela(Rel)->getSymbol(EF.isMips64EL());
2053 break;
2056 return Symbol;
2059 uint32_t getRelocationSymbol(const ELFObjectFileBase *Obj,
2060 const RelocationRef &Rel) {
2061 return getRelocationSymbol(cast<ELF64LEObjectFile>(Obj), Rel);
2063 } // anonymous namespace
2065 bool RewriteInstance::analyzeRelocation(
2066 const RelocationRef &Rel, uint64_t &RType, std::string &SymbolName,
2067 bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend,
2068 uint64_t &ExtractedValue, bool &Skip) const {
2069 Skip = false;
2070 if (!Relocation::isSupported(RType))
2071 return false;
2073 const bool IsAArch64 = BC->isAArch64();
2075 const size_t RelSize = Relocation::getSizeForType(RType);
2077 ErrorOr<uint64_t> Value =
2078 BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
2079 assert(Value && "failed to extract relocated value");
2080 if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
2081 return true;
2083 ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
2084 Addend = getRelocationAddend(InputFile, Rel);
2086 const bool IsPCRelative = Relocation::isPCRelative(RType);
2087 const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0;
2088 bool SkipVerification = false;
2089 auto SymbolIter = Rel.getSymbol();
2090 if (SymbolIter == InputFile->symbol_end()) {
2091 SymbolAddress = ExtractedValue - Addend + PCRelOffset;
2092 MCSymbol *RelSymbol =
2093 BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat");
2094 SymbolName = std::string(RelSymbol->getName());
2095 IsSectionRelocation = false;
2096 } else {
2097 const SymbolRef &Symbol = *SymbolIter;
2098 SymbolName = std::string(cantFail(Symbol.getName()));
2099 SymbolAddress = cantFail(Symbol.getAddress());
2100 SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other);
2101 // Section symbols are marked as ST_Debug.
2102 IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
2103 // Check for PLT entry registered with symbol name
2104 if (!SymbolAddress && (IsAArch64 || BC->isRISCV())) {
2105 const BinaryData *BD = BC->getPLTBinaryDataByName(SymbolName);
2106 SymbolAddress = BD ? BD->getAddress() : 0;
2109 // For PIE or dynamic libs, the linker may choose not to put the relocation
2110 // result at the address if it is a X86_64_64 one because it will emit a
2111 // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to
2112 // resolve it at run time. The static relocation result goes as the addend
2113 // of the dynamic relocation in this case. We can't verify these cases.
2114 // FIXME: perhaps we can try to find if it really emitted a corresponding
2115 // RELATIVE relocation at this offset with the correct value as the addend.
2116 if (!BC->HasFixedLoadAddress && RelSize == 8)
2117 SkipVerification = true;
2119 if (IsSectionRelocation && !IsAArch64) {
2120 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
2121 assert(Section && "section expected for section relocation");
2122 SymbolName = "section " + std::string(Section->getName());
2123 // Convert section symbol relocations to regular relocations inside
2124 // non-section symbols.
2125 if (Section->containsAddress(ExtractedValue) && !IsPCRelative) {
2126 SymbolAddress = ExtractedValue;
2127 Addend = 0;
2128 } else {
2129 Addend = ExtractedValue - (SymbolAddress - PCRelOffset);
2133 // If no symbol has been found or if it is a relocation requiring the
2134 // creation of a GOT entry, do not link against the symbol but against
2135 // whatever address was extracted from the instruction itself. We are
2136 // not creating a GOT entry as this was already processed by the linker.
2137 // For GOT relocs, do not subtract addend as the addend does not refer
2138 // to this instruction's target, but it refers to the target in the GOT
2139 // entry.
2140 if (Relocation::isGOT(RType)) {
2141 Addend = 0;
2142 SymbolAddress = ExtractedValue + PCRelOffset;
2143 } else if (Relocation::isTLS(RType)) {
2144 SkipVerification = true;
2145 } else if (!SymbolAddress) {
2146 assert(!IsSectionRelocation);
2147 if (ExtractedValue || Addend == 0 || IsPCRelative) {
2148 SymbolAddress =
2149 truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize);
2150 } else {
2151 // This is weird case. The extracted value is zero but the addend is
2152 // non-zero and the relocation is not pc-rel. Using the previous logic,
2153 // the SymbolAddress would end up as a huge number. Seen in
2154 // exceptions_pic.test.
2155 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x"
2156 << Twine::utohexstr(Rel.getOffset())
2157 << " value does not match addend for "
2158 << "relocation to undefined symbol.\n");
2159 return true;
2163 auto verifyExtractedValue = [&]() {
2164 if (SkipVerification)
2165 return true;
2167 if (IsAArch64 || BC->isRISCV())
2168 return true;
2170 if (SymbolName == "__hot_start" || SymbolName == "__hot_end")
2171 return true;
2173 if (RType == ELF::R_X86_64_PLT32)
2174 return true;
2176 return truncateToSize(ExtractedValue, RelSize) ==
2177 truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize);
2180 (void)verifyExtractedValue;
2181 assert(verifyExtractedValue() && "mismatched extracted relocation value");
2183 return true;
2186 void RewriteInstance::processDynamicRelocations() {
2187 // Read .relr.dyn section containing compressed R_*_RELATIVE relocations.
2188 if (DynamicRelrSize > 0) {
2189 ErrorOr<BinarySection &> DynamicRelrSectionOrErr =
2190 BC->getSectionForAddress(*DynamicRelrAddress);
2191 if (!DynamicRelrSectionOrErr)
2192 report_error("unable to find section corresponding to DT_RELR",
2193 DynamicRelrSectionOrErr.getError());
2194 if (DynamicRelrSectionOrErr->getSize() != DynamicRelrSize)
2195 report_error("section size mismatch for DT_RELRSZ",
2196 errc::executable_format_error);
2197 readDynamicRelrRelocations(*DynamicRelrSectionOrErr);
2200 // Read relocations for PLT - DT_JMPREL.
2201 if (PLTRelocationsSize > 0) {
2202 ErrorOr<BinarySection &> PLTRelSectionOrErr =
2203 BC->getSectionForAddress(*PLTRelocationsAddress);
2204 if (!PLTRelSectionOrErr)
2205 report_error("unable to find section corresponding to DT_JMPREL",
2206 PLTRelSectionOrErr.getError());
2207 if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize)
2208 report_error("section size mismatch for DT_PLTRELSZ",
2209 errc::executable_format_error);
2210 readDynamicRelocations(PLTRelSectionOrErr->getSectionRef(),
2211 /*IsJmpRel*/ true);
2214 // The rest of dynamic relocations - DT_RELA.
2215 // The static executable might have .rela.dyn secion and not have PT_DYNAMIC
2216 if (!DynamicRelocationsSize && BC->IsStaticExecutable) {
2217 ErrorOr<BinarySection &> DynamicRelSectionOrErr =
2218 BC->getUniqueSectionByName(getRelaDynSectionName());
2219 if (DynamicRelSectionOrErr) {
2220 DynamicRelocationsAddress = DynamicRelSectionOrErr->getAddress();
2221 DynamicRelocationsSize = DynamicRelSectionOrErr->getSize();
2222 const SectionRef &SectionRef = DynamicRelSectionOrErr->getSectionRef();
2223 DynamicRelativeRelocationsCount = std::distance(
2224 SectionRef.relocation_begin(), SectionRef.relocation_end());
2228 if (DynamicRelocationsSize > 0) {
2229 ErrorOr<BinarySection &> DynamicRelSectionOrErr =
2230 BC->getSectionForAddress(*DynamicRelocationsAddress);
2231 if (!DynamicRelSectionOrErr)
2232 report_error("unable to find section corresponding to DT_RELA",
2233 DynamicRelSectionOrErr.getError());
2234 auto DynamicRelSectionSize = DynamicRelSectionOrErr->getSize();
2235 // On RISC-V DT_RELASZ seems to include both .rela.dyn and .rela.plt
2236 if (DynamicRelocationsSize == DynamicRelSectionSize + PLTRelocationsSize)
2237 DynamicRelocationsSize = DynamicRelSectionSize;
2238 if (DynamicRelSectionSize != DynamicRelocationsSize)
2239 report_error("section size mismatch for DT_RELASZ",
2240 errc::executable_format_error);
2241 readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef(),
2242 /*IsJmpRel*/ false);
2246 void RewriteInstance::processRelocations() {
2247 if (!BC->HasRelocations)
2248 return;
2250 for (const SectionRef &Section : InputFile->sections()) {
2251 if (cantFail(Section.getRelocatedSection()) != InputFile->section_end() &&
2252 !BinarySection(*BC, Section).isAllocatable())
2253 readRelocations(Section);
2256 if (NumFailedRelocations)
2257 errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations
2258 << " relocations\n";
2261 void RewriteInstance::readDynamicRelocations(const SectionRef &Section,
2262 bool IsJmpRel) {
2263 assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected");
2265 LLVM_DEBUG({
2266 StringRef SectionName = cantFail(Section.getName());
2267 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
2268 << ":\n";
2271 for (const RelocationRef &Rel : Section.relocations()) {
2272 const uint64_t RType = Rel.getType();
2273 if (Relocation::isNone(RType))
2274 continue;
2276 StringRef SymbolName = "<none>";
2277 MCSymbol *Symbol = nullptr;
2278 uint64_t SymbolAddress = 0;
2279 const uint64_t Addend = getRelocationAddend(InputFile, Rel);
2281 symbol_iterator SymbolIter = Rel.getSymbol();
2282 if (SymbolIter != InputFile->symbol_end()) {
2283 SymbolName = cantFail(SymbolIter->getName());
2284 BinaryData *BD = BC->getBinaryDataByName(SymbolName);
2285 Symbol = BD ? BD->getSymbol()
2286 : BC->getOrCreateUndefinedGlobalSymbol(SymbolName);
2287 SymbolAddress = cantFail(SymbolIter->getAddress());
2288 (void)SymbolAddress;
2291 LLVM_DEBUG(
2292 SmallString<16> TypeName;
2293 Rel.getTypeName(TypeName);
2294 dbgs() << "BOLT-DEBUG: dynamic relocation at 0x"
2295 << Twine::utohexstr(Rel.getOffset()) << " : " << TypeName
2296 << " : " << SymbolName << " : " << Twine::utohexstr(SymbolAddress)
2297 << " : + 0x" << Twine::utohexstr(Addend) << '\n'
2300 if (IsJmpRel)
2301 IsJmpRelocation[RType] = true;
2303 if (Symbol)
2304 SymbolIndex[Symbol] = getRelocationSymbol(InputFile, Rel);
2306 BC->addDynamicRelocation(Rel.getOffset(), Symbol, RType, Addend);
2310 void RewriteInstance::readDynamicRelrRelocations(BinarySection &Section) {
2311 assert(Section.isAllocatable() && "allocatable expected");
2313 LLVM_DEBUG({
2314 StringRef SectionName = Section.getName();
2315 dbgs() << "BOLT-DEBUG: reading relocations in section " << SectionName
2316 << ":\n";
2319 const uint64_t RType = Relocation::getRelative();
2320 const uint8_t PSize = BC->AsmInfo->getCodePointerSize();
2321 const uint64_t MaxDelta = ((CHAR_BIT * DynamicRelrEntrySize) - 1) * PSize;
2323 auto ExtractAddendValue = [&](uint64_t Address) -> uint64_t {
2324 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
2325 assert(Section && "cannot get section for data address from RELR");
2326 DataExtractor DE = DataExtractor(Section->getContents(),
2327 BC->AsmInfo->isLittleEndian(), PSize);
2328 uint64_t Offset = Address - Section->getAddress();
2329 return DE.getUnsigned(&Offset, PSize);
2332 auto AddRelocation = [&](uint64_t Address) {
2333 uint64_t Addend = ExtractAddendValue(Address);
2334 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: R_*_RELATIVE relocation at 0x"
2335 << Twine::utohexstr(Address) << " to 0x"
2336 << Twine::utohexstr(Addend) << '\n';);
2337 BC->addDynamicRelocation(Address, nullptr, RType, Addend);
2340 DataExtractor DE = DataExtractor(Section.getContents(),
2341 BC->AsmInfo->isLittleEndian(), PSize);
2342 uint64_t Offset = 0, Address = 0;
2343 uint64_t RelrCount = DynamicRelrSize / DynamicRelrEntrySize;
2344 while (RelrCount--) {
2345 assert(DE.isValidOffset(Offset));
2346 uint64_t Entry = DE.getUnsigned(&Offset, DynamicRelrEntrySize);
2347 if ((Entry & 1) == 0) {
2348 AddRelocation(Entry);
2349 Address = Entry + PSize;
2350 } else {
2351 const uint64_t StartAddress = Address;
2352 while (Entry >>= 1) {
2353 if (Entry & 1)
2354 AddRelocation(Address);
2356 Address += PSize;
2359 Address = StartAddress + MaxDelta;
2364 void RewriteInstance::printRelocationInfo(const RelocationRef &Rel,
2365 StringRef SymbolName,
2366 uint64_t SymbolAddress,
2367 uint64_t Addend,
2368 uint64_t ExtractedValue) const {
2369 SmallString<16> TypeName;
2370 Rel.getTypeName(TypeName);
2371 const uint64_t Address = SymbolAddress + Addend;
2372 const uint64_t Offset = Rel.getOffset();
2373 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
2374 BinaryFunction *Func =
2375 BC->getBinaryFunctionContainingAddress(Offset, false, BC->isAArch64());
2376 dbgs() << formatv("Relocation: offset = {0:x}; type = {1}; value = {2:x}; ",
2377 Offset, TypeName, ExtractedValue)
2378 << formatv("symbol = {0} ({1}); symbol address = {2:x}; ", SymbolName,
2379 Section ? Section->getName() : "", SymbolAddress)
2380 << formatv("addend = {0:x}; address = {1:x}; in = ", Addend, Address);
2381 if (Func)
2382 dbgs() << Func->getPrintName();
2383 else
2384 dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName();
2385 dbgs() << '\n';
2388 void RewriteInstance::readRelocations(const SectionRef &Section) {
2389 LLVM_DEBUG({
2390 StringRef SectionName = cantFail(Section.getName());
2391 dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
2392 << ":\n";
2394 if (BinarySection(*BC, Section).isAllocatable()) {
2395 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n");
2396 return;
2398 section_iterator SecIter = cantFail(Section.getRelocatedSection());
2399 assert(SecIter != InputFile->section_end() && "relocated section expected");
2400 SectionRef RelocatedSection = *SecIter;
2402 StringRef RelocatedSectionName = cantFail(RelocatedSection.getName());
2403 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is "
2404 << RelocatedSectionName << '\n');
2406 if (!BinarySection(*BC, RelocatedSection).isAllocatable()) {
2407 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against "
2408 << "non-allocatable section\n");
2409 return;
2411 const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName)
2412 .Cases(".plt", ".rela.plt", ".got.plt",
2413 ".eh_frame", ".gcc_except_table", true)
2414 .Default(false);
2415 if (SkipRelocs) {
2416 LLVM_DEBUG(
2417 dbgs() << "BOLT-DEBUG: ignoring relocations against known section\n");
2418 return;
2421 for (const RelocationRef &Rel : Section.relocations())
2422 handleRelocation(RelocatedSection, Rel);
2425 void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
2426 const RelocationRef &Rel) {
2427 const bool IsAArch64 = BC->isAArch64();
2428 const bool IsFromCode = RelocatedSection.isText();
2430 SmallString<16> TypeName;
2431 Rel.getTypeName(TypeName);
2432 uint64_t RType = Rel.getType();
2433 if (Relocation::skipRelocationType(RType))
2434 return;
2436 // Adjust the relocation type as the linker might have skewed it.
2437 if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) {
2438 if (opts::Verbosity >= 1)
2439 dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n";
2440 RType &= ~ELF::R_X86_64_converted_reloc_bit;
2443 if (Relocation::isTLS(RType)) {
2444 // No special handling required for TLS relocations on X86.
2445 if (BC->isX86())
2446 return;
2448 // The non-got related TLS relocations on AArch64 and RISC-V also could be
2449 // skipped.
2450 if (!Relocation::isGOT(RType))
2451 return;
2454 if (!IsAArch64 && BC->getDynamicRelocationAt(Rel.getOffset())) {
2455 LLVM_DEBUG({
2456 dbgs() << formatv("BOLT-DEBUG: address {0:x} has a ", Rel.getOffset())
2457 << "dynamic relocation against it. Ignoring static relocation.\n";
2459 return;
2462 std::string SymbolName;
2463 uint64_t SymbolAddress;
2464 int64_t Addend;
2465 uint64_t ExtractedValue;
2466 bool IsSectionRelocation;
2467 bool Skip;
2468 if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation,
2469 SymbolAddress, Addend, ExtractedValue, Skip)) {
2470 LLVM_DEBUG({
2471 dbgs() << "BOLT-WARNING: failed to analyze relocation @ offset = "
2472 << formatv("{0:x}; type name = {1}\n", Rel.getOffset(), TypeName);
2474 ++NumFailedRelocations;
2475 return;
2478 if (Skip) {
2479 LLVM_DEBUG({
2480 dbgs() << "BOLT-DEBUG: skipping relocation @ offset = "
2481 << formatv("{0:x}; type name = {1}\n", Rel.getOffset(), TypeName);
2483 return;
2486 const uint64_t Address = SymbolAddress + Addend;
2488 LLVM_DEBUG({
2489 dbgs() << "BOLT-DEBUG: ";
2490 printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend, ExtractedValue);
2493 BinaryFunction *ContainingBF = nullptr;
2494 if (IsFromCode) {
2495 ContainingBF =
2496 BC->getBinaryFunctionContainingAddress(Rel.getOffset(),
2497 /*CheckPastEnd*/ false,
2498 /*UseMaxSize*/ true);
2499 assert(ContainingBF && "cannot find function for address in code");
2500 if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) {
2501 if (opts::Verbosity >= 1)
2502 outs() << formatv("BOLT-INFO: {0} has relocations in padding area\n",
2503 *ContainingBF);
2504 ContainingBF->setSize(ContainingBF->getMaxSize());
2505 ContainingBF->setSimple(false);
2506 return;
2510 MCSymbol *ReferencedSymbol = nullptr;
2511 if (!IsSectionRelocation) {
2512 if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
2513 ReferencedSymbol = BD->getSymbol();
2514 else if (BC->isGOTSymbol(SymbolName))
2515 if (BinaryData *BD = BC->getGOTSymbol())
2516 ReferencedSymbol = BD->getSymbol();
2519 ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
2520 symbol_iterator SymbolIter = Rel.getSymbol();
2521 if (SymbolIter != InputFile->symbol_end()) {
2522 SymbolRef Symbol = *SymbolIter;
2523 section_iterator Section =
2524 cantFail(Symbol.getSection(), "cannot get symbol section");
2525 if (Section != InputFile->section_end()) {
2526 Expected<StringRef> SectionName = Section->getName();
2527 if (SectionName && !SectionName->empty())
2528 ReferencedSection = BC->getUniqueSectionByName(*SectionName);
2529 } else if (ReferencedSymbol &&
2530 (cantFail(Symbol.getFlags()) & SymbolRef::SF_Absolute)) {
2531 // This might be a relocation for an ABS symbols like __global_pointer$ on
2532 // RISC-V
2533 ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol,
2534 Rel.getType(), 0,
2535 cantFail(Symbol.getValue()));
2536 return;
2540 if (!ReferencedSection)
2541 ReferencedSection = BC->getSectionForAddress(SymbolAddress);
2543 const bool IsToCode = ReferencedSection && ReferencedSection->isText();
2545 // Special handling of PC-relative relocations.
2546 if (!IsAArch64 && !BC->isRISCV() && Relocation::isPCRelative(RType)) {
2547 if (!IsFromCode && IsToCode) {
2548 // PC-relative relocations from data to code are tricky since the
2549 // original information is typically lost after linking, even with
2550 // '--emit-relocs'. Such relocations are normally used by PIC-style
2551 // jump tables and they reference both the jump table and jump
2552 // targets by computing the difference between the two. If we blindly
2553 // apply the relocation, it will appear that it references an arbitrary
2554 // location in the code, possibly in a different function from the one
2555 // containing the jump table.
2557 // For that reason, we only register the fact that there is a
2558 // PC-relative relocation at a given address against the code.
2559 // The actual referenced label/address will be determined during jump
2560 // table analysis.
2561 BC->addPCRelativeDataRelocation(Rel.getOffset());
2562 } else if (ContainingBF && !IsSectionRelocation && ReferencedSymbol) {
2563 // If we know the referenced symbol, register the relocation from
2564 // the code. It's required to properly handle cases where
2565 // "symbol + addend" references an object different from "symbol".
2566 ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
2567 Addend, ExtractedValue);
2568 } else {
2569 LLVM_DEBUG({
2570 dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at"
2571 << formatv("{0:x} for {1}\n", Rel.getOffset(), SymbolName);
2575 return;
2578 bool ForceRelocation = BC->forceSymbolRelocations(SymbolName);
2579 if ((BC->isAArch64() || BC->isRISCV()) && Relocation::isGOT(RType))
2580 ForceRelocation = true;
2582 if (!ReferencedSection && !ForceRelocation) {
2583 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n");
2584 return;
2587 // Occasionally we may see a reference past the last byte of the function
2588 // typically as a result of __builtin_unreachable(). Check it here.
2589 BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress(
2590 Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64);
2592 if (!IsSectionRelocation) {
2593 if (BinaryFunction *BF =
2594 BC->getBinaryFunctionContainingAddress(SymbolAddress)) {
2595 if (BF != ReferencedBF) {
2596 // It's possible we are referencing a function without referencing any
2597 // code, e.g. when taking a bitmask action on a function address.
2598 errs() << "BOLT-WARNING: non-standard function reference (e.g. bitmask)"
2599 << formatv(" detected against function {0} from ", *BF);
2600 if (IsFromCode)
2601 errs() << formatv("function {0}\n", *ContainingBF);
2602 else
2603 errs() << formatv("data section at {0:x}\n", Rel.getOffset());
2604 LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend,
2605 ExtractedValue));
2606 ReferencedBF = BF;
2609 } else if (ReferencedBF) {
2610 assert(ReferencedSection && "section expected for section relocation");
2611 if (*ReferencedBF->getOriginSection() != *ReferencedSection) {
2612 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n");
2613 ReferencedBF = nullptr;
2617 // Workaround for a member function pointer de-virtualization bug. We check
2618 // if a non-pc-relative relocation in the code is pointing to (fptr - 1).
2619 if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) &&
2620 (!ReferencedBF || (ReferencedBF->getAddress() != Address))) {
2621 if (const BinaryFunction *RogueBF =
2622 BC->getBinaryFunctionAtAddress(Address + 1)) {
2623 // Do an extra check that the function was referenced previously.
2624 // It's a linear search, but it should rarely happen.
2625 auto CheckReloc = [&](const Relocation &Rel) {
2626 return Rel.Symbol == RogueBF->getSymbol() &&
2627 !Relocation::isPCRelative(Rel.Type);
2629 bool Found = llvm::any_of(
2630 llvm::make_second_range(ContainingBF->Relocations), CheckReloc);
2632 if (Found) {
2633 errs() << "BOLT-WARNING: detected possible compiler de-virtualization "
2634 "bug: -1 addend used with non-pc-relative relocation against "
2635 << formatv("function {0} in function {1}\n", *RogueBF,
2636 *ContainingBF);
2637 return;
2642 if (ForceRelocation) {
2643 std::string Name =
2644 Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName;
2645 ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
2646 SymbolAddress = 0;
2647 if (Relocation::isGOT(RType))
2648 Addend = Address;
2649 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
2650 << SymbolName << " with addend " << Addend << '\n');
2651 } else if (ReferencedBF) {
2652 ReferencedSymbol = ReferencedBF->getSymbol();
2653 uint64_t RefFunctionOffset = 0;
2655 // Adjust the point of reference to a code location inside a function.
2656 if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */ true)) {
2657 RefFunctionOffset = Address - ReferencedBF->getAddress();
2658 if (Relocation::isInstructionReference(RType)) {
2659 // Instruction labels are created while disassembling so we just leave
2660 // the symbol empty for now. Since the extracted value is typically
2661 // unrelated to the referenced symbol (e.g., %pcrel_lo in RISC-V
2662 // references an instruction but the patched value references the low
2663 // bits of a data address), we set the extracted value to the symbol
2664 // address in order to be able to correctly reconstruct the reference
2665 // later.
2666 ReferencedSymbol = nullptr;
2667 ExtractedValue = Address;
2668 } else if (RefFunctionOffset) {
2669 if (ContainingBF && ContainingBF != ReferencedBF) {
2670 ReferencedSymbol =
2671 ReferencedBF->addEntryPointAtOffset(RefFunctionOffset);
2672 } else {
2673 ReferencedSymbol =
2674 ReferencedBF->getOrCreateLocalLabel(Address,
2675 /*CreatePastEnd =*/true);
2677 // If ContainingBF != nullptr, it equals ReferencedBF (see
2678 // if-condition above) so we're handling a relocation from a function
2679 // to itself. RISC-V uses such relocations for branches, for example.
2680 // These should not be registered as externally references offsets.
2681 if (!ContainingBF)
2682 ReferencedBF->registerReferencedOffset(RefFunctionOffset);
2684 if (opts::Verbosity > 1 &&
2685 BinarySection(*BC, RelocatedSection).isWritable())
2686 errs() << "BOLT-WARNING: writable reference into the middle of the "
2687 << formatv("function {0} detected at address {1:x}\n",
2688 *ReferencedBF, Rel.getOffset());
2690 SymbolAddress = Address;
2691 Addend = 0;
2693 LLVM_DEBUG({
2694 dbgs() << " referenced function " << *ReferencedBF;
2695 if (Address != ReferencedBF->getAddress())
2696 dbgs() << formatv(" at offset {0:x}", RefFunctionOffset);
2697 dbgs() << '\n';
2699 } else {
2700 if (IsToCode && SymbolAddress) {
2701 // This can happen e.g. with PIC-style jump tables.
2702 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for "
2703 "relocation against code\n");
2706 // In AArch64 there are zero reasons to keep a reference to the
2707 // "original" symbol plus addend. The original symbol is probably just a
2708 // section symbol. If we are here, this means we are probably accessing
2709 // data, so it is imperative to keep the original address.
2710 if (IsAArch64) {
2711 SymbolName = formatv("SYMBOLat{0:x}", Address);
2712 SymbolAddress = Address;
2713 Addend = 0;
2716 if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) {
2717 // Note: this assertion is trying to check sanity of BinaryData objects
2718 // but AArch64 has inferred and incomplete object locations coming from
2719 // GOT/TLS or any other non-trivial relocation (that requires creation
2720 // of sections and whose symbol address is not really what should be
2721 // encoded in the instruction). So we essentially disabled this check
2722 // for AArch64 and live with bogus names for objects.
2723 assert((IsAArch64 || IsSectionRelocation ||
2724 BD->nameStartsWith(SymbolName) ||
2725 BD->nameStartsWith("PG" + SymbolName) ||
2726 (BD->nameStartsWith("ANONYMOUS") &&
2727 (BD->getSectionName().starts_with(".plt") ||
2728 BD->getSectionName().ends_with(".plt")))) &&
2729 "BOLT symbol names of all non-section relocations must match up "
2730 "with symbol names referenced in the relocation");
2732 if (IsSectionRelocation)
2733 BC->markAmbiguousRelocations(*BD, Address);
2735 ReferencedSymbol = BD->getSymbol();
2736 Addend += (SymbolAddress - BD->getAddress());
2737 SymbolAddress = BD->getAddress();
2738 assert(Address == SymbolAddress + Addend);
2739 } else {
2740 // These are mostly local data symbols but undefined symbols
2741 // in relocation sections can get through here too, from .plt.
2742 assert(
2743 (IsAArch64 || BC->isRISCV() || IsSectionRelocation ||
2744 BC->getSectionNameForAddress(SymbolAddress)->starts_with(".plt")) &&
2745 "known symbols should not resolve to anonymous locals");
2747 if (IsSectionRelocation) {
2748 ReferencedSymbol =
2749 BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat");
2750 } else {
2751 SymbolRef Symbol = *Rel.getSymbol();
2752 const uint64_t SymbolSize =
2753 IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize();
2754 const uint64_t SymbolAlignment = IsAArch64 ? 1 : Symbol.getAlignment();
2755 const uint32_t SymbolFlags = cantFail(Symbol.getFlags());
2756 std::string Name;
2757 if (SymbolFlags & SymbolRef::SF_Global) {
2758 Name = SymbolName;
2759 } else {
2760 if (StringRef(SymbolName)
2761 .starts_with(BC->AsmInfo->getPrivateGlobalPrefix()))
2762 Name = NR.uniquify("PG" + SymbolName);
2763 else
2764 Name = NR.uniquify(SymbolName);
2766 ReferencedSymbol = BC->registerNameAtAddress(
2767 Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags);
2770 if (IsSectionRelocation) {
2771 BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName());
2772 BC->markAmbiguousRelocations(*BD, Address);
2777 auto checkMaxDataRelocations = [&]() {
2778 ++NumDataRelocations;
2779 LLVM_DEBUG(if (opts::MaxDataRelocations &&
2780 NumDataRelocations + 1 == opts::MaxDataRelocations) {
2781 dbgs() << "BOLT-DEBUG: processing ending on data relocation "
2782 << NumDataRelocations << ": ";
2783 printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress,
2784 Addend, ExtractedValue);
2787 return (!opts::MaxDataRelocations ||
2788 NumDataRelocations < opts::MaxDataRelocations);
2791 if ((ReferencedSection && refersToReorderedSection(ReferencedSection)) ||
2792 (opts::ForceToDataRelocations && checkMaxDataRelocations()) ||
2793 // RISC-V has ADD/SUB data-to-data relocations
2794 BC->isRISCV())
2795 ForceRelocation = true;
2797 if (IsFromCode) {
2798 ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
2799 Addend, ExtractedValue);
2800 } else if (IsToCode || ForceRelocation) {
2801 BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend,
2802 ExtractedValue);
2803 } else {
2804 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n");
2808 void RewriteInstance::selectFunctionsToProcess() {
2809 // Extend the list of functions to process or skip from a file.
2810 auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile,
2811 cl::list<std::string> &FunctionNames) {
2812 if (FunctionNamesFile.empty())
2813 return;
2814 std::ifstream FuncsFile(FunctionNamesFile, std::ios::in);
2815 std::string FuncName;
2816 while (std::getline(FuncsFile, FuncName))
2817 FunctionNames.push_back(FuncName);
2819 populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames);
2820 populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames);
2821 populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR);
2823 // Make a set of functions to process to speed up lookups.
2824 std::unordered_set<std::string> ForceFunctionsNR(
2825 opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end());
2827 if ((!opts::ForceFunctionNames.empty() ||
2828 !opts::ForceFunctionNamesNR.empty()) &&
2829 !opts::SkipFunctionNames.empty()) {
2830 errs() << "BOLT-ERROR: cannot select functions to process and skip at the "
2831 "same time. Please use only one type of selection.\n";
2832 exit(1);
2835 uint64_t LiteThresholdExecCount = 0;
2836 if (opts::LiteThresholdPct) {
2837 if (opts::LiteThresholdPct > 100)
2838 opts::LiteThresholdPct = 100;
2840 std::vector<const BinaryFunction *> TopFunctions;
2841 for (auto &BFI : BC->getBinaryFunctions()) {
2842 const BinaryFunction &Function = BFI.second;
2843 if (ProfileReader->mayHaveProfileData(Function))
2844 TopFunctions.push_back(&Function);
2846 llvm::sort(
2847 TopFunctions, [](const BinaryFunction *A, const BinaryFunction *B) {
2848 return A->getKnownExecutionCount() < B->getKnownExecutionCount();
2851 size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100;
2852 if (Index)
2853 --Index;
2854 LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount();
2855 outs() << "BOLT-INFO: limiting processing to functions with at least "
2856 << LiteThresholdExecCount << " invocations\n";
2858 LiteThresholdExecCount = std::max(
2859 LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount));
2861 StringSet<> ReorderFunctionsUserSet;
2862 StringSet<> ReorderFunctionsLTOCommonSet;
2863 if (opts::ReorderFunctions == ReorderFunctions::RT_USER) {
2864 for (const std::string &Function :
2865 ReorderFunctions::readFunctionOrderFile()) {
2866 ReorderFunctionsUserSet.insert(Function);
2867 if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Function))
2868 ReorderFunctionsLTOCommonSet.insert(*LTOCommonName);
2872 uint64_t NumFunctionsToProcess = 0;
2873 auto mustSkip = [&](const BinaryFunction &Function) {
2874 if (opts::MaxFunctions.getNumOccurrences() &&
2875 NumFunctionsToProcess >= opts::MaxFunctions)
2876 return true;
2877 for (std::string &Name : opts::SkipFunctionNames)
2878 if (Function.hasNameRegex(Name))
2879 return true;
2881 return false;
2884 auto shouldProcess = [&](const BinaryFunction &Function) {
2885 if (mustSkip(Function))
2886 return false;
2888 // If the list is not empty, only process functions from the list.
2889 if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) {
2890 // Regex check (-funcs and -funcs-file options).
2891 for (std::string &Name : opts::ForceFunctionNames)
2892 if (Function.hasNameRegex(Name))
2893 return true;
2895 // Non-regex check (-funcs-no-regex and -funcs-file-no-regex).
2896 for (const StringRef Name : Function.getNames())
2897 if (ForceFunctionsNR.count(Name.str()))
2898 return true;
2900 return false;
2903 if (opts::Lite) {
2904 // Forcibly include functions specified in the -function-order file.
2905 if (opts::ReorderFunctions == ReorderFunctions::RT_USER) {
2906 for (const StringRef Name : Function.getNames())
2907 if (ReorderFunctionsUserSet.contains(Name))
2908 return true;
2909 for (const StringRef Name : Function.getNames())
2910 if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
2911 if (ReorderFunctionsLTOCommonSet.contains(*LTOCommonName))
2912 return true;
2915 if (ProfileReader && !ProfileReader->mayHaveProfileData(Function))
2916 return false;
2918 if (Function.getKnownExecutionCount() < LiteThresholdExecCount)
2919 return false;
2922 return true;
2925 for (auto &BFI : BC->getBinaryFunctions()) {
2926 BinaryFunction &Function = BFI.second;
2928 // Pseudo functions are explicitly marked by us not to be processed.
2929 if (Function.isPseudo()) {
2930 Function.IsIgnored = true;
2931 Function.HasExternalRefRelocations = true;
2932 continue;
2935 // Decide what to do with fragments after parent functions are processed.
2936 if (Function.isFragment())
2937 continue;
2939 if (!shouldProcess(Function)) {
2940 if (opts::Verbosity >= 1) {
2941 outs() << "BOLT-INFO: skipping processing " << Function
2942 << " per user request\n";
2944 Function.setIgnored();
2945 } else {
2946 ++NumFunctionsToProcess;
2947 if (opts::MaxFunctions.getNumOccurrences() &&
2948 NumFunctionsToProcess == opts::MaxFunctions)
2949 outs() << "BOLT-INFO: processing ending on " << Function << '\n';
2953 if (!BC->HasSplitFunctions)
2954 return;
2956 // Fragment overrides:
2957 // - If the fragment must be skipped, then the parent must be skipped as well.
2958 // Otherwise, fragment should follow the parent function:
2959 // - if the parent is skipped, skip fragment,
2960 // - if the parent is processed, process the fragment(s) as well.
2961 for (auto &BFI : BC->getBinaryFunctions()) {
2962 BinaryFunction &Function = BFI.second;
2963 if (!Function.isFragment())
2964 continue;
2965 if (mustSkip(Function)) {
2966 for (BinaryFunction *Parent : Function.ParentFragments) {
2967 if (opts::Verbosity >= 1) {
2968 outs() << "BOLT-INFO: skipping processing " << *Parent
2969 << " together with fragment function\n";
2971 Parent->setIgnored();
2972 --NumFunctionsToProcess;
2974 Function.setIgnored();
2975 continue;
2978 bool IgnoredParent =
2979 llvm::any_of(Function.ParentFragments, [&](BinaryFunction *Parent) {
2980 return Parent->isIgnored();
2982 if (IgnoredParent) {
2983 if (opts::Verbosity >= 1) {
2984 outs() << "BOLT-INFO: skipping processing " << Function
2985 << " together with parent function\n";
2987 Function.setIgnored();
2988 } else {
2989 ++NumFunctionsToProcess;
2990 if (opts::Verbosity >= 1) {
2991 outs() << "BOLT-INFO: processing " << Function
2992 << " as a sibling of non-ignored function\n";
2994 if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions)
2995 outs() << "BOLT-INFO: processing ending on " << Function << '\n';
3000 void RewriteInstance::readDebugInfo() {
3001 NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName,
3002 TimerGroupDesc, opts::TimeRewrite);
3003 if (!opts::UpdateDebugSections)
3004 return;
3006 BC->preprocessDebugInfo();
3009 void RewriteInstance::preprocessProfileData() {
3010 if (!ProfileReader)
3011 return;
3013 NamedRegionTimer T("preprocessprofile", "pre-process profile data",
3014 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
3016 outs() << "BOLT-INFO: pre-processing profile using "
3017 << ProfileReader->getReaderName() << '\n';
3019 if (BAT->enabledFor(InputFile)) {
3020 outs() << "BOLT-INFO: profile collection done on a binary already "
3021 "processed by BOLT\n";
3022 ProfileReader->setBAT(&*BAT);
3025 if (Error E = ProfileReader->preprocessProfile(*BC.get()))
3026 report_error("cannot pre-process profile", std::move(E));
3028 if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName()) {
3029 errs() << "BOLT-ERROR: input binary does not have local file symbols "
3030 "but profile data includes function names with embedded file "
3031 "names. It appears that the input binary was stripped while a "
3032 "profiled binary was not\n";
3033 exit(1);
3037 void RewriteInstance::initializeMetadataManager() {
3038 if (opts::LinuxKernelMode)
3039 MetadataManager.registerRewriter(createLinuxKernelRewriter(*BC));
3041 MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));
3043 MetadataManager.registerRewriter(createSDTRewriter(*BC));
3046 void RewriteInstance::processMetadataPreCFG() {
3047 initializeMetadataManager();
3049 MetadataManager.runInitializersPreCFG();
3051 processProfileDataPreCFG();
3054 void RewriteInstance::processMetadataPostCFG() {
3055 MetadataManager.runInitializersPostCFG();
3058 void RewriteInstance::processProfileDataPreCFG() {
3059 if (!ProfileReader)
3060 return;
3062 NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG",
3063 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
3065 if (Error E = ProfileReader->readProfilePreCFG(*BC.get()))
3066 report_error("cannot read profile pre-CFG", std::move(E));
3069 void RewriteInstance::processProfileData() {
3070 if (!ProfileReader)
3071 return;
3073 NamedRegionTimer T("processprofile", "process profile data", TimerGroupName,
3074 TimerGroupDesc, opts::TimeRewrite);
3076 if (Error E = ProfileReader->readProfile(*BC.get()))
3077 report_error("cannot read profile", std::move(E));
3079 if (opts::PrintProfile || opts::PrintAll) {
3080 for (auto &BFI : BC->getBinaryFunctions()) {
3081 BinaryFunction &Function = BFI.second;
3082 if (Function.empty())
3083 continue;
3085 Function.print(outs(), "after attaching profile");
3089 if (!opts::SaveProfile.empty()) {
3090 YAMLProfileWriter PW(opts::SaveProfile);
3091 PW.writeProfile(*this);
3093 if (opts::AggregateOnly &&
3094 opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML) {
3095 YAMLProfileWriter PW(opts::OutputFilename);
3096 PW.writeProfile(*this);
3099 // Release memory used by profile reader.
3100 ProfileReader.reset();
3102 if (opts::AggregateOnly)
3103 exit(0);
3106 void RewriteInstance::disassembleFunctions() {
3107 NamedRegionTimer T("disassembleFunctions", "disassemble functions",
3108 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
3109 for (auto &BFI : BC->getBinaryFunctions()) {
3110 BinaryFunction &Function = BFI.second;
3112 ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData();
3113 if (!FunctionData) {
3114 errs() << "BOLT-ERROR: corresponding section is non-executable or "
3115 << "empty for function " << Function << '\n';
3116 exit(1);
3119 // Treat zero-sized functions as non-simple ones.
3120 if (Function.getSize() == 0) {
3121 Function.setSimple(false);
3122 continue;
3125 // Offset of the function in the file.
3126 const auto *FileBegin =
3127 reinterpret_cast<const uint8_t *>(InputFile->getData().data());
3128 Function.setFileOffset(FunctionData->begin() - FileBegin);
3130 if (!shouldDisassemble(Function)) {
3131 NamedRegionTimer T("scan", "scan functions", "buildfuncs",
3132 "Scan Binary Functions", opts::TimeBuild);
3133 Function.scanExternalRefs();
3134 Function.setSimple(false);
3135 continue;
3138 if (!Function.disassemble()) {
3139 if (opts::processAllFunctions())
3140 BC->exitWithBugReport("function cannot be properly disassembled. "
3141 "Unable to continue in relocation mode.",
3142 Function);
3143 if (opts::Verbosity >= 1)
3144 outs() << "BOLT-INFO: could not disassemble function " << Function
3145 << ". Will ignore.\n";
3146 // Forcefully ignore the function.
3147 Function.setIgnored();
3148 continue;
3151 if (opts::PrintAll || opts::PrintDisasm)
3152 Function.print(outs(), "after disassembly");
3155 BC->processInterproceduralReferences();
3156 BC->populateJumpTables();
3158 for (auto &BFI : BC->getBinaryFunctions()) {
3159 BinaryFunction &Function = BFI.second;
3161 if (!shouldDisassemble(Function))
3162 continue;
3164 Function.postProcessEntryPoints();
3165 Function.postProcessJumpTables();
3168 BC->clearJumpTableTempData();
3169 BC->adjustCodePadding();
3171 for (auto &BFI : BC->getBinaryFunctions()) {
3172 BinaryFunction &Function = BFI.second;
3174 if (!shouldDisassemble(Function))
3175 continue;
3177 if (!Function.isSimple()) {
3178 assert((!BC->HasRelocations || Function.getSize() == 0 ||
3179 Function.hasIndirectTargetToSplitFragment()) &&
3180 "unexpected non-simple function in relocation mode");
3181 continue;
3184 // Fill in CFI information for this function
3185 if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) {
3186 if (BC->HasRelocations) {
3187 BC->exitWithBugReport("unable to fill CFI.", Function);
3188 } else {
3189 errs() << "BOLT-WARNING: unable to fill CFI for function " << Function
3190 << ". Skipping.\n";
3191 Function.setSimple(false);
3192 continue;
3196 // Parse LSDA.
3197 if (Function.getLSDAAddress() != 0 &&
3198 !BC->getFragmentsToSkip().count(&Function)) {
3199 ErrorOr<BinarySection &> LSDASection =
3200 BC->getSectionForAddress(Function.getLSDAAddress());
3201 check_error(LSDASection.getError(), "failed to get LSDA section");
3202 ArrayRef<uint8_t> LSDAData = ArrayRef<uint8_t>(
3203 LSDASection->getData(), LSDASection->getContents().size());
3204 Function.parseLSDA(LSDAData, LSDASection->getAddress());
3209 void RewriteInstance::buildFunctionsCFG() {
3210 NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs",
3211 "Build Binary Functions", opts::TimeBuild);
3213 // Create annotation indices to allow lock-free execution
3214 BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
3215 BC->MIB->getOrCreateAnnotationIndex("NOP");
3217 ParallelUtilities::WorkFuncWithAllocTy WorkFun =
3218 [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {
3219 if (!BF.buildCFG(AllocId))
3220 return;
3222 if (opts::PrintAll) {
3223 auto L = BC->scopeLock();
3224 BF.print(outs(), "while building cfg");
3228 ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
3229 return !shouldDisassemble(BF) || !BF.isSimple();
3232 ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
3233 *BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
3234 SkipPredicate, "disassembleFunctions-buildCFG",
3235 /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll);
3237 BC->postProcessSymbolTable();
3240 void RewriteInstance::postProcessFunctions() {
3241 // We mark fragments as non-simple here, not during disassembly,
3242 // So we can build their CFGs.
3243 BC->skipMarkedFragments();
3244 BC->clearFragmentsToSkip();
3246 BC->TotalScore = 0;
3247 BC->SumExecutionCount = 0;
3248 for (auto &BFI : BC->getBinaryFunctions()) {
3249 BinaryFunction &Function = BFI.second;
3251 // Set function as non-simple if it has dynamic relocations
3252 // in constant island, we don't want this function to be optimized
3253 // e.g. function splitting is unsupported.
3254 if (Function.hasDynamicRelocationAtIsland())
3255 Function.setSimple(false);
3257 if (Function.empty())
3258 continue;
3260 Function.postProcessCFG();
3262 if (opts::PrintAll || opts::PrintCFG)
3263 Function.print(outs(), "after building cfg");
3265 if (opts::DumpDotAll)
3266 Function.dumpGraphForPass("00_build-cfg");
3268 if (opts::PrintLoopInfo) {
3269 Function.calculateLoopInfo();
3270 Function.printLoopInfo(outs());
3273 BC->TotalScore += Function.getFunctionScore();
3274 BC->SumExecutionCount += Function.getKnownExecutionCount();
3277 if (opts::PrintGlobals) {
3278 outs() << "BOLT-INFO: Global symbols:\n";
3279 BC->printGlobalSymbols(outs());
3283 void RewriteInstance::runOptimizationPasses() {
3284 NamedRegionTimer T("runOptimizationPasses", "run optimization passes",
3285 TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
3286 BinaryFunctionPassManager::runAllPasses(*BC);
3289 void RewriteInstance::preregisterSections() {
3290 // Preregister sections before emission to set their order in the output.
3291 const unsigned ROFlags = BinarySection::getFlags(/*IsReadOnly*/ true,
3292 /*IsText*/ false,
3293 /*IsAllocatable*/ true);
3294 if (BinarySection *EHFrameSection = getSection(getEHFrameSectionName())) {
3295 // New .eh_frame.
3296 BC->registerOrUpdateSection(getNewSecPrefix() + getEHFrameSectionName(),
3297 ELF::SHT_PROGBITS, ROFlags);
3298 // Fully register a relocatable copy of the original .eh_frame.
3299 BC->registerSection(".relocated.eh_frame", *EHFrameSection);
3301 BC->registerOrUpdateSection(getNewSecPrefix() + ".gcc_except_table",
3302 ELF::SHT_PROGBITS, ROFlags);
3303 BC->registerOrUpdateSection(getNewSecPrefix() + ".rodata", ELF::SHT_PROGBITS,
3304 ROFlags);
3305 BC->registerOrUpdateSection(getNewSecPrefix() + ".rodata.cold",
3306 ELF::SHT_PROGBITS, ROFlags);
3309 void RewriteInstance::emitAndLink() {
3310 NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName,
3311 TimerGroupDesc, opts::TimeRewrite);
3313 SmallString<0> ObjectBuffer;
3314 raw_svector_ostream OS(ObjectBuffer);
3316 // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB)
3317 // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these
3318 // two instances.
3319 std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(OS);
3321 if (EHFrameSection) {
3322 if (opts::UseOldText || opts::StrictMode) {
3323 // The section is going to be regenerated from scratch.
3324 // Empty the contents, but keep the section reference.
3325 EHFrameSection->clearContents();
3326 } else {
3327 // Make .eh_frame relocatable.
3328 relocateEHFrameSection();
3332 emitBinaryContext(*Streamer, *BC, getOrgSecPrefix());
3334 Streamer->finish();
3335 if (Streamer->getContext().hadError()) {
3336 errs() << "BOLT-ERROR: Emission failed.\n";
3337 exit(1);
3340 if (opts::KeepTmp) {
3341 SmallString<128> OutObjectPath;
3342 sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath);
3343 std::error_code EC;
3344 raw_fd_ostream FOS(OutObjectPath, EC);
3345 check_error(EC, "cannot create output object file");
3346 FOS << ObjectBuffer;
3347 outs() << "BOLT-INFO: intermediary output object file saved for debugging "
3348 "purposes: "
3349 << OutObjectPath << "\n";
3352 ErrorOr<BinarySection &> TextSection =
3353 BC->getUniqueSectionByName(BC->getMainCodeSectionName());
3354 if (BC->HasRelocations && TextSection)
3355 BC->renameSection(*TextSection, getOrgSecPrefix() + ".text");
3357 //////////////////////////////////////////////////////////////////////////////
3358 // Assign addresses to new sections.
3359 //////////////////////////////////////////////////////////////////////////////
3361 // Get output object as ObjectFile.
3362 std::unique_ptr<MemoryBuffer> ObjectMemBuffer =
3363 MemoryBuffer::getMemBuffer(ObjectBuffer, "in-memory object file", false);
3365 auto EFMM = std::make_unique<ExecutableFileMemoryManager>(*BC);
3366 EFMM->setNewSecPrefix(getNewSecPrefix());
3367 EFMM->setOrgSecPrefix(getOrgSecPrefix());
3369 Linker = std::make_unique<JITLinkLinker>(*BC, std::move(EFMM));
3370 Linker->loadObject(ObjectMemBuffer->getMemBufferRef(),
3371 [this](auto MapSection) { mapFileSections(MapSection); });
3373 // Update output addresses based on the new section map and
3374 // layout. Only do this for the object created by ourselves.
3375 updateOutputValues(*Linker);
3377 if (opts::UpdateDebugSections) {
3378 MCAsmLayout FinalLayout(
3379 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());
3380 DebugInfoRewriter->updateLineTableOffsets(FinalLayout);
3383 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
3384 RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) {
3385 // Map newly registered sections.
3386 this->mapAllocatableSections(MapSection);
3389 // Once the code is emitted, we can rename function sections to actual
3390 // output sections and de-register sections used for emission.
3391 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
3392 ErrorOr<BinarySection &> Section = Function->getCodeSection();
3393 if (Section &&
3394 (Function->getImageAddress() == 0 || Function->getImageSize() == 0))
3395 continue;
3397 // Restore origin section for functions that were emitted or supposed to
3398 // be emitted to patch sections.
3399 if (Section)
3400 BC->deregisterSection(*Section);
3401 assert(Function->getOriginSectionName() && "expected origin section");
3402 Function->CodeSectionName = Function->getOriginSectionName()->str();
3403 for (const FunctionFragment &FF :
3404 Function->getLayout().getSplitFragments()) {
3405 if (ErrorOr<BinarySection &> ColdSection =
3406 Function->getCodeSection(FF.getFragmentNum()))
3407 BC->deregisterSection(*ColdSection);
3409 if (Function->getLayout().isSplit())
3410 Function->setColdCodeSectionName(getBOLTTextSectionName());
3413 if (opts::PrintCacheMetrics) {
3414 outs() << "BOLT-INFO: cache metrics after emitting functions:\n";
3415 CacheMetrics::printAll(BC->getSortedFunctions());
3419 void RewriteInstance::updateMetadata() {
3420 MetadataManager.runFinalizersAfterEmit();
3422 if (opts::UpdateDebugSections) {
3423 NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName,
3424 TimerGroupDesc, opts::TimeRewrite);
3425 DebugInfoRewriter->updateDebugInfo();
3428 if (opts::WriteBoltInfoSection)
3429 addBoltInfoSection();
3432 void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) {
3433 BC->deregisterUnusedSections();
3435 // If no new .eh_frame was written, remove relocated original .eh_frame.
3436 BinarySection *RelocatedEHFrameSection =
3437 getSection(".relocated" + getEHFrameSectionName());
3438 if (RelocatedEHFrameSection && RelocatedEHFrameSection->hasValidSectionID()) {
3439 BinarySection *NewEHFrameSection =
3440 getSection(getNewSecPrefix() + getEHFrameSectionName());
3441 if (!NewEHFrameSection || !NewEHFrameSection->isFinalized()) {
3442 // JITLink will still have to process relocations for the section, hence
3443 // we need to assign it the address that wouldn't result in relocation
3444 // processing failure.
3445 MapSection(*RelocatedEHFrameSection, NextAvailableAddress);
3446 BC->deregisterSection(*RelocatedEHFrameSection);
3450 mapCodeSections(MapSection);
3452 // Map the rest of the sections.
3453 mapAllocatableSections(MapSection);
3456 std::vector<BinarySection *> RewriteInstance::getCodeSections() {
3457 std::vector<BinarySection *> CodeSections;
3458 for (BinarySection &Section : BC->textSections())
3459 if (Section.hasValidSectionID())
3460 CodeSections.emplace_back(&Section);
3462 auto compareSections = [&](const BinarySection *A, const BinarySection *B) {
3463 // If both A and B have names starting with ".text.cold", then
3464 // - if opts::HotFunctionsAtEnd is true, we want order
3465 // ".text.cold.T", ".text.cold.T-1", ... ".text.cold.1", ".text.cold"
3466 // - if opts::HotFunctionsAtEnd is false, we want order
3467 // ".text.cold", ".text.cold.1", ... ".text.cold.T-1", ".text.cold.T"
3468 if (A->getName().starts_with(BC->getColdCodeSectionName()) &&
3469 B->getName().starts_with(BC->getColdCodeSectionName())) {
3470 if (A->getName().size() != B->getName().size())
3471 return (opts::HotFunctionsAtEnd)
3472 ? (A->getName().size() > B->getName().size())
3473 : (A->getName().size() < B->getName().size());
3474 return (opts::HotFunctionsAtEnd) ? (A->getName() > B->getName())
3475 : (A->getName() < B->getName());
3478 // Place movers before anything else.
3479 if (A->getName() == BC->getHotTextMoverSectionName())
3480 return true;
3481 if (B->getName() == BC->getHotTextMoverSectionName())
3482 return false;
3484 // Depending on opts::HotFunctionsAtEnd, place main and warm sections in
3485 // order.
3486 if (opts::HotFunctionsAtEnd) {
3487 if (B->getName() == BC->getMainCodeSectionName())
3488 return true;
3489 if (A->getName() == BC->getMainCodeSectionName())
3490 return false;
3491 return (B->getName() == BC->getWarmCodeSectionName());
3492 } else {
3493 if (A->getName() == BC->getMainCodeSectionName())
3494 return true;
3495 if (B->getName() == BC->getMainCodeSectionName())
3496 return false;
3497 return (A->getName() == BC->getWarmCodeSectionName());
3501 // Determine the order of sections.
3502 llvm::stable_sort(CodeSections, compareSections);
3504 return CodeSections;
3507 void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) {
3508 if (BC->HasRelocations) {
3509 // Map sections for functions with pre-assigned addresses.
3510 for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) {
3511 const uint64_t OutputAddress = InjectedFunction->getOutputAddress();
3512 if (!OutputAddress)
3513 continue;
3515 ErrorOr<BinarySection &> FunctionSection =
3516 InjectedFunction->getCodeSection();
3517 assert(FunctionSection && "function should have section");
3518 FunctionSection->setOutputAddress(OutputAddress);
3519 MapSection(*FunctionSection, OutputAddress);
3520 InjectedFunction->setImageAddress(FunctionSection->getAllocAddress());
3521 InjectedFunction->setImageSize(FunctionSection->getOutputSize());
3524 // Populate the list of sections to be allocated.
3525 std::vector<BinarySection *> CodeSections = getCodeSections();
3527 // Remove sections that were pre-allocated (patch sections).
3528 llvm::erase_if(CodeSections, [](BinarySection *Section) {
3529 return Section->getOutputAddress();
3531 LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n";
3532 for (const BinarySection *Section : CodeSections)
3533 dbgs() << Section->getName() << '\n';
3536 uint64_t PaddingSize = 0; // size of padding required at the end
3538 // Allocate sections starting at a given Address.
3539 auto allocateAt = [&](uint64_t Address) {
3540 const char *LastNonColdSectionName = BC->HasWarmSection
3541 ? BC->getWarmCodeSectionName()
3542 : BC->getMainCodeSectionName();
3543 for (BinarySection *Section : CodeSections) {
3544 Address = alignTo(Address, Section->getAlignment());
3545 Section->setOutputAddress(Address);
3546 Address += Section->getOutputSize();
3548 // Hugify: Additional huge page from right side due to
3549 // weird ASLR mapping addresses (4KB aligned)
3550 if (opts::Hugify && !BC->HasFixedLoadAddress &&
3551 Section->getName() == LastNonColdSectionName)
3552 Address = alignTo(Address, Section->getAlignment());
3555 // Make sure we allocate enough space for huge pages.
3556 ErrorOr<BinarySection &> TextSection =
3557 BC->getUniqueSectionByName(LastNonColdSectionName);
3558 if (opts::HotText && TextSection && TextSection->hasValidSectionID()) {
3559 uint64_t HotTextEnd =
3560 TextSection->getOutputAddress() + TextSection->getOutputSize();
3561 HotTextEnd = alignTo(HotTextEnd, BC->PageAlign);
3562 if (HotTextEnd > Address) {
3563 PaddingSize = HotTextEnd - Address;
3564 Address = HotTextEnd;
3567 return Address;
3570 // Check if we can fit code in the original .text
3571 bool AllocationDone = false;
3572 if (opts::UseOldText) {
3573 const uint64_t CodeSize =
3574 allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress;
3576 if (CodeSize <= BC->OldTextSectionSize) {
3577 outs() << "BOLT-INFO: using original .text for new code with 0x"
3578 << Twine::utohexstr(opts::AlignText) << " alignment\n";
3579 AllocationDone = true;
3580 } else {
3581 errs() << "BOLT-WARNING: original .text too small to fit the new code"
3582 << " using 0x" << Twine::utohexstr(opts::AlignText)
3583 << " alignment. " << CodeSize << " bytes needed, have "
3584 << BC->OldTextSectionSize << " bytes available.\n";
3585 opts::UseOldText = false;
3589 if (!AllocationDone)
3590 NextAvailableAddress = allocateAt(NextAvailableAddress);
3592 // Do the mapping for ORC layer based on the allocation.
3593 for (BinarySection *Section : CodeSections) {
3594 LLVM_DEBUG(
3595 dbgs() << "BOLT: mapping " << Section->getName() << " at 0x"
3596 << Twine::utohexstr(Section->getAllocAddress()) << " to 0x"
3597 << Twine::utohexstr(Section->getOutputAddress()) << '\n');
3598 MapSection(*Section, Section->getOutputAddress());
3599 Section->setOutputFileOffset(
3600 getFileOffsetForAddress(Section->getOutputAddress()));
3603 // Check if we need to insert a padding section for hot text.
3604 if (PaddingSize && !opts::UseOldText)
3605 outs() << "BOLT-INFO: padding code to 0x"
3606 << Twine::utohexstr(NextAvailableAddress)
3607 << " to accommodate hot text\n";
3609 return;
3612 // Processing in non-relocation mode.
3613 uint64_t NewTextSectionStartAddress = NextAvailableAddress;
3615 for (auto &BFI : BC->getBinaryFunctions()) {
3616 BinaryFunction &Function = BFI.second;
3617 if (!Function.isEmitted())
3618 continue;
3620 bool TooLarge = false;
3621 ErrorOr<BinarySection &> FuncSection = Function.getCodeSection();
3622 assert(FuncSection && "cannot find section for function");
3623 FuncSection->setOutputAddress(Function.getAddress());
3624 LLVM_DEBUG(dbgs() << "BOLT: mapping 0x"
3625 << Twine::utohexstr(FuncSection->getAllocAddress())
3626 << " to 0x" << Twine::utohexstr(Function.getAddress())
3627 << '\n');
3628 MapSection(*FuncSection, Function.getAddress());
3629 Function.setImageAddress(FuncSection->getAllocAddress());
3630 Function.setImageSize(FuncSection->getOutputSize());
3631 if (Function.getImageSize() > Function.getMaxSize()) {
3632 TooLarge = true;
3633 FailedAddresses.emplace_back(Function.getAddress());
3636 // Map jump tables if updating in-place.
3637 if (opts::JumpTables == JTS_BASIC) {
3638 for (auto &JTI : Function.JumpTables) {
3639 JumpTable *JT = JTI.second;
3640 BinarySection &Section = JT->getOutputSection();
3641 Section.setOutputAddress(JT->getAddress());
3642 Section.setOutputFileOffset(getFileOffsetForAddress(JT->getAddress()));
3643 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: mapping JT " << Section.getName()
3644 << " to 0x" << Twine::utohexstr(JT->getAddress())
3645 << '\n');
3646 MapSection(Section, JT->getAddress());
3650 if (!Function.isSplit())
3651 continue;
3653 assert(Function.getLayout().isHotColdSplit() &&
3654 "Cannot allocate more than two fragments per function in "
3655 "non-relocation mode.");
3657 FunctionFragment &FF =
3658 Function.getLayout().getFragment(FragmentNum::cold());
3659 ErrorOr<BinarySection &> ColdSection =
3660 Function.getCodeSection(FF.getFragmentNum());
3661 assert(ColdSection && "cannot find section for cold part");
3662 // Cold fragments are aligned at 16 bytes.
3663 NextAvailableAddress = alignTo(NextAvailableAddress, 16);
3664 if (TooLarge) {
3665 // The corresponding FDE will refer to address 0.
3666 FF.setAddress(0);
3667 FF.setImageAddress(0);
3668 FF.setImageSize(0);
3669 FF.setFileOffset(0);
3670 } else {
3671 FF.setAddress(NextAvailableAddress);
3672 FF.setImageAddress(ColdSection->getAllocAddress());
3673 FF.setImageSize(ColdSection->getOutputSize());
3674 FF.setFileOffset(getFileOffsetForAddress(NextAvailableAddress));
3675 ColdSection->setOutputAddress(FF.getAddress());
3678 LLVM_DEBUG(
3679 dbgs() << formatv(
3680 "BOLT: mapping cold fragment {0:x+} to {1:x+} with size {2:x+}\n",
3681 FF.getImageAddress(), FF.getAddress(), FF.getImageSize()));
3682 MapSection(*ColdSection, FF.getAddress());
3684 if (TooLarge)
3685 BC->deregisterSection(*ColdSection);
3687 NextAvailableAddress += FF.getImageSize();
3690 // Add the new text section aggregating all existing code sections.
3691 // This is pseudo-section that serves a purpose of creating a corresponding
3692 // entry in section header table.
3693 int64_t NewTextSectionSize =
3694 NextAvailableAddress - NewTextSectionStartAddress;
3695 if (NewTextSectionSize) {
3696 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
3697 /*IsText=*/true,
3698 /*IsAllocatable=*/true);
3699 BinarySection &Section =
3700 BC->registerOrUpdateSection(getBOLTTextSectionName(),
3701 ELF::SHT_PROGBITS,
3702 Flags,
3703 /*Data=*/nullptr,
3704 NewTextSectionSize,
3705 16);
3706 Section.setOutputAddress(NewTextSectionStartAddress);
3707 Section.setOutputFileOffset(
3708 getFileOffsetForAddress(NewTextSectionStartAddress));
3712 void RewriteInstance::mapAllocatableSections(
3713 BOLTLinker::SectionMapper MapSection) {
3714 // Allocate read-only sections first, then writable sections.
3715 enum : uint8_t { ST_READONLY, ST_READWRITE };
3716 for (uint8_t SType = ST_READONLY; SType <= ST_READWRITE; ++SType) {
3717 const uint64_t LastNextAvailableAddress = NextAvailableAddress;
3718 if (SType == ST_READWRITE) {
3719 // Align R+W segment to regular page size
3720 NextAvailableAddress = alignTo(NextAvailableAddress, BC->RegularPageSize);
3721 NewWritableSegmentAddress = NextAvailableAddress;
3724 for (BinarySection &Section : BC->allocatableSections()) {
3725 if (Section.isLinkOnly())
3726 continue;
3728 if (!Section.hasValidSectionID())
3729 continue;
3731 if (Section.isWritable() == (SType == ST_READONLY))
3732 continue;
3734 if (Section.getOutputAddress()) {
3735 LLVM_DEBUG({
3736 dbgs() << "BOLT-DEBUG: section " << Section.getName()
3737 << " is already mapped at 0x"
3738 << Twine::utohexstr(Section.getOutputAddress()) << '\n';
3740 continue;
3743 if (Section.hasSectionRef()) {
3744 LLVM_DEBUG({
3745 dbgs() << "BOLT-DEBUG: mapping original section " << Section.getName()
3746 << " to 0x" << Twine::utohexstr(Section.getAddress()) << '\n';
3748 Section.setOutputAddress(Section.getAddress());
3749 Section.setOutputFileOffset(Section.getInputFileOffset());
3750 MapSection(Section, Section.getAddress());
3751 } else {
3752 NextAvailableAddress =
3753 alignTo(NextAvailableAddress, Section.getAlignment());
3754 LLVM_DEBUG({
3755 dbgs() << "BOLT: mapping section " << Section.getName() << " (0x"
3756 << Twine::utohexstr(Section.getAllocAddress()) << ") to 0x"
3757 << Twine::utohexstr(NextAvailableAddress) << ":0x"
3758 << Twine::utohexstr(NextAvailableAddress +
3759 Section.getOutputSize())
3760 << '\n';
3763 MapSection(Section, NextAvailableAddress);
3764 Section.setOutputAddress(NextAvailableAddress);
3765 Section.setOutputFileOffset(
3766 getFileOffsetForAddress(NextAvailableAddress));
3768 NextAvailableAddress += Section.getOutputSize();
3772 if (SType == ST_READONLY) {
3773 if (PHDRTableAddress) {
3774 // Segment size includes the size of the PHDR area.
3775 NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress;
3776 } else {
3777 // Existing PHDR table would be updated.
3778 NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
3780 } else if (SType == ST_READWRITE) {
3781 NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress;
3782 // Restore NextAvailableAddress if no new writable sections
3783 if (!NewWritableSegmentSize)
3784 NextAvailableAddress = LastNextAvailableAddress;
3789 void RewriteInstance::updateOutputValues(const BOLTLinker &Linker) {
3790 if (std::optional<AddressMap> Map = AddressMap::parse(*BC))
3791 BC->setIOAddressMap(std::move(*Map));
3793 for (BinaryFunction *Function : BC->getAllBinaryFunctions())
3794 Function->updateOutputValues(Linker);
3797 void RewriteInstance::patchELFPHDRTable() {
3798 auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
3799 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
3800 raw_fd_ostream &OS = Out->os();
3802 // Write/re-write program headers.
3803 Phnum = Obj.getHeader().e_phnum;
3804 if (PHDRTableOffset) {
3805 // Writing new pheader table and adding one new entry for R+X segment.
3806 Phnum += 1;
3807 if (NewWritableSegmentSize) {
3808 // Adding one more entry for R+W segment.
3809 Phnum += 1;
3811 } else {
3812 assert(!PHDRTableAddress && "unexpected address for program header table");
3813 PHDRTableOffset = Obj.getHeader().e_phoff;
3814 if (NewWritableSegmentSize) {
3815 errs() << "Unable to add writable segment with UseGnuStack option\n";
3816 exit(1);
3820 // NOTE Currently .eh_frame_hdr appends to the last segment, recalculate
3821 // last segments size based on the NextAvailableAddress variable.
3822 if (!NewWritableSegmentSize) {
3823 if (PHDRTableAddress)
3824 NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress;
3825 else
3826 NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
3827 } else {
3828 NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress;
3831 OS.seek(PHDRTableOffset);
3833 bool ModdedGnuStack = false;
3834 (void)ModdedGnuStack;
3835 bool AddedSegment = false;
3836 (void)AddedSegment;
3838 auto createNewTextPhdr = [&]() {
3839 ELF64LEPhdrTy NewPhdr;
3840 NewPhdr.p_type = ELF::PT_LOAD;
3841 if (PHDRTableAddress) {
3842 NewPhdr.p_offset = PHDRTableOffset;
3843 NewPhdr.p_vaddr = PHDRTableAddress;
3844 NewPhdr.p_paddr = PHDRTableAddress;
3845 } else {
3846 NewPhdr.p_offset = NewTextSegmentOffset;
3847 NewPhdr.p_vaddr = NewTextSegmentAddress;
3848 NewPhdr.p_paddr = NewTextSegmentAddress;
3850 NewPhdr.p_filesz = NewTextSegmentSize;
3851 NewPhdr.p_memsz = NewTextSegmentSize;
3852 NewPhdr.p_flags = ELF::PF_X | ELF::PF_R;
3853 // FIXME: Currently instrumentation is experimental and the runtime data
3854 // is emitted with code, thus everything needs to be writable
3855 if (opts::Instrument)
3856 NewPhdr.p_flags |= ELF::PF_W;
3857 NewPhdr.p_align = BC->PageAlign;
3859 return NewPhdr;
3862 auto createNewWritableSectionsPhdr = [&]() {
3863 ELF64LEPhdrTy NewPhdr;
3864 NewPhdr.p_type = ELF::PT_LOAD;
3865 NewPhdr.p_offset = getFileOffsetForAddress(NewWritableSegmentAddress);
3866 NewPhdr.p_vaddr = NewWritableSegmentAddress;
3867 NewPhdr.p_paddr = NewWritableSegmentAddress;
3868 NewPhdr.p_filesz = NewWritableSegmentSize;
3869 NewPhdr.p_memsz = NewWritableSegmentSize;
3870 NewPhdr.p_align = BC->RegularPageSize;
3871 NewPhdr.p_flags = ELF::PF_R | ELF::PF_W;
3872 return NewPhdr;
3875 // Copy existing program headers with modifications.
3876 for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) {
3877 ELF64LE::Phdr NewPhdr = Phdr;
3878 if (PHDRTableAddress && Phdr.p_type == ELF::PT_PHDR) {
3879 NewPhdr.p_offset = PHDRTableOffset;
3880 NewPhdr.p_vaddr = PHDRTableAddress;
3881 NewPhdr.p_paddr = PHDRTableAddress;
3882 NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum;
3883 NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum;
3884 } else if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) {
3885 ErrorOr<BinarySection &> EHFrameHdrSec =
3886 BC->getUniqueSectionByName(getNewSecPrefix() + ".eh_frame_hdr");
3887 if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() &&
3888 EHFrameHdrSec->isFinalized()) {
3889 NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset();
3890 NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress();
3891 NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress();
3892 NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize();
3893 NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize();
3895 } else if (opts::UseGnuStack && Phdr.p_type == ELF::PT_GNU_STACK) {
3896 NewPhdr = createNewTextPhdr();
3897 ModdedGnuStack = true;
3898 } else if (!opts::UseGnuStack && Phdr.p_type == ELF::PT_DYNAMIC) {
3899 // Insert the new header before DYNAMIC.
3900 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
3901 OS.write(reinterpret_cast<const char *>(&NewTextPhdr),
3902 sizeof(NewTextPhdr));
3903 if (NewWritableSegmentSize) {
3904 ELF64LEPhdrTy NewWritablePhdr = createNewWritableSectionsPhdr();
3905 OS.write(reinterpret_cast<const char *>(&NewWritablePhdr),
3906 sizeof(NewWritablePhdr));
3908 AddedSegment = true;
3910 OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr));
3913 if (!opts::UseGnuStack && !AddedSegment) {
3914 // Append the new header to the end of the table.
3915 ELF64LE::Phdr NewTextPhdr = createNewTextPhdr();
3916 OS.write(reinterpret_cast<const char *>(&NewTextPhdr), sizeof(NewTextPhdr));
3917 if (NewWritableSegmentSize) {
3918 ELF64LEPhdrTy NewWritablePhdr = createNewWritableSectionsPhdr();
3919 OS.write(reinterpret_cast<const char *>(&NewWritablePhdr),
3920 sizeof(NewWritablePhdr));
3924 assert((!opts::UseGnuStack || ModdedGnuStack) &&
3925 "could not find GNU_STACK program header to modify");
3928 namespace {
3930 /// Write padding to \p OS such that its current \p Offset becomes aligned
3931 /// at \p Alignment. Return new (aligned) offset.
3932 uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset,
3933 uint64_t Alignment) {
3934 if (!Alignment)
3935 return Offset;
3937 const uint64_t PaddingSize =
3938 offsetToAlignment(Offset, llvm::Align(Alignment));
3939 for (unsigned I = 0; I < PaddingSize; ++I)
3940 OS.write((unsigned char)0);
3941 return Offset + PaddingSize;
3946 void RewriteInstance::rewriteNoteSections() {
3947 auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
3948 const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
3949 raw_fd_ostream &OS = Out->os();
3951 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
3952 assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
3953 "next available offset calculation failure");
3954 OS.seek(NextAvailableOffset);
3956 // Copy over non-allocatable section contents and update file offsets.
3957 for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) {
3958 if (Section.sh_type == ELF::SHT_NULL)
3959 continue;
3960 if (Section.sh_flags & ELF::SHF_ALLOC)
3961 continue;
3963 SectionRef SecRef = ELF64LEFile->toSectionRef(&Section);
3964 BinarySection *BSec = BC->getSectionForSectionRef(SecRef);
3965 assert(BSec && !BSec->isAllocatable() &&
3966 "Matching non-allocatable BinarySection should exist.");
3968 StringRef SectionName =
3969 cantFail(Obj.getSectionName(Section), "cannot get section name");
3970 if (shouldStrip(Section, SectionName))
3971 continue;
3973 // Insert padding as needed.
3974 NextAvailableOffset =
3975 appendPadding(OS, NextAvailableOffset, Section.sh_addralign);
3977 // New section size.
3978 uint64_t Size = 0;
3979 bool DataWritten = false;
3980 uint8_t *SectionData = nullptr;
3981 // Copy over section contents unless it's one of the sections we overwrite.
3982 if (!willOverwriteSection(SectionName)) {
3983 Size = Section.sh_size;
3984 StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size);
3985 std::string Data;
3986 if (BSec->getPatcher()) {
3987 Data = BSec->getPatcher()->patchBinary(Dataref);
3988 Dataref = StringRef(Data);
3991 // Section was expanded, so need to treat it as overwrite.
3992 if (Size != Dataref.size()) {
3993 BSec = &BC->registerOrUpdateNoteSection(
3994 SectionName, copyByteArray(Dataref), Dataref.size());
3995 Size = 0;
3996 } else {
3997 OS << Dataref;
3998 DataWritten = true;
4000 // Add padding as the section extension might rely on the alignment.
4001 Size = appendPadding(OS, Size, Section.sh_addralign);
4005 // Perform section post-processing.
4006 assert(BSec->getAlignment() <= Section.sh_addralign &&
4007 "alignment exceeds value in file");
4009 if (BSec->getAllocAddress()) {
4010 assert(!DataWritten && "Writing section twice.");
4011 (void)DataWritten;
4012 SectionData = BSec->getOutputData();
4014 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << (Size ? "appending" : "writing")
4015 << " contents to section " << SectionName << '\n');
4016 OS.write(reinterpret_cast<char *>(SectionData), BSec->getOutputSize());
4017 Size += BSec->getOutputSize();
4020 BSec->setOutputFileOffset(NextAvailableOffset);
4021 BSec->flushPendingRelocations(OS, [this](const MCSymbol *S) {
4022 return getNewValueForSymbol(S->getName());
4025 // Set/modify section info.
4026 BinarySection &NewSection = BC->registerOrUpdateNoteSection(
4027 SectionName, SectionData, Size, Section.sh_addralign,
4028 !BSec->isWritable(), BSec->getELFType());
4029 NewSection.setOutputAddress(0);
4030 NewSection.setOutputFileOffset(NextAvailableOffset);
4032 NextAvailableOffset += Size;
4035 // Write new note sections.
4036 for (BinarySection &Section : BC->nonAllocatableSections()) {
4037 if (Section.getOutputFileOffset() || !Section.getAllocAddress())
4038 continue;
4040 assert(!Section.hasPendingRelocations() && "cannot have pending relocs");
4042 NextAvailableOffset =
4043 appendPadding(OS, NextAvailableOffset, Section.getAlignment());
4044 Section.setOutputFileOffset(NextAvailableOffset);
4046 LLVM_DEBUG(
4047 dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName()
4048 << " of size " << Section.getOutputSize() << " at offset 0x"
4049 << Twine::utohexstr(Section.getOutputFileOffset()) << '\n');
4051 OS.write(Section.getOutputContents().data(), Section.getOutputSize());
4052 NextAvailableOffset += Section.getOutputSize();
4056 template <typename ELFT>
4057 void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) {
4058 // Pre-populate section header string table.
4059 for (const BinarySection &Section : BC->sections())
4060 if (!Section.isAnonymous())
4061 SHStrTab.add(Section.getOutputName());
4062 SHStrTab.finalize();
4064 const size_t SHStrTabSize = SHStrTab.getSize();
4065 uint8_t *DataCopy = new uint8_t[SHStrTabSize];
4066 memset(DataCopy, 0, SHStrTabSize);
4067 SHStrTab.write(DataCopy);
4068 BC->registerOrUpdateNoteSection(".shstrtab",
4069 DataCopy,
4070 SHStrTabSize,
4071 /*Alignment=*/1,
4072 /*IsReadOnly=*/true,
4073 ELF::SHT_STRTAB);
4076 void RewriteInstance::addBoltInfoSection() {
4077 std::string DescStr;
4078 raw_string_ostream DescOS(DescStr);
4080 DescOS << "BOLT revision: " << BoltRevision << ", "
4081 << "command line:";
4082 for (int I = 0; I < Argc; ++I)
4083 DescOS << " " << Argv[I];
4084 DescOS.flush();
4086 // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n'
4087 const std::string BoltInfo =
4088 BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/);
4089 BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo),
4090 BoltInfo.size(),
4091 /*Alignment=*/1,
4092 /*IsReadOnly=*/true, ELF::SHT_NOTE);
4095 void RewriteInstance::addBATSection() {
4096 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr,
4098 /*Alignment=*/1,
4099 /*IsReadOnly=*/true, ELF::SHT_NOTE);
4102 void RewriteInstance::encodeBATSection() {
4103 std::string DescStr;
4104 raw_string_ostream DescOS(DescStr);
4106 BAT->write(*BC, DescOS);
4107 DescOS.flush();
4109 const std::string BoltInfo =
4110 BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT);
4111 BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME,
4112 copyByteArray(BoltInfo), BoltInfo.size(),
4113 /*Alignment=*/1,
4114 /*IsReadOnly=*/true, ELF::SHT_NOTE);
4115 outs() << "BOLT-INFO: BAT section size (bytes): " << BoltInfo.size() << '\n';
4118 template <typename ELFShdrTy>
4119 bool RewriteInstance::shouldStrip(const ELFShdrTy &Section,
4120 StringRef SectionName) {
4121 // Strip non-allocatable relocation sections.
4122 if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA)
4123 return true;
4125 // Strip debug sections if not updating them.
4126 if (isDebugSection(SectionName) && !opts::UpdateDebugSections)
4127 return true;
4129 // Strip symtab section if needed
4130 if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB)
4131 return true;
4133 return false;
4136 template <typename ELFT>
4137 std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr>
4138 RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
4139 std::vector<uint32_t> &NewSectionIndex) {
4140 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
4141 const ELFFile<ELFT> &Obj = File->getELFFile();
4142 typename ELFT::ShdrRange Sections = cantFail(Obj.sections());
4144 // Keep track of section header entries attached to the corresponding section.
4145 std::vector<std::pair<BinarySection *, ELFShdrTy>> OutputSections;
4146 auto addSection = [&](const ELFShdrTy &Section, BinarySection *BinSec) {
4147 ELFShdrTy NewSection = Section;
4148 NewSection.sh_name = SHStrTab.getOffset(BinSec->getOutputName());
4149 OutputSections.emplace_back(BinSec, std::move(NewSection));
4152 // Copy over entries for original allocatable sections using modified name.
4153 for (const ELFShdrTy &Section : Sections) {
4154 // Always ignore this section.
4155 if (Section.sh_type == ELF::SHT_NULL) {
4156 OutputSections.emplace_back(nullptr, Section);
4157 continue;
4160 if (!(Section.sh_flags & ELF::SHF_ALLOC))
4161 continue;
4163 SectionRef SecRef = File->toSectionRef(&Section);
4164 BinarySection *BinSec = BC->getSectionForSectionRef(SecRef);
4165 assert(BinSec && "Matching BinarySection should exist.");
4167 addSection(Section, BinSec);
4170 for (BinarySection &Section : BC->allocatableSections()) {
4171 if (!Section.isFinalized())
4172 continue;
4174 if (Section.hasSectionRef() || Section.isAnonymous()) {
4175 if (opts::Verbosity)
4176 outs() << "BOLT-INFO: not writing section header for section "
4177 << Section.getOutputName() << '\n';
4178 continue;
4181 if (opts::Verbosity >= 1)
4182 outs() << "BOLT-INFO: writing section header for "
4183 << Section.getOutputName() << '\n';
4184 ELFShdrTy NewSection;
4185 NewSection.sh_type = ELF::SHT_PROGBITS;
4186 NewSection.sh_addr = Section.getOutputAddress();
4187 NewSection.sh_offset = Section.getOutputFileOffset();
4188 NewSection.sh_size = Section.getOutputSize();
4189 NewSection.sh_entsize = 0;
4190 NewSection.sh_flags = Section.getELFFlags();
4191 NewSection.sh_link = 0;
4192 NewSection.sh_info = 0;
4193 NewSection.sh_addralign = Section.getAlignment();
4194 addSection(NewSection, &Section);
4197 // Sort all allocatable sections by their offset.
4198 llvm::stable_sort(OutputSections, [](const auto &A, const auto &B) {
4199 return A.second.sh_offset < B.second.sh_offset;
4202 // Fix section sizes to prevent overlapping.
4203 ELFShdrTy *PrevSection = nullptr;
4204 BinarySection *PrevBinSec = nullptr;
4205 for (auto &SectionKV : OutputSections) {
4206 ELFShdrTy &Section = SectionKV.second;
4208 // TBSS section does not take file or memory space. Ignore it for layout
4209 // purposes.
4210 if (Section.sh_type == ELF::SHT_NOBITS && (Section.sh_flags & ELF::SHF_TLS))
4211 continue;
4213 if (PrevSection &&
4214 PrevSection->sh_addr + PrevSection->sh_size > Section.sh_addr) {
4215 if (opts::Verbosity > 1)
4216 outs() << "BOLT-INFO: adjusting size for section "
4217 << PrevBinSec->getOutputName() << '\n';
4218 PrevSection->sh_size = Section.sh_addr > PrevSection->sh_addr
4219 ? Section.sh_addr - PrevSection->sh_addr
4220 : 0;
4223 PrevSection = &Section;
4224 PrevBinSec = SectionKV.first;
4227 uint64_t LastFileOffset = 0;
4229 // Copy over entries for non-allocatable sections performing necessary
4230 // adjustments.
4231 for (const ELFShdrTy &Section : Sections) {
4232 if (Section.sh_type == ELF::SHT_NULL)
4233 continue;
4234 if (Section.sh_flags & ELF::SHF_ALLOC)
4235 continue;
4237 StringRef SectionName =
4238 cantFail(Obj.getSectionName(Section), "cannot get section name");
4240 if (shouldStrip(Section, SectionName))
4241 continue;
4243 SectionRef SecRef = File->toSectionRef(&Section);
4244 BinarySection *BinSec = BC->getSectionForSectionRef(SecRef);
4245 assert(BinSec && "Matching BinarySection should exist.");
4247 ELFShdrTy NewSection = Section;
4248 NewSection.sh_offset = BinSec->getOutputFileOffset();
4249 NewSection.sh_size = BinSec->getOutputSize();
4251 if (NewSection.sh_type == ELF::SHT_SYMTAB)
4252 NewSection.sh_info = NumLocalSymbols;
4254 addSection(NewSection, BinSec);
4256 LastFileOffset = BinSec->getOutputFileOffset();
4259 // Create entries for new non-allocatable sections.
4260 for (BinarySection &Section : BC->nonAllocatableSections()) {
4261 if (Section.getOutputFileOffset() <= LastFileOffset)
4262 continue;
4264 if (opts::Verbosity >= 1)
4265 outs() << "BOLT-INFO: writing section header for "
4266 << Section.getOutputName() << '\n';
4268 ELFShdrTy NewSection;
4269 NewSection.sh_type = Section.getELFType();
4270 NewSection.sh_addr = 0;
4271 NewSection.sh_offset = Section.getOutputFileOffset();
4272 NewSection.sh_size = Section.getOutputSize();
4273 NewSection.sh_entsize = 0;
4274 NewSection.sh_flags = Section.getELFFlags();
4275 NewSection.sh_link = 0;
4276 NewSection.sh_info = 0;
4277 NewSection.sh_addralign = Section.getAlignment();
4279 addSection(NewSection, &Section);
4282 // Assign indices to sections.
4283 std::unordered_map<std::string, uint64_t> NameToIndex;
4284 for (uint32_t Index = 1; Index < OutputSections.size(); ++Index)
4285 OutputSections[Index].first->setIndex(Index);
4287 // Update section index mapping
4288 NewSectionIndex.clear();
4289 NewSectionIndex.resize(Sections.size(), 0);
4290 for (const ELFShdrTy &Section : Sections) {
4291 if (Section.sh_type == ELF::SHT_NULL)
4292 continue;
4294 size_t OrgIndex = std::distance(Sections.begin(), &Section);
4296 SectionRef SecRef = File->toSectionRef(&Section);
4297 BinarySection *BinSec = BC->getSectionForSectionRef(SecRef);
4298 assert(BinSec && "BinarySection should exist for an input section.");
4300 // Some sections are stripped
4301 if (!BinSec->hasValidIndex())
4302 continue;
4304 NewSectionIndex[OrgIndex] = BinSec->getIndex();
4307 std::vector<ELFShdrTy> SectionsOnly(OutputSections.size());
4308 llvm::copy(llvm::make_second_range(OutputSections), SectionsOnly.begin());
4310 return SectionsOnly;
4313 // Rewrite section header table inserting new entries as needed. The sections
4314 // header table size itself may affect the offsets of other sections,
4315 // so we are placing it at the end of the binary.
4317 // As we rewrite entries we need to track how many sections were inserted
4318 // as it changes the sh_link value. We map old indices to new ones for
4319 // existing sections.
4320 template <typename ELFT>
4321 void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
4322 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
4323 using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr;
4324 raw_fd_ostream &OS = Out->os();
4325 const ELFFile<ELFT> &Obj = File->getELFFile();
4327 std::vector<uint32_t> NewSectionIndex;
4328 std::vector<ELFShdrTy> OutputSections =
4329 getOutputSections(File, NewSectionIndex);
4330 LLVM_DEBUG(
4331 dbgs() << "BOLT-DEBUG: old to new section index mapping:\n";
4332 for (uint64_t I = 0; I < NewSectionIndex.size(); ++I)
4333 dbgs() << " " << I << " -> " << NewSectionIndex[I] << '\n';
4336 // Align starting address for section header table. There's no architecutal
4337 // need to align this, it is just for pleasant human readability.
4338 uint64_t SHTOffset = OS.tell();
4339 SHTOffset = appendPadding(OS, SHTOffset, 16);
4341 // Write all section header entries while patching section references.
4342 for (ELFShdrTy &Section : OutputSections) {
4343 Section.sh_link = NewSectionIndex[Section.sh_link];
4344 if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) {
4345 if (Section.sh_info)
4346 Section.sh_info = NewSectionIndex[Section.sh_info];
4348 OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section));
4351 // Fix ELF header.
4352 ELFEhdrTy NewEhdr = Obj.getHeader();
4354 if (BC->HasRelocations) {
4355 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
4356 NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress();
4357 else
4358 NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry);
4359 assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) &&
4360 "cannot find new address for entry point");
4362 NewEhdr.e_phoff = PHDRTableOffset;
4363 NewEhdr.e_phnum = Phnum;
4364 NewEhdr.e_shoff = SHTOffset;
4365 NewEhdr.e_shnum = OutputSections.size();
4366 NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx];
4367 OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0);
4370 template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy>
4371 void RewriteInstance::updateELFSymbolTable(
4372 ELFObjectFile<ELFT> *File, bool IsDynSym,
4373 const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection,
4374 const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write,
4375 StrTabFuncTy AddToStrTab) {
4376 const ELFFile<ELFT> &Obj = File->getELFFile();
4377 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym;
4379 StringRef StringSection =
4380 cantFail(Obj.getStringTableForSymtab(SymTabSection));
4382 unsigned NumHotTextSymsUpdated = 0;
4383 unsigned NumHotDataSymsUpdated = 0;
4385 std::map<const BinaryFunction *, uint64_t> IslandSizes;
4386 auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) {
4387 auto Itr = IslandSizes.find(&BF);
4388 if (Itr != IslandSizes.end())
4389 return Itr->second;
4390 return IslandSizes[&BF] = BF.estimateConstantIslandSize();
4393 // Symbols for the new symbol table.
4394 std::vector<ELFSymTy> Symbols;
4396 auto getNewSectionIndex = [&](uint32_t OldIndex) {
4397 // For dynamic symbol table, the section index could be wrong on the input,
4398 // and its value is ignored by the runtime if it's different from
4399 // SHN_UNDEF and SHN_ABS.
4400 // However, we still need to update dynamic symbol table, so return a
4401 // section index, even though the index is broken.
4402 if (IsDynSym && OldIndex >= NewSectionIndex.size())
4403 return OldIndex;
4405 assert(OldIndex < NewSectionIndex.size() && "section index out of bounds");
4406 const uint32_t NewIndex = NewSectionIndex[OldIndex];
4408 // We may have stripped the section that dynsym was referencing due to
4409 // the linker bug. In that case return the old index avoiding marking
4410 // the symbol as undefined.
4411 if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF)
4412 return OldIndex;
4413 return NewIndex;
4416 // Get the extra symbol name of a split fragment; used in addExtraSymbols.
4417 auto getSplitSymbolName = [&](const FunctionFragment &FF,
4418 const ELFSymTy &FunctionSymbol) {
4419 SmallString<256> SymbolName;
4420 if (BC->HasWarmSection)
4421 SymbolName =
4422 formatv("{0}.{1}", cantFail(FunctionSymbol.getName(StringSection)),
4423 FF.getFragmentNum() == FragmentNum::warm() ? "warm" : "cold");
4424 else
4425 SymbolName = formatv("{0}.cold.{1}",
4426 cantFail(FunctionSymbol.getName(StringSection)),
4427 FF.getFragmentNum().get() - 1);
4428 return SymbolName;
4431 // Add extra symbols for the function.
4433 // Note that addExtraSymbols() could be called multiple times for the same
4434 // function with different FunctionSymbol matching the main function entry
4435 // point.
4436 auto addExtraSymbols = [&](const BinaryFunction &Function,
4437 const ELFSymTy &FunctionSymbol) {
4438 if (Function.isFolded()) {
4439 BinaryFunction *ICFParent = Function.getFoldedIntoFunction();
4440 while (ICFParent->isFolded())
4441 ICFParent = ICFParent->getFoldedIntoFunction();
4442 ELFSymTy ICFSymbol = FunctionSymbol;
4443 SmallVector<char, 256> Buf;
4444 ICFSymbol.st_name =
4445 AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection)))
4446 .concat(".icf.0")
4447 .toStringRef(Buf));
4448 ICFSymbol.st_value = ICFParent->getOutputAddress();
4449 ICFSymbol.st_size = ICFParent->getOutputSize();
4450 ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex();
4451 Symbols.emplace_back(ICFSymbol);
4453 if (Function.isSplit()) {
4454 for (const FunctionFragment &FF :
4455 Function.getLayout().getSplitFragments()) {
4456 if (FF.getAddress()) {
4457 ELFSymTy NewColdSym = FunctionSymbol;
4458 const SmallString<256> SymbolName =
4459 getSplitSymbolName(FF, FunctionSymbol);
4460 NewColdSym.st_name = AddToStrTab(SymbolName);
4461 NewColdSym.st_shndx =
4462 Function.getCodeSection(FF.getFragmentNum())->getIndex();
4463 NewColdSym.st_value = FF.getAddress();
4464 NewColdSym.st_size = FF.getImageSize();
4465 NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
4466 Symbols.emplace_back(NewColdSym);
4470 if (Function.hasConstantIsland()) {
4471 uint64_t DataMark = Function.getOutputDataAddress();
4472 uint64_t CISize = getConstantIslandSize(Function);
4473 uint64_t CodeMark = DataMark + CISize;
4474 ELFSymTy DataMarkSym = FunctionSymbol;
4475 DataMarkSym.st_name = AddToStrTab("$d");
4476 DataMarkSym.st_value = DataMark;
4477 DataMarkSym.st_size = 0;
4478 DataMarkSym.setType(ELF::STT_NOTYPE);
4479 DataMarkSym.setBinding(ELF::STB_LOCAL);
4480 ELFSymTy CodeMarkSym = DataMarkSym;
4481 CodeMarkSym.st_name = AddToStrTab("$x");
4482 CodeMarkSym.st_value = CodeMark;
4483 Symbols.emplace_back(DataMarkSym);
4484 Symbols.emplace_back(CodeMarkSym);
4486 if (Function.hasConstantIsland() && Function.isSplit()) {
4487 uint64_t DataMark = Function.getOutputColdDataAddress();
4488 uint64_t CISize = getConstantIslandSize(Function);
4489 uint64_t CodeMark = DataMark + CISize;
4490 ELFSymTy DataMarkSym = FunctionSymbol;
4491 DataMarkSym.st_name = AddToStrTab("$d");
4492 DataMarkSym.st_value = DataMark;
4493 DataMarkSym.st_size = 0;
4494 DataMarkSym.setType(ELF::STT_NOTYPE);
4495 DataMarkSym.setBinding(ELF::STB_LOCAL);
4496 ELFSymTy CodeMarkSym = DataMarkSym;
4497 CodeMarkSym.st_name = AddToStrTab("$x");
4498 CodeMarkSym.st_value = CodeMark;
4499 Symbols.emplace_back(DataMarkSym);
4500 Symbols.emplace_back(CodeMarkSym);
4504 // For regular (non-dynamic) symbol table, exclude symbols referring
4505 // to non-allocatable sections.
4506 auto shouldStrip = [&](const ELFSymTy &Symbol) {
4507 if (Symbol.isAbsolute() || !Symbol.isDefined())
4508 return false;
4510 // If we cannot link the symbol to a section, leave it as is.
4511 Expected<const typename ELFT::Shdr *> Section =
4512 Obj.getSection(Symbol.st_shndx);
4513 if (!Section)
4514 return false;
4516 // Remove the section symbol iif the corresponding section was stripped.
4517 if (Symbol.getType() == ELF::STT_SECTION) {
4518 if (!getNewSectionIndex(Symbol.st_shndx))
4519 return true;
4520 return false;
4523 // Symbols in non-allocatable sections are typically remnants of relocations
4524 // emitted under "-emit-relocs" linker option. Delete those as we delete
4525 // relocations against non-allocatable sections.
4526 if (!((*Section)->sh_flags & ELF::SHF_ALLOC))
4527 return true;
4529 return false;
4532 for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) {
4533 // For regular (non-dynamic) symbol table strip unneeded symbols.
4534 if (!IsDynSym && shouldStrip(Symbol))
4535 continue;
4537 const BinaryFunction *Function =
4538 BC->getBinaryFunctionAtAddress(Symbol.st_value);
4539 // Ignore false function references, e.g. when the section address matches
4540 // the address of the function.
4541 if (Function && Symbol.getType() == ELF::STT_SECTION)
4542 Function = nullptr;
4544 // For non-dynamic symtab, make sure the symbol section matches that of
4545 // the function. It can mismatch e.g. if the symbol is a section marker
4546 // in which case we treat the symbol separately from the function.
4547 // For dynamic symbol table, the section index could be wrong on the input,
4548 // and its value is ignored by the runtime if it's different from
4549 // SHN_UNDEF and SHN_ABS.
4550 if (!IsDynSym && Function &&
4551 Symbol.st_shndx !=
4552 Function->getOriginSection()->getSectionRef().getIndex())
4553 Function = nullptr;
4555 // Create a new symbol based on the existing symbol.
4556 ELFSymTy NewSymbol = Symbol;
4558 if (Function) {
4559 // If the symbol matched a function that was not emitted, update the
4560 // corresponding section index but otherwise leave it unchanged.
4561 if (Function->isEmitted()) {
4562 NewSymbol.st_value = Function->getOutputAddress();
4563 NewSymbol.st_size = Function->getOutputSize();
4564 NewSymbol.st_shndx = Function->getCodeSection()->getIndex();
4565 } else if (Symbol.st_shndx < ELF::SHN_LORESERVE) {
4566 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
4569 // Add new symbols to the symbol table if necessary.
4570 if (!IsDynSym)
4571 addExtraSymbols(*Function, NewSymbol);
4572 } else {
4573 // Check if the function symbol matches address inside a function, i.e.
4574 // it marks a secondary entry point.
4575 Function =
4576 (Symbol.getType() == ELF::STT_FUNC)
4577 ? BC->getBinaryFunctionContainingAddress(Symbol.st_value,
4578 /*CheckPastEnd=*/false,
4579 /*UseMaxSize=*/true)
4580 : nullptr;
4582 if (Function && Function->isEmitted()) {
4583 assert(Function->getLayout().isHotColdSplit() &&
4584 "Adding symbols based on cold fragment when there are more than "
4585 "2 fragments");
4586 const uint64_t OutputAddress =
4587 Function->translateInputToOutputAddress(Symbol.st_value);
4589 NewSymbol.st_value = OutputAddress;
4590 // Force secondary entry points to have zero size.
4591 NewSymbol.st_size = 0;
4593 // Find fragment containing entrypoint
4594 FunctionLayout::fragment_const_iterator FF = llvm::find_if(
4595 Function->getLayout().fragments(), [&](const FunctionFragment &FF) {
4596 uint64_t Lo = FF.getAddress();
4597 uint64_t Hi = Lo + FF.getImageSize();
4598 return Lo <= OutputAddress && OutputAddress < Hi;
4601 if (FF == Function->getLayout().fragment_end()) {
4602 assert(
4603 OutputAddress >= Function->getCodeSection()->getOutputAddress() &&
4604 OutputAddress < (Function->getCodeSection()->getOutputAddress() +
4605 Function->getCodeSection()->getOutputSize()) &&
4606 "Cannot locate fragment containing secondary entrypoint");
4607 FF = Function->getLayout().fragment_begin();
4610 NewSymbol.st_shndx =
4611 Function->getCodeSection(FF->getFragmentNum())->getIndex();
4612 } else {
4613 // Check if the symbol belongs to moved data object and update it.
4614 BinaryData *BD = opts::ReorderData.empty()
4615 ? nullptr
4616 : BC->getBinaryDataAtAddress(Symbol.st_value);
4617 if (BD && BD->isMoved() && !BD->isJumpTable()) {
4618 assert((!BD->getSize() || !Symbol.st_size ||
4619 Symbol.st_size == BD->getSize()) &&
4620 "sizes must match");
4622 BinarySection &OutputSection = BD->getOutputSection();
4623 assert(OutputSection.getIndex());
4624 LLVM_DEBUG(dbgs()
4625 << "BOLT-DEBUG: moving " << BD->getName() << " from "
4626 << *BC->getSectionNameForAddress(Symbol.st_value) << " ("
4627 << Symbol.st_shndx << ") to " << OutputSection.getName()
4628 << " (" << OutputSection.getIndex() << ")\n");
4629 NewSymbol.st_shndx = OutputSection.getIndex();
4630 NewSymbol.st_value = BD->getOutputAddress();
4631 } else {
4632 // Otherwise just update the section for the symbol.
4633 if (Symbol.st_shndx < ELF::SHN_LORESERVE)
4634 NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
4637 // Detect local syms in the text section that we didn't update
4638 // and that were preserved by the linker to support relocations against
4639 // .text. Remove them from the symtab.
4640 if (Symbol.getType() == ELF::STT_NOTYPE &&
4641 Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) {
4642 if (BC->getBinaryFunctionContainingAddress(Symbol.st_value,
4643 /*CheckPastEnd=*/false,
4644 /*UseMaxSize=*/true)) {
4645 // Can only delete the symbol if not patching. Such symbols should
4646 // not exist in the dynamic symbol table.
4647 assert(!IsDynSym && "cannot delete symbol");
4648 continue;
4654 // Handle special symbols based on their name.
4655 Expected<StringRef> SymbolName = Symbol.getName(StringSection);
4656 assert(SymbolName && "cannot get symbol name");
4658 auto updateSymbolValue = [&](const StringRef Name,
4659 std::optional<uint64_t> Value = std::nullopt) {
4660 NewSymbol.st_value = Value ? *Value : getNewValueForSymbol(Name);
4661 NewSymbol.st_shndx = ELF::SHN_ABS;
4662 outs() << "BOLT-INFO: setting " << Name << " to 0x"
4663 << Twine::utohexstr(NewSymbol.st_value) << '\n';
4666 if (opts::HotText &&
4667 (*SymbolName == "__hot_start" || *SymbolName == "__hot_end")) {
4668 updateSymbolValue(*SymbolName);
4669 ++NumHotTextSymsUpdated;
4672 if (opts::HotData && (*SymbolName == "__hot_data_start" ||
4673 *SymbolName == "__hot_data_end")) {
4674 updateSymbolValue(*SymbolName);
4675 ++NumHotDataSymsUpdated;
4678 if (*SymbolName == "_end")
4679 updateSymbolValue(*SymbolName, NextAvailableAddress);
4681 if (IsDynSym)
4682 Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) *
4683 sizeof(ELFSymTy),
4684 NewSymbol);
4685 else
4686 Symbols.emplace_back(NewSymbol);
4689 if (IsDynSym) {
4690 assert(Symbols.empty());
4691 return;
4694 // Add symbols of injected functions
4695 for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) {
4696 ELFSymTy NewSymbol;
4697 BinarySection *OriginSection = Function->getOriginSection();
4698 NewSymbol.st_shndx =
4699 OriginSection
4700 ? getNewSectionIndex(OriginSection->getSectionRef().getIndex())
4701 : Function->getCodeSection()->getIndex();
4702 NewSymbol.st_value = Function->getOutputAddress();
4703 NewSymbol.st_name = AddToStrTab(Function->getOneName());
4704 NewSymbol.st_size = Function->getOutputSize();
4705 NewSymbol.st_other = 0;
4706 NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
4707 Symbols.emplace_back(NewSymbol);
4709 if (Function->isSplit()) {
4710 assert(Function->getLayout().isHotColdSplit() &&
4711 "Adding symbols based on cold fragment when there are more than "
4712 "2 fragments");
4713 ELFSymTy NewColdSym = NewSymbol;
4714 NewColdSym.setType(ELF::STT_NOTYPE);
4715 SmallVector<char, 256> Buf;
4716 NewColdSym.st_name = AddToStrTab(
4717 Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf));
4718 const FunctionFragment &ColdFF =
4719 Function->getLayout().getFragment(FragmentNum::cold());
4720 NewColdSym.st_value = ColdFF.getAddress();
4721 NewColdSym.st_size = ColdFF.getImageSize();
4722 Symbols.emplace_back(NewColdSym);
4726 auto AddSymbol = [&](const StringRef &Name, uint64_t Address) {
4727 if (!Address)
4728 return;
4730 ELFSymTy Symbol;
4731 Symbol.st_value = Address;
4732 Symbol.st_shndx = ELF::SHN_ABS;
4733 Symbol.st_name = AddToStrTab(Name);
4734 Symbol.st_size = 0;
4735 Symbol.st_other = 0;
4736 Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE);
4738 outs() << "BOLT-INFO: setting " << Name << " to 0x"
4739 << Twine::utohexstr(Symbol.st_value) << '\n';
4741 Symbols.emplace_back(Symbol);
4744 // Add runtime library start and fini address symbols
4745 if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) {
4746 AddSymbol("__bolt_runtime_start", RtLibrary->getRuntimeStartAddress());
4747 AddSymbol("__bolt_runtime_fini", RtLibrary->getRuntimeFiniAddress());
4750 assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) &&
4751 "either none or both __hot_start/__hot_end symbols were expected");
4752 assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) &&
4753 "either none or both __hot_data_start/__hot_data_end symbols were "
4754 "expected");
4756 auto AddEmittedSymbol = [&](const StringRef &Name) {
4757 AddSymbol(Name, getNewValueForSymbol(Name));
4760 if (opts::HotText && !NumHotTextSymsUpdated) {
4761 AddEmittedSymbol("__hot_start");
4762 AddEmittedSymbol("__hot_end");
4765 if (opts::HotData && !NumHotDataSymsUpdated) {
4766 AddEmittedSymbol("__hot_data_start");
4767 AddEmittedSymbol("__hot_data_end");
4770 // Put local symbols at the beginning.
4771 llvm::stable_sort(Symbols, [](const ELFSymTy &A, const ELFSymTy &B) {
4772 if (A.getBinding() == ELF::STB_LOCAL && B.getBinding() != ELF::STB_LOCAL)
4773 return true;
4774 return false;
4777 for (const ELFSymTy &Symbol : Symbols)
4778 Write(0, Symbol);
4781 template <typename ELFT>
4782 void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) {
4783 const ELFFile<ELFT> &Obj = File->getELFFile();
4784 using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
4785 using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym;
4787 // Compute a preview of how section indices will change after rewriting, so
4788 // we can properly update the symbol table based on new section indices.
4789 std::vector<uint32_t> NewSectionIndex;
4790 getOutputSections(File, NewSectionIndex);
4792 // Set pointer at the end of the output file, so we can pwrite old symbol
4793 // tables if we need to.
4794 uint64_t NextAvailableOffset = getFileOffsetForAddress(NextAvailableAddress);
4795 assert(NextAvailableOffset >= FirstNonAllocatableOffset &&
4796 "next available offset calculation failure");
4797 Out->os().seek(NextAvailableOffset);
4799 // Update dynamic symbol table.
4800 const ELFShdrTy *DynSymSection = nullptr;
4801 for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
4802 if (Section.sh_type == ELF::SHT_DYNSYM) {
4803 DynSymSection = &Section;
4804 break;
4807 assert((DynSymSection || BC->IsStaticExecutable) &&
4808 "dynamic symbol table expected");
4809 if (DynSymSection) {
4810 updateELFSymbolTable(
4811 File,
4812 /*IsDynSym=*/true,
4813 *DynSymSection,
4814 NewSectionIndex,
4815 [&](size_t Offset, const ELFSymTy &Sym) {
4816 Out->os().pwrite(reinterpret_cast<const char *>(&Sym),
4817 sizeof(ELFSymTy),
4818 DynSymSection->sh_offset + Offset);
4820 [](StringRef) -> size_t { return 0; });
4823 if (opts::RemoveSymtab)
4824 return;
4826 // (re)create regular symbol table.
4827 const ELFShdrTy *SymTabSection = nullptr;
4828 for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
4829 if (Section.sh_type == ELF::SHT_SYMTAB) {
4830 SymTabSection = &Section;
4831 break;
4834 if (!SymTabSection) {
4835 errs() << "BOLT-WARNING: no symbol table found\n";
4836 return;
4839 const ELFShdrTy *StrTabSection =
4840 cantFail(Obj.getSection(SymTabSection->sh_link));
4841 std::string NewContents;
4842 std::string NewStrTab = std::string(
4843 File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size));
4844 StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection));
4845 StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection));
4847 NumLocalSymbols = 0;
4848 updateELFSymbolTable(
4849 File,
4850 /*IsDynSym=*/false,
4851 *SymTabSection,
4852 NewSectionIndex,
4853 [&](size_t Offset, const ELFSymTy &Sym) {
4854 if (Sym.getBinding() == ELF::STB_LOCAL)
4855 ++NumLocalSymbols;
4856 NewContents.append(reinterpret_cast<const char *>(&Sym),
4857 sizeof(ELFSymTy));
4859 [&](StringRef Str) {
4860 size_t Idx = NewStrTab.size();
4861 NewStrTab.append(NameResolver::restore(Str).str());
4862 NewStrTab.append(1, '\0');
4863 return Idx;
4866 BC->registerOrUpdateNoteSection(SecName,
4867 copyByteArray(NewContents),
4868 NewContents.size(),
4869 /*Alignment=*/1,
4870 /*IsReadOnly=*/true,
4871 ELF::SHT_SYMTAB);
4873 BC->registerOrUpdateNoteSection(StrSecName,
4874 copyByteArray(NewStrTab),
4875 NewStrTab.size(),
4876 /*Alignment=*/1,
4877 /*IsReadOnly=*/true,
4878 ELF::SHT_STRTAB);
4881 template <typename ELFT>
4882 void RewriteInstance::patchELFAllocatableRelrSection(
4883 ELFObjectFile<ELFT> *File) {
4884 if (!DynamicRelrAddress)
4885 return;
4887 raw_fd_ostream &OS = Out->os();
4888 const uint8_t PSize = BC->AsmInfo->getCodePointerSize();
4889 const uint64_t MaxDelta = ((CHAR_BIT * DynamicRelrEntrySize) - 1) * PSize;
4891 auto FixAddend = [&](const BinarySection &Section, const Relocation &Rel,
4892 uint64_t FileOffset) {
4893 // Fix relocation symbol value in place if no static relocation found
4894 // on the same address. We won't check the BF relocations here since it
4895 // is rare case and no optimization is required.
4896 if (Section.getRelocationAt(Rel.Offset))
4897 return;
4899 // No fixup needed if symbol address was not changed
4900 const uint64_t Addend = getNewFunctionOrDataAddress(Rel.Addend);
4901 if (!Addend)
4902 return;
4904 OS.pwrite(reinterpret_cast<const char *>(&Addend), PSize, FileOffset);
4907 // Fill new relative relocation offsets set
4908 std::set<uint64_t> RelOffsets;
4909 for (const BinarySection &Section : BC->allocatableSections()) {
4910 const uint64_t SectionInputAddress = Section.getAddress();
4911 uint64_t SectionAddress = Section.getOutputAddress();
4912 if (!SectionAddress)
4913 SectionAddress = SectionInputAddress;
4915 for (const Relocation &Rel : Section.dynamicRelocations()) {
4916 if (!Rel.isRelative())
4917 continue;
4919 uint64_t RelOffset =
4920 getNewFunctionOrDataAddress(SectionInputAddress + Rel.Offset);
4922 RelOffset = RelOffset == 0 ? SectionAddress + Rel.Offset : RelOffset;
4923 assert((RelOffset & 1) == 0 && "Wrong relocation offset");
4924 RelOffsets.emplace(RelOffset);
4925 FixAddend(Section, Rel, RelOffset);
4929 ErrorOr<BinarySection &> Section =
4930 BC->getSectionForAddress(*DynamicRelrAddress);
4931 assert(Section && "cannot get .relr.dyn section");
4932 assert(Section->isRelr() && "Expected section to be SHT_RELR type");
4933 uint64_t RelrDynOffset = Section->getInputFileOffset();
4934 const uint64_t RelrDynEndOffset = RelrDynOffset + Section->getSize();
4936 auto WriteRelr = [&](uint64_t Value) {
4937 if (RelrDynOffset + DynamicRelrEntrySize > RelrDynEndOffset) {
4938 errs() << "BOLT-ERROR: Offset overflow for relr.dyn section\n";
4939 exit(1);
4942 OS.pwrite(reinterpret_cast<const char *>(&Value), DynamicRelrEntrySize,
4943 RelrDynOffset);
4944 RelrDynOffset += DynamicRelrEntrySize;
4947 for (auto RelIt = RelOffsets.begin(); RelIt != RelOffsets.end();) {
4948 WriteRelr(*RelIt);
4949 uint64_t Base = *RelIt++ + PSize;
4950 while (1) {
4951 uint64_t Bitmap = 0;
4952 for (; RelIt != RelOffsets.end(); ++RelIt) {
4953 const uint64_t Delta = *RelIt - Base;
4954 if (Delta >= MaxDelta || Delta % PSize)
4955 break;
4957 Bitmap |= (1ULL << (Delta / PSize));
4960 if (!Bitmap)
4961 break;
4963 WriteRelr((Bitmap << 1) | 1);
4964 Base += MaxDelta;
4968 // Fill the rest of the section with empty bitmap value
4969 while (RelrDynOffset != RelrDynEndOffset)
4970 WriteRelr(1);
4973 template <typename ELFT>
4974 void
4975 RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) {
4976 using Elf_Rela = typename ELFT::Rela;
4977 raw_fd_ostream &OS = Out->os();
4978 const ELFFile<ELFT> &EF = File->getELFFile();
4980 uint64_t RelDynOffset = 0, RelDynEndOffset = 0;
4981 uint64_t RelPltOffset = 0, RelPltEndOffset = 0;
4983 auto setSectionFileOffsets = [&](uint64_t Address, uint64_t &Start,
4984 uint64_t &End) {
4985 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
4986 assert(Section && "cannot get relocation section");
4987 Start = Section->getInputFileOffset();
4988 End = Start + Section->getSize();
4991 if (!DynamicRelocationsAddress && !PLTRelocationsAddress)
4992 return;
4994 if (DynamicRelocationsAddress)
4995 setSectionFileOffsets(*DynamicRelocationsAddress, RelDynOffset,
4996 RelDynEndOffset);
4998 if (PLTRelocationsAddress)
4999 setSectionFileOffsets(*PLTRelocationsAddress, RelPltOffset,
5000 RelPltEndOffset);
5002 DynamicRelativeRelocationsCount = 0;
5004 auto writeRela = [&OS](const Elf_Rela *RelA, uint64_t &Offset) {
5005 OS.pwrite(reinterpret_cast<const char *>(RelA), sizeof(*RelA), Offset);
5006 Offset += sizeof(*RelA);
5009 auto writeRelocations = [&](bool PatchRelative) {
5010 for (BinarySection &Section : BC->allocatableSections()) {
5011 const uint64_t SectionInputAddress = Section.getAddress();
5012 uint64_t SectionAddress = Section.getOutputAddress();
5013 if (!SectionAddress)
5014 SectionAddress = SectionInputAddress;
5016 for (const Relocation &Rel : Section.dynamicRelocations()) {
5017 const bool IsRelative = Rel.isRelative();
5018 if (PatchRelative != IsRelative)
5019 continue;
5021 if (IsRelative)
5022 ++DynamicRelativeRelocationsCount;
5024 Elf_Rela NewRelA;
5025 MCSymbol *Symbol = Rel.Symbol;
5026 uint32_t SymbolIdx = 0;
5027 uint64_t Addend = Rel.Addend;
5028 uint64_t RelOffset =
5029 getNewFunctionOrDataAddress(SectionInputAddress + Rel.Offset);
5031 RelOffset = RelOffset == 0 ? SectionAddress + Rel.Offset : RelOffset;
5032 if (Rel.Symbol) {
5033 SymbolIdx = getOutputDynamicSymbolIndex(Symbol);
5034 } else {
5035 // Usually this case is used for R_*_(I)RELATIVE relocations
5036 const uint64_t Address = getNewFunctionOrDataAddress(Addend);
5037 if (Address)
5038 Addend = Address;
5041 NewRelA.setSymbolAndType(SymbolIdx, Rel.Type, EF.isMips64EL());
5042 NewRelA.r_offset = RelOffset;
5043 NewRelA.r_addend = Addend;
5045 const bool IsJmpRel = IsJmpRelocation.contains(Rel.Type);
5046 uint64_t &Offset = IsJmpRel ? RelPltOffset : RelDynOffset;
5047 const uint64_t &EndOffset =
5048 IsJmpRel ? RelPltEndOffset : RelDynEndOffset;
5049 if (!Offset || !EndOffset) {
5050 errs() << "BOLT-ERROR: Invalid offsets for dynamic relocation\n";
5051 exit(1);
5054 if (Offset + sizeof(NewRelA) > EndOffset) {
5055 errs() << "BOLT-ERROR: Offset overflow for dynamic relocation\n";
5056 exit(1);
5059 writeRela(&NewRelA, Offset);
5064 // Place R_*_RELATIVE relocations in RELA section if RELR is not presented.
5065 // The dynamic linker expects all R_*_RELATIVE relocations in RELA
5066 // to be emitted first.
5067 if (!DynamicRelrAddress)
5068 writeRelocations(/* PatchRelative */ true);
5069 writeRelocations(/* PatchRelative */ false);
5071 auto fillNone = [&](uint64_t &Offset, uint64_t EndOffset) {
5072 if (!Offset)
5073 return;
5075 typename ELFObjectFile<ELFT>::Elf_Rela RelA;
5076 RelA.setSymbolAndType(0, Relocation::getNone(), EF.isMips64EL());
5077 RelA.r_offset = 0;
5078 RelA.r_addend = 0;
5079 while (Offset < EndOffset)
5080 writeRela(&RelA, Offset);
5082 assert(Offset == EndOffset && "Unexpected section overflow");
5085 // Fill the rest of the sections with R_*_NONE relocations
5086 fillNone(RelDynOffset, RelDynEndOffset);
5087 fillNone(RelPltOffset, RelPltEndOffset);
5090 template <typename ELFT>
5091 void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) {
5092 raw_fd_ostream &OS = Out->os();
5094 SectionRef GOTSection;
5095 for (const SectionRef &Section : File->sections()) {
5096 StringRef SectionName = cantFail(Section.getName());
5097 if (SectionName == ".got") {
5098 GOTSection = Section;
5099 break;
5102 if (!GOTSection.getObject()) {
5103 if (!BC->IsStaticExecutable)
5104 errs() << "BOLT-INFO: no .got section found\n";
5105 return;
5108 StringRef GOTContents = cantFail(GOTSection.getContents());
5109 for (const uint64_t *GOTEntry =
5110 reinterpret_cast<const uint64_t *>(GOTContents.data());
5111 GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() +
5112 GOTContents.size());
5113 ++GOTEntry) {
5114 if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) {
5115 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x"
5116 << Twine::utohexstr(*GOTEntry) << " with 0x"
5117 << Twine::utohexstr(NewAddress) << '\n');
5118 OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress),
5119 reinterpret_cast<const char *>(GOTEntry) -
5120 File->getData().data());
5125 template <typename ELFT>
5126 void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
5127 if (BC->IsStaticExecutable)
5128 return;
5130 const ELFFile<ELFT> &Obj = File->getELFFile();
5131 raw_fd_ostream &OS = Out->os();
5133 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr;
5134 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn;
5136 // Locate DYNAMIC by looking through program headers.
5137 uint64_t DynamicOffset = 0;
5138 const Elf_Phdr *DynamicPhdr = nullptr;
5139 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) {
5140 if (Phdr.p_type == ELF::PT_DYNAMIC) {
5141 DynamicOffset = Phdr.p_offset;
5142 DynamicPhdr = &Phdr;
5143 assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match");
5144 break;
5147 assert(DynamicPhdr && "missing dynamic in ELF binary");
5149 bool ZNowSet = false;
5151 // Go through all dynamic entries and patch functions addresses with
5152 // new ones.
5153 typename ELFT::DynRange DynamicEntries =
5154 cantFail(Obj.dynamicEntries(), "error accessing dynamic table");
5155 auto DTB = DynamicEntries.begin();
5156 for (const Elf_Dyn &Dyn : DynamicEntries) {
5157 Elf_Dyn NewDE = Dyn;
5158 bool ShouldPatch = true;
5159 switch (Dyn.d_tag) {
5160 default:
5161 ShouldPatch = false;
5162 break;
5163 case ELF::DT_RELACOUNT:
5164 NewDE.d_un.d_val = DynamicRelativeRelocationsCount;
5165 break;
5166 case ELF::DT_INIT:
5167 case ELF::DT_FINI: {
5168 if (BC->HasRelocations) {
5169 if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) {
5170 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type "
5171 << Dyn.getTag() << '\n');
5172 NewDE.d_un.d_ptr = NewAddress;
5175 RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
5176 if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) {
5177 if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress())
5178 NewDE.d_un.d_ptr = Addr;
5180 if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) {
5181 if (auto Addr = RtLibrary->getRuntimeStartAddress()) {
5182 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x"
5183 << Twine::utohexstr(Addr) << '\n');
5184 NewDE.d_un.d_ptr = Addr;
5187 break;
5189 case ELF::DT_FLAGS:
5190 if (BC->RequiresZNow) {
5191 NewDE.d_un.d_val |= ELF::DF_BIND_NOW;
5192 ZNowSet = true;
5194 break;
5195 case ELF::DT_FLAGS_1:
5196 if (BC->RequiresZNow) {
5197 NewDE.d_un.d_val |= ELF::DF_1_NOW;
5198 ZNowSet = true;
5200 break;
5202 if (ShouldPatch)
5203 OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE),
5204 DynamicOffset + (&Dyn - DTB) * sizeof(Dyn));
5207 if (BC->RequiresZNow && !ZNowSet) {
5208 errs() << "BOLT-ERROR: output binary requires immediate relocation "
5209 "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in "
5210 ".dynamic. Please re-link the binary with -znow.\n";
5211 exit(1);
5215 template <typename ELFT>
5216 Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
5217 const ELFFile<ELFT> &Obj = File->getELFFile();
5219 using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr;
5220 using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn;
5222 // Locate DYNAMIC by looking through program headers.
5223 const Elf_Phdr *DynamicPhdr = nullptr;
5224 for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) {
5225 if (Phdr.p_type == ELF::PT_DYNAMIC) {
5226 DynamicPhdr = &Phdr;
5227 break;
5231 if (!DynamicPhdr) {
5232 outs() << "BOLT-INFO: static input executable detected\n";
5233 // TODO: static PIE executable might have dynamic header
5234 BC->IsStaticExecutable = true;
5235 return Error::success();
5238 if (DynamicPhdr->p_memsz != DynamicPhdr->p_filesz)
5239 return createStringError(errc::executable_format_error,
5240 "dynamic section sizes should match");
5242 // Go through all dynamic entries to locate entries of interest.
5243 auto DynamicEntriesOrErr = Obj.dynamicEntries();
5244 if (!DynamicEntriesOrErr)
5245 return DynamicEntriesOrErr.takeError();
5246 typename ELFT::DynRange DynamicEntries = DynamicEntriesOrErr.get();
5248 for (const Elf_Dyn &Dyn : DynamicEntries) {
5249 switch (Dyn.d_tag) {
5250 case ELF::DT_INIT:
5251 if (!BC->HasInterpHeader) {
5252 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
5253 BC->StartFunctionAddress = Dyn.getPtr();
5255 break;
5256 case ELF::DT_FINI:
5257 BC->FiniAddress = Dyn.getPtr();
5258 break;
5259 case ELF::DT_FINI_ARRAY:
5260 BC->FiniArrayAddress = Dyn.getPtr();
5261 break;
5262 case ELF::DT_FINI_ARRAYSZ:
5263 BC->FiniArraySize = Dyn.getPtr();
5264 break;
5265 case ELF::DT_RELA:
5266 DynamicRelocationsAddress = Dyn.getPtr();
5267 break;
5268 case ELF::DT_RELASZ:
5269 DynamicRelocationsSize = Dyn.getVal();
5270 break;
5271 case ELF::DT_JMPREL:
5272 PLTRelocationsAddress = Dyn.getPtr();
5273 break;
5274 case ELF::DT_PLTRELSZ:
5275 PLTRelocationsSize = Dyn.getVal();
5276 break;
5277 case ELF::DT_RELACOUNT:
5278 DynamicRelativeRelocationsCount = Dyn.getVal();
5279 break;
5280 case ELF::DT_RELR:
5281 DynamicRelrAddress = Dyn.getPtr();
5282 break;
5283 case ELF::DT_RELRSZ:
5284 DynamicRelrSize = Dyn.getVal();
5285 break;
5286 case ELF::DT_RELRENT:
5287 DynamicRelrEntrySize = Dyn.getVal();
5288 break;
5292 if (!DynamicRelocationsAddress || !DynamicRelocationsSize) {
5293 DynamicRelocationsAddress.reset();
5294 DynamicRelocationsSize = 0;
5297 if (!PLTRelocationsAddress || !PLTRelocationsSize) {
5298 PLTRelocationsAddress.reset();
5299 PLTRelocationsSize = 0;
5302 if (!DynamicRelrAddress || !DynamicRelrSize) {
5303 DynamicRelrAddress.reset();
5304 DynamicRelrSize = 0;
5305 } else if (!DynamicRelrEntrySize) {
5306 errs() << "BOLT-ERROR: expected DT_RELRENT to be presented "
5307 << "in DYNAMIC section\n";
5308 exit(1);
5309 } else if (DynamicRelrSize % DynamicRelrEntrySize) {
5310 errs() << "BOLT-ERROR: expected RELR table size to be divisible "
5311 << "by RELR entry size\n";
5312 exit(1);
5315 return Error::success();
5318 uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
5319 const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress);
5320 if (!Function)
5321 return 0;
5323 return Function->getOutputAddress();
5326 uint64_t RewriteInstance::getNewFunctionOrDataAddress(uint64_t OldAddress) {
5327 if (uint64_t Function = getNewFunctionAddress(OldAddress))
5328 return Function;
5330 const BinaryData *BD = BC->getBinaryDataAtAddress(OldAddress);
5331 if (BD && BD->isMoved())
5332 return BD->getOutputAddress();
5334 return 0;
5337 void RewriteInstance::rewriteFile() {
5338 std::error_code EC;
5339 Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC,
5340 sys::fs::OF_None);
5341 check_error(EC, "cannot create output executable file");
5343 raw_fd_ostream &OS = Out->os();
5345 // Copy allocatable part of the input.
5346 OS << InputFile->getData().substr(0, FirstNonAllocatableOffset);
5348 auto Streamer = BC->createStreamer(OS);
5349 // Make sure output stream has enough reserved space, otherwise
5350 // pwrite() will fail.
5351 uint64_t Offset = OS.seek(getFileOffsetForAddress(NextAvailableAddress));
5352 (void)Offset;
5353 assert(Offset == getFileOffsetForAddress(NextAvailableAddress) &&
5354 "error resizing output file");
5356 // Overwrite functions with fixed output address. This is mostly used by
5357 // non-relocation mode, with one exception: injected functions are covered
5358 // here in both modes.
5359 uint64_t CountOverwrittenFunctions = 0;
5360 uint64_t OverwrittenScore = 0;
5361 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
5362 if (Function->getImageAddress() == 0 || Function->getImageSize() == 0)
5363 continue;
5365 if (Function->getImageSize() > Function->getMaxSize()) {
5366 if (opts::Verbosity >= 1)
5367 errs() << "BOLT-WARNING: new function size (0x"
5368 << Twine::utohexstr(Function->getImageSize())
5369 << ") is larger than maximum allowed size (0x"
5370 << Twine::utohexstr(Function->getMaxSize()) << ") for function "
5371 << *Function << '\n';
5373 // Remove jump table sections that this function owns in non-reloc mode
5374 // because we don't want to write them anymore.
5375 if (!BC->HasRelocations && opts::JumpTables == JTS_BASIC) {
5376 for (auto &JTI : Function->JumpTables) {
5377 JumpTable *JT = JTI.second;
5378 BinarySection &Section = JT->getOutputSection();
5379 BC->deregisterSection(Section);
5382 continue;
5385 const auto HasAddress = [](const FunctionFragment &FF) {
5386 return FF.empty() ||
5387 (FF.getImageAddress() != 0 && FF.getImageSize() != 0);
5389 const bool SplitFragmentsHaveAddress =
5390 llvm::all_of(Function->getLayout().getSplitFragments(), HasAddress);
5391 if (Function->isSplit() && !SplitFragmentsHaveAddress) {
5392 const auto HasNoAddress = [](const FunctionFragment &FF) {
5393 return FF.getImageAddress() == 0 && FF.getImageSize() == 0;
5395 assert(llvm::all_of(Function->getLayout().getSplitFragments(),
5396 HasNoAddress) &&
5397 "Some split fragments have an address while others do not");
5398 (void)HasNoAddress;
5399 continue;
5402 OverwrittenScore += Function->getFunctionScore();
5403 ++CountOverwrittenFunctions;
5405 // Overwrite function in the output file.
5406 if (opts::Verbosity >= 2)
5407 outs() << "BOLT: rewriting function \"" << *Function << "\"\n";
5409 OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()),
5410 Function->getImageSize(), Function->getFileOffset());
5412 // Write nops at the end of the function.
5413 if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) {
5414 uint64_t Pos = OS.tell();
5415 OS.seek(Function->getFileOffset() + Function->getImageSize());
5416 BC->MAB->writeNopData(
5417 OS, Function->getMaxSize() - Function->getImageSize(), &*BC->STI);
5419 OS.seek(Pos);
5422 if (!Function->isSplit())
5423 continue;
5425 // Write cold part
5426 if (opts::Verbosity >= 2) {
5427 outs() << formatv("BOLT: rewriting function \"{0}\" (split parts)\n",
5428 *Function);
5431 for (const FunctionFragment &FF :
5432 Function->getLayout().getSplitFragments()) {
5433 OS.pwrite(reinterpret_cast<char *>(FF.getImageAddress()),
5434 FF.getImageSize(), FF.getFileOffset());
5438 // Print function statistics for non-relocation mode.
5439 if (!BC->HasRelocations) {
5440 outs() << "BOLT: " << CountOverwrittenFunctions << " out of "
5441 << BC->getBinaryFunctions().size()
5442 << " functions were overwritten.\n";
5443 if (BC->TotalScore != 0) {
5444 double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0;
5445 outs() << format("BOLT-INFO: rewritten functions cover %.2lf", Coverage)
5446 << "% of the execution count of simple functions of "
5447 "this binary\n";
5451 if (BC->HasRelocations && opts::TrapOldCode) {
5452 uint64_t SavedPos = OS.tell();
5453 // Overwrite function body to make sure we never execute these instructions.
5454 for (auto &BFI : BC->getBinaryFunctions()) {
5455 BinaryFunction &BF = BFI.second;
5456 if (!BF.getFileOffset() || !BF.isEmitted())
5457 continue;
5458 OS.seek(BF.getFileOffset());
5459 StringRef TrapInstr = BC->MIB->getTrapFillValue();
5460 unsigned NInstr = BF.getMaxSize() / TrapInstr.size();
5461 for (unsigned I = 0; I < NInstr; ++I)
5462 OS.write(TrapInstr.data(), TrapInstr.size());
5464 OS.seek(SavedPos);
5467 // Write all allocatable sections - reloc-mode text is written here as well
5468 for (BinarySection &Section : BC->allocatableSections()) {
5469 if (!Section.isFinalized() || !Section.getOutputData())
5470 continue;
5471 if (Section.isLinkOnly())
5472 continue;
5474 if (opts::Verbosity >= 1)
5475 outs() << "BOLT: writing new section " << Section.getName()
5476 << "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress())
5477 << "\n of size " << Section.getOutputSize() << "\n at offset "
5478 << Section.getOutputFileOffset() << '\n';
5479 OS.pwrite(reinterpret_cast<const char *>(Section.getOutputData()),
5480 Section.getOutputSize(), Section.getOutputFileOffset());
5483 for (BinarySection &Section : BC->allocatableSections())
5484 Section.flushPendingRelocations(OS, [this](const MCSymbol *S) {
5485 return getNewValueForSymbol(S->getName());
5488 // If .eh_frame is present create .eh_frame_hdr.
5489 if (EHFrameSection)
5490 writeEHFrameHeader();
5492 // Add BOLT Addresses Translation maps to allow profile collection to
5493 // happen in the output binary
5494 if (opts::EnableBAT)
5495 addBATSection();
5497 // Patch program header table.
5498 patchELFPHDRTable();
5500 // Finalize memory image of section string table.
5501 finalizeSectionStringTable();
5503 // Update symbol tables.
5504 patchELFSymTabs();
5506 patchBuildID();
5508 if (opts::EnableBAT)
5509 encodeBATSection();
5511 // Copy non-allocatable sections once allocatable part is finished.
5512 rewriteNoteSections();
5514 if (BC->HasRelocations) {
5515 patchELFAllocatableRelaSections();
5516 patchELFAllocatableRelrSection();
5517 patchELFGOT();
5520 // Patch dynamic section/segment.
5521 patchELFDynamic();
5523 // Update ELF book-keeping info.
5524 patchELFSectionHeaderTable();
5526 if (opts::PrintSections) {
5527 outs() << "BOLT-INFO: Sections after processing:\n";
5528 BC->printSections(outs());
5531 Out->keep();
5532 EC = sys::fs::setPermissions(
5533 opts::OutputFilename,
5534 static_cast<sys::fs::perms>(sys::fs::perms::all_all &
5535 ~sys::fs::getUmask()));
5536 check_error(EC, "cannot set permissions of output file");
5539 void RewriteInstance::writeEHFrameHeader() {
5540 BinarySection *NewEHFrameSection =
5541 getSection(getNewSecPrefix() + getEHFrameSectionName());
5543 // No need to update the header if no new .eh_frame was created.
5544 if (!NewEHFrameSection)
5545 return;
5547 DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true,
5548 NewEHFrameSection->getOutputAddress());
5549 Error E = NewEHFrame.parse(DWARFDataExtractor(
5550 NewEHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(),
5551 BC->AsmInfo->getCodePointerSize()));
5552 check_error(std::move(E), "failed to parse EH frame");
5554 uint64_t RelocatedEHFrameAddress = 0;
5555 StringRef RelocatedEHFrameContents;
5556 BinarySection *RelocatedEHFrameSection =
5557 getSection(".relocated" + getEHFrameSectionName());
5558 if (RelocatedEHFrameSection) {
5559 RelocatedEHFrameAddress = RelocatedEHFrameSection->getOutputAddress();
5560 RelocatedEHFrameContents = RelocatedEHFrameSection->getOutputContents();
5562 DWARFDebugFrame RelocatedEHFrame(BC->TheTriple->getArch(), true,
5563 RelocatedEHFrameAddress);
5564 Error Er = RelocatedEHFrame.parse(DWARFDataExtractor(
5565 RelocatedEHFrameContents, BC->AsmInfo->isLittleEndian(),
5566 BC->AsmInfo->getCodePointerSize()));
5567 check_error(std::move(Er), "failed to parse EH frame");
5569 LLVM_DEBUG(dbgs() << "BOLT: writing a new .eh_frame_hdr\n");
5571 NextAvailableAddress =
5572 appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign);
5574 const uint64_t EHFrameHdrOutputAddress = NextAvailableAddress;
5575 const uint64_t EHFrameHdrFileOffset =
5576 getFileOffsetForAddress(NextAvailableAddress);
5578 std::vector<char> NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader(
5579 RelocatedEHFrame, NewEHFrame, EHFrameHdrOutputAddress, FailedAddresses);
5581 assert(Out->os().tell() == EHFrameHdrFileOffset && "offset mismatch");
5582 Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size());
5584 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
5585 /*IsText=*/false,
5586 /*IsAllocatable=*/true);
5587 BinarySection *OldEHFrameHdrSection = getSection(".eh_frame_hdr");
5588 if (OldEHFrameHdrSection)
5589 OldEHFrameHdrSection->setOutputName(getOrgSecPrefix() + ".eh_frame_hdr");
5591 BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection(
5592 getNewSecPrefix() + ".eh_frame_hdr", ELF::SHT_PROGBITS, Flags, nullptr,
5593 NewEHFrameHdr.size(), /*Alignment=*/1);
5594 EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset);
5595 EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress);
5596 EHFrameHdrSec.setOutputName(".eh_frame_hdr");
5598 NextAvailableAddress += EHFrameHdrSec.getOutputSize();
5600 // Merge new .eh_frame with the relocated original so that gdb can locate all
5601 // FDEs.
5602 if (RelocatedEHFrameSection) {
5603 const uint64_t NewEHFrameSectionSize =
5604 RelocatedEHFrameSection->getOutputAddress() +
5605 RelocatedEHFrameSection->getOutputSize() -
5606 NewEHFrameSection->getOutputAddress();
5607 NewEHFrameSection->updateContents(NewEHFrameSection->getOutputData(),
5608 NewEHFrameSectionSize);
5609 BC->deregisterSection(*RelocatedEHFrameSection);
5612 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is "
5613 << NewEHFrameSection->getOutputSize() << '\n');
5616 uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) {
5617 auto Value = Linker->lookupSymbol(Name);
5618 if (Value)
5619 return *Value;
5621 // Return the original value if we haven't emitted the symbol.
5622 BinaryData *BD = BC->getBinaryDataByName(Name);
5623 if (!BD)
5624 return 0;
5626 return BD->getAddress();
5629 uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const {
5630 // Check if it's possibly part of the new segment.
5631 if (Address >= NewTextSegmentAddress)
5632 return Address - NewTextSegmentAddress + NewTextSegmentOffset;
5634 // Find an existing segment that matches the address.
5635 const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address);
5636 if (SegmentInfoI == BC->SegmentMapInfo.begin())
5637 return 0;
5639 const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second;
5640 if (Address < SegmentInfo.Address ||
5641 Address >= SegmentInfo.Address + SegmentInfo.FileSize)
5642 return 0;
5644 return SegmentInfo.FileOffset + Address - SegmentInfo.Address;
5647 bool RewriteInstance::willOverwriteSection(StringRef SectionName) {
5648 if (llvm::is_contained(SectionsToOverwrite, SectionName))
5649 return true;
5650 if (llvm::is_contained(DebugSectionsToOverwrite, SectionName))
5651 return true;
5653 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
5654 return Section && Section->isAllocatable() && Section->isFinalized();
5657 bool RewriteInstance::isDebugSection(StringRef SectionName) {
5658 if (SectionName.starts_with(".debug_") ||
5659 SectionName.starts_with(".zdebug_") || SectionName == ".gdb_index" ||
5660 SectionName == ".stab" || SectionName == ".stabstr")
5661 return true;
5663 return false;
5666 bool RewriteInstance::isKSymtabSection(StringRef SectionName) {
5667 if (SectionName.starts_with("__ksymtab"))
5668 return true;
5670 return false;