1 //===- bolt/Rewrite/MachORewriteInstance.cpp - MachO rewriter -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Rewrite/MachORewriteInstance.h"
10 #include "bolt/Core/BinaryContext.h"
11 #include "bolt/Core/BinaryEmitter.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "bolt/Core/JumpTable.h"
14 #include "bolt/Core/MCPlusBuilder.h"
15 #include "bolt/Passes/Instrumentation.h"
16 #include "bolt/Passes/PatchEntries.h"
17 #include "bolt/Profile/DataReader.h"
18 #include "bolt/Rewrite/BinaryPassManager.h"
19 #include "bolt/Rewrite/ExecutableFileMemoryManager.h"
20 #include "bolt/Rewrite/JITLinkLinker.h"
21 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
22 #include "bolt/Utils/Utils.h"
23 #include "llvm/MC/MCObjectStreamer.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/ToolOutputFile.h"
33 extern cl::opt
<unsigned> AlignText
;
34 //FIXME! Upstream change
35 //extern cl::opt<bool> CheckOverlappingElements;
36 extern cl::opt
<bool> ForcePatch
;
37 extern cl::opt
<bool> Instrument
;
38 extern cl::opt
<bool> InstrumentCalls
;
39 extern cl::opt
<bolt::JumpTableSupportLevel
> JumpTables
;
40 extern cl::opt
<bool> KeepTmp
;
41 extern cl::opt
<bool> NeverPrint
;
42 extern cl::opt
<std::string
> OutputFilename
;
43 extern cl::opt
<bool> PrintAfterBranchFixup
;
44 extern cl::opt
<bool> PrintFinalized
;
45 extern cl::opt
<bool> PrintNormalized
;
46 extern cl::opt
<bool> PrintReordered
;
47 extern cl::opt
<bool> PrintSections
;
48 extern cl::opt
<bool> PrintDisasm
;
49 extern cl::opt
<bool> PrintCFG
;
50 extern cl::opt
<std::string
> RuntimeInstrumentationLib
;
51 extern cl::opt
<unsigned> Verbosity
;
57 extern MCPlusBuilder
*createX86MCPlusBuilder(const MCInstrAnalysis
*,
59 const MCRegisterInfo
*,
60 const MCSubtargetInfo
*);
61 extern MCPlusBuilder
*createAArch64MCPlusBuilder(const MCInstrAnalysis
*,
63 const MCRegisterInfo
*,
64 const MCSubtargetInfo
*);
68 MCPlusBuilder
*createMCPlusBuilder(const Triple::ArchType Arch
,
69 const MCInstrAnalysis
*Analysis
,
70 const MCInstrInfo
*Info
,
71 const MCRegisterInfo
*RegInfo
,
72 const MCSubtargetInfo
*STI
) {
74 if (Arch
== Triple::x86_64
)
75 return createX86MCPlusBuilder(Analysis
, Info
, RegInfo
, STI
);
78 #ifdef AARCH64_AVAILABLE
79 if (Arch
== Triple::aarch64
)
80 return createAArch64MCPlusBuilder(Analysis
, Info
, RegInfo
, STI
);
83 llvm_unreachable("architecture unsupported by MCPlusBuilder");
86 } // anonymous namespace
88 #define DEBUG_TYPE "bolt"
90 Expected
<std::unique_ptr
<MachORewriteInstance
>>
91 MachORewriteInstance::create(object::MachOObjectFile
*InputFile
,
93 Error Err
= Error::success();
95 std::make_unique
<MachORewriteInstance
>(InputFile
, ToolPath
, Err
);
97 return std::move(Err
);
98 return std::move(MachORI
);
101 MachORewriteInstance::MachORewriteInstance(object::MachOObjectFile
*InputFile
,
102 StringRef ToolPath
, Error
&Err
)
103 : InputFile(InputFile
), ToolPath(ToolPath
) {
104 ErrorAsOutParameter
EAO(&Err
);
105 auto BCOrErr
= BinaryContext::createBinaryContext(
106 InputFile
, /* IsPIC */ true, DWARFContext::create(*InputFile
));
107 if (Error E
= BCOrErr
.takeError()) {
111 BC
= std::move(BCOrErr
.get());
112 BC
->initializeTarget(std::unique_ptr
<MCPlusBuilder
>(
113 createMCPlusBuilder(BC
->TheTriple
->getArch(), BC
->MIA
.get(),
114 BC
->MII
.get(), BC
->MRI
.get(), BC
->STI
.get())));
115 if (opts::Instrument
)
116 BC
->setRuntimeLibrary(std::make_unique
<InstrumentationRuntimeLibrary
>());
119 Error
MachORewriteInstance::setProfile(StringRef Filename
) {
120 if (!sys::fs::exists(Filename
))
121 return errorCodeToError(make_error_code(errc::no_such_file_or_directory
));
125 return make_error
<StringError
>(
126 Twine("multiple profiles specified: ") + ProfileReader
->getFilename() +
127 " and " + Filename
, inconvertibleErrorCode());
130 ProfileReader
= std::make_unique
<DataReader
>(Filename
);
131 return Error::success();
134 void MachORewriteInstance::preprocessProfileData() {
137 if (Error E
= ProfileReader
->preprocessProfile(*BC
.get()))
138 report_error("cannot pre-process profile", std::move(E
));
141 void MachORewriteInstance::processProfileDataPreCFG() {
144 if (Error E
= ProfileReader
->readProfilePreCFG(*BC
.get()))
145 report_error("cannot read profile pre-CFG", std::move(E
));
148 void MachORewriteInstance::processProfileData() {
151 if (Error E
= ProfileReader
->readProfile(*BC
.get()))
152 report_error("cannot read profile", std::move(E
));
155 void MachORewriteInstance::readSpecialSections() {
156 for (const object::SectionRef
&Section
: InputFile
->sections()) {
157 Expected
<StringRef
> SectionName
= Section
.getName();;
158 check_error(SectionName
.takeError(), "cannot get section name");
159 // Only register sections with names.
160 if (!SectionName
->empty()) {
161 BC
->registerSection(Section
);
163 dbgs() << "BOLT-DEBUG: registering section " << *SectionName
164 << " @ 0x" << Twine::utohexstr(Section
.getAddress()) << ":0x"
165 << Twine::utohexstr(Section
.getAddress() + Section
.getSize())
170 if (opts::PrintSections
) {
171 outs() << "BOLT-INFO: Sections from original binary:\n";
172 BC
->printSections(outs());
/// A data-in-code entry, constructed from a DiceRef. Other code in this file
/// reads its Offset and Length members.
/// NOTE(review): the constructor body and the member declarations are not
/// visible in this span - confirm them against the full definition.
178 struct DataInCodeRegion
{
179 explicit DataInCodeRegion(DiceRef D
) {
190 std::vector
<DataInCodeRegion
> readDataInCode(const MachOObjectFile
&O
) {
191 const MachO::linkedit_data_command DataInCodeLC
=
192 O
.getDataInCodeLoadCommand();
193 const uint32_t NumberOfEntries
=
194 DataInCodeLC
.datasize
/ sizeof(MachO::data_in_code_entry
);
195 std::vector
<DataInCodeRegion
> DataInCode
;
196 DataInCode
.reserve(NumberOfEntries
);
197 for (auto I
= O
.begin_dices(), E
= O
.end_dices(); I
!= E
; ++I
)
198 DataInCode
.emplace_back(*I
);
199 llvm::stable_sort(DataInCode
, [](DataInCodeRegion LHS
, DataInCodeRegion RHS
) {
200 return LHS
.Offset
< RHS
.Offset
;
205 std::optional
<uint64_t> readStartAddress(const MachOObjectFile
&O
) {
206 std::optional
<uint64_t> StartOffset
;
207 std::optional
<uint64_t> TextVMAddr
;
208 for (const object::MachOObjectFile::LoadCommandInfo
&LC
: O
.load_commands()) {
210 case MachO::LC_MAIN
: {
211 MachO::entry_point_command LCMain
= O
.getEntryPointCommand(LC
);
212 StartOffset
= LCMain
.entryoff
;
215 case MachO::LC_SEGMENT
: {
216 MachO::segment_command LCSeg
= O
.getSegmentLoadCommand(LC
);
217 StringRef
SegmentName(LCSeg
.segname
,
218 strnlen(LCSeg
.segname
, sizeof(LCSeg
.segname
)));
219 if (SegmentName
== "__TEXT")
220 TextVMAddr
= LCSeg
.vmaddr
;
223 case MachO::LC_SEGMENT_64
: {
224 MachO::segment_command_64 LCSeg
= O
.getSegment64LoadCommand(LC
);
225 StringRef
SegmentName(LCSeg
.segname
,
226 strnlen(LCSeg
.segname
, sizeof(LCSeg
.segname
)));
227 if (SegmentName
== "__TEXT")
228 TextVMAddr
= LCSeg
.vmaddr
;
235 return (TextVMAddr
&& StartOffset
)
236 ? std::optional
<uint64_t>(*TextVMAddr
+ *StartOffset
)
240 } // anonymous namespace
242 void MachORewriteInstance::discoverFileObjects() {
243 std::vector
<SymbolRef
> FunctionSymbols
;
244 for (const SymbolRef
&S
: InputFile
->symbols()) {
245 SymbolRef::Type Type
= cantFail(S
.getType(), "cannot get symbol type");
246 if (Type
== SymbolRef::ST_Function
)
247 FunctionSymbols
.push_back(S
);
249 if (FunctionSymbols
.empty())
252 FunctionSymbols
, [](const SymbolRef
&LHS
, const SymbolRef
&RHS
) {
253 return cantFail(LHS
.getValue()) < cantFail(RHS
.getValue());
255 for (size_t Index
= 0; Index
< FunctionSymbols
.size(); ++Index
) {
256 const uint64_t Address
= cantFail(FunctionSymbols
[Index
].getValue());
257 ErrorOr
<BinarySection
&> Section
= BC
->getSectionForAddress(Address
);
258 // TODO: It happens for some symbols (e.g. __mh_execute_header).
259 // Add proper logic to handle them correctly.
261 errs() << "BOLT-WARNING: no section found for address " << Address
266 std::string SymbolName
=
267 cantFail(FunctionSymbols
[Index
].getName(), "cannot get symbol name")
269 // Uniquify names of local symbols.
270 if (!(cantFail(FunctionSymbols
[Index
].getFlags()) & SymbolRef::SF_Global
))
271 SymbolName
= NR
.uniquify(SymbolName
);
273 section_iterator S
= cantFail(FunctionSymbols
[Index
].getSection());
274 uint64_t EndAddress
= S
->getAddress() + S
->getSize();
276 size_t NFIndex
= Index
+ 1;
278 while (NFIndex
< FunctionSymbols
.size() &&
279 cantFail(FunctionSymbols
[NFIndex
].getValue()) == Address
)
281 if (NFIndex
< FunctionSymbols
.size() &&
282 S
== cantFail(FunctionSymbols
[NFIndex
].getSection()))
283 EndAddress
= cantFail(FunctionSymbols
[NFIndex
].getValue());
285 const uint64_t SymbolSize
= EndAddress
- Address
;
286 const auto It
= BC
->getBinaryFunctions().find(Address
);
287 if (It
== BC
->getBinaryFunctions().end()) {
288 BinaryFunction
*Function
= BC
->createBinaryFunction(
289 std::move(SymbolName
), *Section
, Address
, SymbolSize
);
290 if (!opts::Instrument
)
291 Function
->setOutputAddress(Function
->getAddress());
294 It
->second
.addAlternativeName(std::move(SymbolName
));
298 const std::vector
<DataInCodeRegion
> DataInCode
= readDataInCode(*InputFile
);
300 for (auto &BFI
: BC
->getBinaryFunctions()) {
301 BinaryFunction
&Function
= BFI
.second
;
302 Function
.setMaxSize(Function
.getSize());
304 ErrorOr
<ArrayRef
<uint8_t>> FunctionData
= Function
.getData();
306 errs() << "BOLT-ERROR: corresponding section is non-executable or "
307 << "empty for function " << Function
<< '\n';
311 // Treat zero-sized functions as non-simple ones.
312 if (Function
.getSize() == 0) {
313 Function
.setSimple(false);
317 // Offset of the function in the file.
318 const auto *FileBegin
=
319 reinterpret_cast<const uint8_t *>(InputFile
->getData().data());
320 Function
.setFileOffset(FunctionData
->begin() - FileBegin
);
322 // Treat functions which contain data in code as non-simple ones.
323 const auto It
= std::lower_bound(
324 DataInCode
.cbegin(), DataInCode
.cend(), Function
.getFileOffset(),
325 [](DataInCodeRegion D
, uint64_t Offset
) { return D
.Offset
< Offset
; });
326 if (It
!= DataInCode
.cend() &&
327 It
->Offset
+ It
->Length
<=
328 Function
.getFileOffset() + Function
.getMaxSize())
329 Function
.setSimple(false);
332 BC
->StartFunctionAddress
= readStartAddress(*InputFile
);
335 void MachORewriteInstance::disassembleFunctions() {
336 for (auto &BFI
: BC
->getBinaryFunctions()) {
337 BinaryFunction
&Function
= BFI
.second
;
338 if (!Function
.isSimple())
340 Function
.disassemble();
341 if (opts::PrintDisasm
)
342 Function
.print(outs(), "after disassembly");
346 void MachORewriteInstance::buildFunctionsCFG() {
347 for (auto &BFI
: BC
->getBinaryFunctions()) {
348 BinaryFunction
&Function
= BFI
.second
;
349 if (!Function
.isSimple())
351 if (!Function
.buildCFG(/*AllocId*/ 0)) {
352 errs() << "BOLT-WARNING: failed to build CFG for the function "
358 void MachORewriteInstance::postProcessFunctions() {
359 for (auto &BFI
: BC
->getBinaryFunctions()) {
360 BinaryFunction
&Function
= BFI
.second
;
361 if (Function
.empty())
363 Function
.postProcessCFG();
365 Function
.print(outs(), "after building cfg");
369 void MachORewriteInstance::runOptimizationPasses() {
370 BinaryFunctionPassManager
Manager(*BC
);
371 if (opts::Instrument
) {
372 Manager
.registerPass(std::make_unique
<PatchEntries
>());
373 Manager
.registerPass(std::make_unique
<Instrumentation
>(opts::NeverPrint
));
376 Manager
.registerPass(std::make_unique
<ShortenInstructions
>(opts::NeverPrint
));
378 Manager
.registerPass(std::make_unique
<RemoveNops
>(opts::NeverPrint
));
380 Manager
.registerPass(std::make_unique
<NormalizeCFG
>(opts::PrintNormalized
));
382 Manager
.registerPass(
383 std::make_unique
<ReorderBasicBlocks
>(opts::PrintReordered
));
384 Manager
.registerPass(
385 std::make_unique
<FixupBranches
>(opts::PrintAfterBranchFixup
));
386 // This pass should always run last.*
387 Manager
.registerPass(
388 std::make_unique
<FinalizeFunctions
>(opts::PrintFinalized
));
393 void MachORewriteInstance::mapInstrumentationSection(
394 StringRef SectionName
, BOLTLinker::SectionMapper MapSection
) {
395 if (!opts::Instrument
)
397 ErrorOr
<BinarySection
&> Section
= BC
->getUniqueSectionByName(SectionName
);
399 llvm::errs() << "Cannot find " + SectionName
+ " section\n";
402 if (!Section
->hasValidSectionID())
404 MapSection(*Section
, Section
->getAddress());
407 void MachORewriteInstance::mapCodeSections(
408 BOLTLinker::SectionMapper MapSection
) {
409 for (BinaryFunction
*Function
: BC
->getAllBinaryFunctions()) {
410 if (!Function
->isEmitted())
412 if (Function
->getOutputAddress() == 0)
414 ErrorOr
<BinarySection
&> FuncSection
= Function
->getCodeSection();
417 (Twine("Cannot find section for function ") + Function
->getOneName())
419 FuncSection
.getError());
421 FuncSection
->setOutputAddress(Function
->getOutputAddress());
422 LLVM_DEBUG(dbgs() << "BOLT: mapping 0x"
423 << Twine::utohexstr(FuncSection
->getAllocAddress()) << " to 0x"
424 << Twine::utohexstr(Function
->getOutputAddress()) << '\n');
425 MapSection(*FuncSection
, Function
->getOutputAddress());
426 Function
->setImageAddress(FuncSection
->getAllocAddress());
427 Function
->setImageSize(FuncSection
->getOutputSize());
430 if (opts::Instrument
) {
431 ErrorOr
<BinarySection
&> BOLT
= BC
->getUniqueSectionByName("__bolt");
433 llvm::errs() << "Cannot find __bolt section\n";
436 uint64_t Addr
= BOLT
->getAddress();
437 for (BinaryFunction
*Function
: BC
->getAllBinaryFunctions()) {
438 if (!Function
->isEmitted())
440 if (Function
->getOutputAddress() != 0)
442 ErrorOr
<BinarySection
&> FuncSection
= Function
->getCodeSection();
443 assert(FuncSection
&& "cannot find section for function");
444 Addr
= llvm::alignTo(Addr
, 4);
445 FuncSection
->setOutputAddress(Addr
);
446 MapSection(*FuncSection
, Addr
);
447 Function
->setFileOffset(Addr
- BOLT
->getAddress() +
448 BOLT
->getInputFileOffset());
449 Function
->setImageAddress(FuncSection
->getAllocAddress());
450 Function
->setImageSize(FuncSection
->getOutputSize());
451 BC
->registerNameAtAddress(Function
->getOneName(), Addr
, 0, 0);
452 Addr
+= FuncSection
->getOutputSize();
457 void MachORewriteInstance::emitAndLink() {
459 std::unique_ptr
<::llvm::ToolOutputFile
> TempOut
=
460 std::make_unique
<::llvm::ToolOutputFile
>(
461 opts::OutputFilename
+ ".bolt.o", EC
, sys::fs::OF_None
);
462 check_error(EC
, "cannot create output object file");
467 std::unique_ptr
<buffer_ostream
> BOS
=
468 std::make_unique
<buffer_ostream
>(TempOut
->os());
469 raw_pwrite_stream
*OS
= BOS
.get();
470 auto Streamer
= BC
->createStreamer(*OS
);
472 emitBinaryContext(*Streamer
, *BC
, getOrgSecPrefix());
475 std::unique_ptr
<MemoryBuffer
> ObjectMemBuffer
=
476 MemoryBuffer::getMemBuffer(BOS
->str(), "in-memory object file", false);
477 std::unique_ptr
<object::ObjectFile
> Obj
= cantFail(
478 object::ObjectFile::createObjectFile(ObjectMemBuffer
->getMemBufferRef()),
479 "error creating in-memory object");
480 assert(Obj
&& "createObjectFile cannot return nullptr");
482 auto EFMM
= std::make_unique
<ExecutableFileMemoryManager
>(*BC
);
483 EFMM
->setNewSecPrefix(getNewSecPrefix());
484 EFMM
->setOrgSecPrefix(getOrgSecPrefix());
486 Linker
= std::make_unique
<JITLinkLinker
>(*BC
, std::move(EFMM
));
487 Linker
->loadObject(ObjectMemBuffer
->getMemBufferRef(),
488 [this](auto MapSection
) {
489 // Assign addresses to all sections. If key corresponds
490 // to the object created by ourselves, call our regular
491 // mapping function. If we are loading additional objects
492 // as part of runtime libraries for instrumentation,
493 // treat them as extra sections.
494 mapCodeSections(MapSection
);
495 mapInstrumentationSection("__counters", MapSection
);
496 mapInstrumentationSection("__tables", MapSection
);
499 // TODO: Refactor addRuntimeLibSections to work properly on Mach-O
501 // if (auto *RtLibrary = BC->getRuntimeLibrary()) {
502 // RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) {
503 // mapInstrumentationSection("I__setup", MapSection);
504 // mapInstrumentationSection("I__fini", MapSection);
505 // mapInstrumentationSection("I__data", MapSection);
506 // mapInstrumentationSection("I__text", MapSection);
507 // mapInstrumentationSection("I__cstring", MapSection);
508 // mapInstrumentationSection("I__literal16", MapSection);
513 void MachORewriteInstance::writeInstrumentationSection(StringRef SectionName
,
514 raw_pwrite_stream
&OS
) {
515 if (!opts::Instrument
)
517 ErrorOr
<BinarySection
&> Section
= BC
->getUniqueSectionByName(SectionName
);
519 llvm::errs() << "Cannot find " + SectionName
+ " section\n";
522 if (!Section
->hasValidSectionID())
524 assert(Section
->getInputFileOffset() &&
525 "Section input offset cannot be zero");
526 assert(Section
->getAllocAddress() && "Section alloc address cannot be zero");
527 assert(Section
->getOutputSize() && "Section output size cannot be zero");
528 OS
.pwrite(reinterpret_cast<char *>(Section
->getAllocAddress()),
529 Section
->getOutputSize(), Section
->getInputFileOffset());
532 void MachORewriteInstance::rewriteFile() {
534 Out
= std::make_unique
<ToolOutputFile
>(opts::OutputFilename
, EC
,
536 check_error(EC
, "cannot create output executable file");
537 raw_fd_ostream
&OS
= Out
->os();
538 OS
<< InputFile
->getData();
540 for (auto &BFI
: BC
->getBinaryFunctions()) {
541 BinaryFunction
&Function
= BFI
.second
;
542 if (!Function
.isSimple())
544 assert(Function
.isEmitted() && "Simple function has not been emitted");
545 if (!opts::Instrument
&& (Function
.getImageSize() > Function
.getMaxSize()))
547 if (opts::Verbosity
>= 2)
548 outs() << "BOLT: rewriting function \"" << Function
<< "\"\n";
549 OS
.pwrite(reinterpret_cast<char *>(Function
.getImageAddress()),
550 Function
.getImageSize(), Function
.getFileOffset());
553 for (const BinaryFunction
*Function
: BC
->getInjectedBinaryFunctions()) {
554 OS
.pwrite(reinterpret_cast<char *>(Function
->getImageAddress()),
555 Function
->getImageSize(), Function
->getFileOffset());
558 writeInstrumentationSection("__counters", OS
);
559 writeInstrumentationSection("__tables", OS
);
561 // TODO: Refactor addRuntimeLibSections to work properly on Mach-O and
563 writeInstrumentationSection("I__setup", OS
);
564 writeInstrumentationSection("I__fini", OS
);
565 writeInstrumentationSection("I__data", OS
);
566 writeInstrumentationSection("I__text", OS
);
567 writeInstrumentationSection("I__cstring", OS
);
568 writeInstrumentationSection("I__literal16", OS
);
571 EC
= sys::fs::setPermissions(
572 opts::OutputFilename
,
573 static_cast<sys::fs::perms
>(sys::fs::perms::all_all
&
574 ~sys::fs::getUmask()));
575 check_error(EC
, "cannot set permissions of output file");
578 void MachORewriteInstance::adjustCommandLineOptions() {
579 //FIXME! Upstream change
580 // opts::CheckOverlappingElements = false;
581 if (!opts::AlignText
.getNumOccurrences())
582 opts::AlignText
= BC
->PageAlign
;
583 if (opts::Instrument
.getNumOccurrences())
584 opts::ForcePatch
= true;
585 opts::JumpTables
= JTS_MOVE
;
586 opts::InstrumentCalls
= false;
587 opts::RuntimeInstrumentationLib
= "libbolt_rt_instr_osx.a";
590 void MachORewriteInstance::run() {
591 adjustCommandLineOptions();
593 readSpecialSections();
595 discoverFileObjects();
597 preprocessProfileData();
599 disassembleFunctions();
601 processProfileDataPreCFG();
605 processProfileData();
607 postProcessFunctions();
609 runOptimizationPasses();
616 MachORewriteInstance::~MachORewriteInstance() {}