1 //===- bolt/Rewrite/MachORewriteInstance.cpp - MachO rewriter -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Rewrite/MachORewriteInstance.h"
10 #include "bolt/Core/BinaryContext.h"
11 #include "bolt/Core/BinaryEmitter.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "bolt/Core/JumpTable.h"
14 #include "bolt/Core/MCPlusBuilder.h"
15 #include "bolt/Passes/Instrumentation.h"
16 #include "bolt/Passes/PatchEntries.h"
17 #include "bolt/Profile/DataReader.h"
18 #include "bolt/Rewrite/BinaryPassManager.h"
19 #include "bolt/Rewrite/ExecutableFileMemoryManager.h"
20 #include "bolt/Rewrite/JITLinkLinker.h"
21 #include "bolt/Rewrite/RewriteInstance.h"
22 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
23 #include "bolt/Utils/Utils.h"
24 #include "llvm/MC/MCObjectStreamer.h"
25 #include "llvm/Support/Errc.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/ToolOutputFile.h"
34 extern cl::opt
<unsigned> AlignText
;
35 //FIXME! Upstream change
36 //extern cl::opt<bool> CheckOverlappingElements;
37 extern cl::opt
<bool> ForcePatch
;
38 extern cl::opt
<bool> Instrument
;
39 extern cl::opt
<bool> InstrumentCalls
;
40 extern cl::opt
<bolt::JumpTableSupportLevel
> JumpTables
;
41 extern cl::opt
<bool> KeepTmp
;
42 extern cl::opt
<bool> NeverPrint
;
43 extern cl::opt
<std::string
> OutputFilename
;
44 extern cl::opt
<bool> PrintAfterBranchFixup
;
45 extern cl::opt
<bool> PrintFinalized
;
46 extern cl::opt
<bool> PrintNormalized
;
47 extern cl::opt
<bool> PrintReordered
;
48 extern cl::opt
<bool> PrintSections
;
49 extern cl::opt
<bool> PrintDisasm
;
50 extern cl::opt
<bool> PrintCFG
;
51 extern cl::opt
<std::string
> RuntimeInstrumentationLib
;
52 extern cl::opt
<unsigned> Verbosity
;
58 #define DEBUG_TYPE "bolt"
60 Expected
<std::unique_ptr
<MachORewriteInstance
>>
61 MachORewriteInstance::create(object::MachOObjectFile
*InputFile
,
63 Error Err
= Error::success();
65 std::make_unique
<MachORewriteInstance
>(InputFile
, ToolPath
, Err
);
67 return std::move(Err
);
68 return std::move(MachORI
);
71 MachORewriteInstance::MachORewriteInstance(object::MachOObjectFile
*InputFile
,
72 StringRef ToolPath
, Error
&Err
)
73 : InputFile(InputFile
), ToolPath(ToolPath
) {
74 ErrorAsOutParameter
EAO(&Err
);
75 Relocation::Arch
= InputFile
->makeTriple().getArch();
76 auto BCOrErr
= BinaryContext::createBinaryContext(
77 InputFile
->makeTriple(), std::make_shared
<orc::SymbolStringPool
>(),
78 InputFile
->getFileName(), nullptr,
79 /* IsPIC */ true, DWARFContext::create(*InputFile
),
80 {llvm::outs(), llvm::errs()});
81 if (Error E
= BCOrErr
.takeError()) {
85 BC
= std::move(BCOrErr
.get());
86 BC
->initializeTarget(std::unique_ptr
<MCPlusBuilder
>(
87 createMCPlusBuilder(BC
->TheTriple
->getArch(), BC
->MIA
.get(),
88 BC
->MII
.get(), BC
->MRI
.get(), BC
->STI
.get())));
90 BC
->setRuntimeLibrary(std::make_unique
<InstrumentationRuntimeLibrary
>());
93 Error
MachORewriteInstance::setProfile(StringRef Filename
) {
94 if (!sys::fs::exists(Filename
))
95 return errorCodeToError(make_error_code(errc::no_such_file_or_directory
));
99 return make_error
<StringError
>(
100 Twine("multiple profiles specified: ") + ProfileReader
->getFilename() +
101 " and " + Filename
, inconvertibleErrorCode());
104 ProfileReader
= std::make_unique
<DataReader
>(Filename
);
105 return Error::success();
108 void MachORewriteInstance::preprocessProfileData() {
111 if (Error E
= ProfileReader
->preprocessProfile(*BC
.get()))
112 report_error("cannot pre-process profile", std::move(E
));
115 void MachORewriteInstance::processProfileDataPreCFG() {
118 if (Error E
= ProfileReader
->readProfilePreCFG(*BC
.get()))
119 report_error("cannot read profile pre-CFG", std::move(E
));
122 void MachORewriteInstance::processProfileData() {
125 if (Error E
= ProfileReader
->readProfile(*BC
.get()))
126 report_error("cannot read profile", std::move(E
));
129 void MachORewriteInstance::readSpecialSections() {
130 for (const object::SectionRef
&Section
: InputFile
->sections()) {
131 Expected
<StringRef
> SectionName
= Section
.getName();;
132 check_error(SectionName
.takeError(), "cannot get section name");
133 // Only register sections with names.
134 if (!SectionName
->empty()) {
135 BC
->registerSection(Section
);
137 dbgs() << "BOLT-DEBUG: registering section " << *SectionName
138 << " @ 0x" << Twine::utohexstr(Section
.getAddress()) << ":0x"
139 << Twine::utohexstr(Section
.getAddress() + Section
.getSize())
144 if (opts::PrintSections
) {
145 outs() << "BOLT-INFO: Sections from original binary:\n";
146 BC
->printSections(outs());
152 struct DataInCodeRegion
{
153 explicit DataInCodeRegion(DiceRef D
) {
164 std::vector
<DataInCodeRegion
> readDataInCode(const MachOObjectFile
&O
) {
165 const MachO::linkedit_data_command DataInCodeLC
=
166 O
.getDataInCodeLoadCommand();
167 const uint32_t NumberOfEntries
=
168 DataInCodeLC
.datasize
/ sizeof(MachO::data_in_code_entry
);
169 std::vector
<DataInCodeRegion
> DataInCode
;
170 DataInCode
.reserve(NumberOfEntries
);
171 for (auto I
= O
.begin_dices(), E
= O
.end_dices(); I
!= E
; ++I
)
172 DataInCode
.emplace_back(*I
);
173 llvm::stable_sort(DataInCode
, [](DataInCodeRegion LHS
, DataInCodeRegion RHS
) {
174 return LHS
.Offset
< RHS
.Offset
;
179 std::optional
<uint64_t> readStartAddress(const MachOObjectFile
&O
) {
180 std::optional
<uint64_t> StartOffset
;
181 std::optional
<uint64_t> TextVMAddr
;
182 for (const object::MachOObjectFile::LoadCommandInfo
&LC
: O
.load_commands()) {
184 case MachO::LC_MAIN
: {
185 MachO::entry_point_command LCMain
= O
.getEntryPointCommand(LC
);
186 StartOffset
= LCMain
.entryoff
;
189 case MachO::LC_SEGMENT
: {
190 MachO::segment_command LCSeg
= O
.getSegmentLoadCommand(LC
);
191 StringRef
SegmentName(LCSeg
.segname
,
192 strnlen(LCSeg
.segname
, sizeof(LCSeg
.segname
)));
193 if (SegmentName
== "__TEXT")
194 TextVMAddr
= LCSeg
.vmaddr
;
197 case MachO::LC_SEGMENT_64
: {
198 MachO::segment_command_64 LCSeg
= O
.getSegment64LoadCommand(LC
);
199 StringRef
SegmentName(LCSeg
.segname
,
200 strnlen(LCSeg
.segname
, sizeof(LCSeg
.segname
)));
201 if (SegmentName
== "__TEXT")
202 TextVMAddr
= LCSeg
.vmaddr
;
209 return (TextVMAddr
&& StartOffset
)
210 ? std::optional
<uint64_t>(*TextVMAddr
+ *StartOffset
)
214 } // anonymous namespace
216 void MachORewriteInstance::discoverFileObjects() {
217 std::vector
<SymbolRef
> FunctionSymbols
;
218 for (const SymbolRef
&S
: InputFile
->symbols()) {
219 SymbolRef::Type Type
= cantFail(S
.getType(), "cannot get symbol type");
220 if (Type
== SymbolRef::ST_Function
)
221 FunctionSymbols
.push_back(S
);
223 if (FunctionSymbols
.empty())
226 FunctionSymbols
, [](const SymbolRef
&LHS
, const SymbolRef
&RHS
) {
227 return cantFail(LHS
.getValue()) < cantFail(RHS
.getValue());
229 for (size_t Index
= 0; Index
< FunctionSymbols
.size(); ++Index
) {
230 const uint64_t Address
= cantFail(FunctionSymbols
[Index
].getValue());
231 ErrorOr
<BinarySection
&> Section
= BC
->getSectionForAddress(Address
);
232 // TODO: It happens for some symbols (e.g. __mh_execute_header).
233 // Add proper logic to handle them correctly.
235 errs() << "BOLT-WARNING: no section found for address " << Address
240 std::string SymbolName
=
241 cantFail(FunctionSymbols
[Index
].getName(), "cannot get symbol name")
243 // Uniquify names of local symbols.
244 if (!(cantFail(FunctionSymbols
[Index
].getFlags()) & SymbolRef::SF_Global
))
245 SymbolName
= NR
.uniquify(SymbolName
);
247 section_iterator S
= cantFail(FunctionSymbols
[Index
].getSection());
248 uint64_t EndAddress
= S
->getAddress() + S
->getSize();
250 size_t NFIndex
= Index
+ 1;
252 while (NFIndex
< FunctionSymbols
.size() &&
253 cantFail(FunctionSymbols
[NFIndex
].getValue()) == Address
)
255 if (NFIndex
< FunctionSymbols
.size() &&
256 S
== cantFail(FunctionSymbols
[NFIndex
].getSection()))
257 EndAddress
= cantFail(FunctionSymbols
[NFIndex
].getValue());
259 const uint64_t SymbolSize
= EndAddress
- Address
;
260 const auto It
= BC
->getBinaryFunctions().find(Address
);
261 if (It
== BC
->getBinaryFunctions().end()) {
262 BinaryFunction
*Function
= BC
->createBinaryFunction(
263 std::move(SymbolName
), *Section
, Address
, SymbolSize
);
264 if (!opts::Instrument
)
265 Function
->setOutputAddress(Function
->getAddress());
268 It
->second
.addAlternativeName(std::move(SymbolName
));
272 const std::vector
<DataInCodeRegion
> DataInCode
= readDataInCode(*InputFile
);
274 for (auto &BFI
: BC
->getBinaryFunctions()) {
275 BinaryFunction
&Function
= BFI
.second
;
276 Function
.setMaxSize(Function
.getSize());
278 ErrorOr
<ArrayRef
<uint8_t>> FunctionData
= Function
.getData();
280 errs() << "BOLT-ERROR: corresponding section is non-executable or "
281 << "empty for function " << Function
<< '\n';
285 // Treat zero-sized functions as non-simple ones.
286 if (Function
.getSize() == 0) {
287 Function
.setSimple(false);
291 // Offset of the function in the file.
292 const auto *FileBegin
=
293 reinterpret_cast<const uint8_t *>(InputFile
->getData().data());
294 Function
.setFileOffset(FunctionData
->begin() - FileBegin
);
296 // Treat functions which contain data in code as non-simple ones.
297 const auto It
= std::lower_bound(
298 DataInCode
.cbegin(), DataInCode
.cend(), Function
.getFileOffset(),
299 [](DataInCodeRegion D
, uint64_t Offset
) { return D
.Offset
< Offset
; });
300 if (It
!= DataInCode
.cend() &&
301 It
->Offset
+ It
->Length
<=
302 Function
.getFileOffset() + Function
.getMaxSize())
303 Function
.setSimple(false);
306 BC
->StartFunctionAddress
= readStartAddress(*InputFile
);
309 void MachORewriteInstance::disassembleFunctions() {
310 for (auto &BFI
: BC
->getBinaryFunctions()) {
311 BinaryFunction
&Function
= BFI
.second
;
312 if (!Function
.isSimple())
314 BC
->logBOLTErrorsAndQuitOnFatal(Function
.disassemble());
315 if (opts::PrintDisasm
)
316 Function
.print(outs(), "after disassembly");
320 void MachORewriteInstance::buildFunctionsCFG() {
321 for (auto &BFI
: BC
->getBinaryFunctions()) {
322 BinaryFunction
&Function
= BFI
.second
;
323 if (!Function
.isSimple())
325 BC
->logBOLTErrorsAndQuitOnFatal(Function
.buildCFG(/*AllocId*/ 0));
329 void MachORewriteInstance::postProcessFunctions() {
330 for (auto &BFI
: BC
->getBinaryFunctions()) {
331 BinaryFunction
&Function
= BFI
.second
;
332 if (Function
.empty())
334 Function
.postProcessCFG();
336 Function
.print(outs(), "after building cfg");
340 void MachORewriteInstance::runOptimizationPasses() {
341 BinaryFunctionPassManager
Manager(*BC
);
342 if (opts::Instrument
) {
343 Manager
.registerPass(std::make_unique
<PatchEntries
>());
344 Manager
.registerPass(std::make_unique
<Instrumentation
>(opts::NeverPrint
));
347 Manager
.registerPass(std::make_unique
<ShortenInstructions
>(opts::NeverPrint
));
349 Manager
.registerPass(std::make_unique
<RemoveNops
>(opts::NeverPrint
));
351 Manager
.registerPass(std::make_unique
<NormalizeCFG
>(opts::PrintNormalized
));
353 Manager
.registerPass(
354 std::make_unique
<ReorderBasicBlocks
>(opts::PrintReordered
));
355 Manager
.registerPass(
356 std::make_unique
<FixupBranches
>(opts::PrintAfterBranchFixup
));
357 // This pass should always run last.*
358 Manager
.registerPass(
359 std::make_unique
<FinalizeFunctions
>(opts::PrintFinalized
));
361 BC
->logBOLTErrorsAndQuitOnFatal(Manager
.runPasses());
364 void MachORewriteInstance::mapInstrumentationSection(
365 StringRef SectionName
, BOLTLinker::SectionMapper MapSection
) {
366 if (!opts::Instrument
)
368 ErrorOr
<BinarySection
&> Section
= BC
->getUniqueSectionByName(SectionName
);
370 llvm::errs() << "Cannot find " + SectionName
+ " section\n";
373 if (!Section
->hasValidSectionID())
375 MapSection(*Section
, Section
->getAddress());
378 void MachORewriteInstance::mapCodeSections(
379 BOLTLinker::SectionMapper MapSection
) {
380 for (BinaryFunction
*Function
: BC
->getAllBinaryFunctions()) {
381 if (!Function
->isEmitted())
383 if (Function
->getOutputAddress() == 0)
385 ErrorOr
<BinarySection
&> FuncSection
= Function
->getCodeSection();
388 (Twine("Cannot find section for function ") + Function
->getOneName())
390 FuncSection
.getError());
392 FuncSection
->setOutputAddress(Function
->getOutputAddress());
393 LLVM_DEBUG(dbgs() << "BOLT: mapping 0x"
394 << Twine::utohexstr(FuncSection
->getAllocAddress()) << " to 0x"
395 << Twine::utohexstr(Function
->getOutputAddress()) << '\n');
396 MapSection(*FuncSection
, Function
->getOutputAddress());
397 Function
->setImageAddress(FuncSection
->getAllocAddress());
398 Function
->setImageSize(FuncSection
->getOutputSize());
401 if (opts::Instrument
) {
402 ErrorOr
<BinarySection
&> BOLT
= BC
->getUniqueSectionByName("__bolt");
404 llvm::errs() << "Cannot find __bolt section\n";
407 uint64_t Addr
= BOLT
->getAddress();
408 for (BinaryFunction
*Function
: BC
->getAllBinaryFunctions()) {
409 if (!Function
->isEmitted())
411 if (Function
->getOutputAddress() != 0)
413 ErrorOr
<BinarySection
&> FuncSection
= Function
->getCodeSection();
414 assert(FuncSection
&& "cannot find section for function");
415 Addr
= llvm::alignTo(Addr
, 4);
416 FuncSection
->setOutputAddress(Addr
);
417 MapSection(*FuncSection
, Addr
);
418 Function
->setFileOffset(Addr
- BOLT
->getAddress() +
419 BOLT
->getInputFileOffset());
420 Function
->setImageAddress(FuncSection
->getAllocAddress());
421 Function
->setImageSize(FuncSection
->getOutputSize());
422 BC
->registerNameAtAddress(Function
->getOneName(), Addr
, 0, 0);
423 Addr
+= FuncSection
->getOutputSize();
428 void MachORewriteInstance::emitAndLink() {
430 std::unique_ptr
<::llvm::ToolOutputFile
> TempOut
=
431 std::make_unique
<::llvm::ToolOutputFile
>(
432 opts::OutputFilename
+ ".bolt.o", EC
, sys::fs::OF_None
);
433 check_error(EC
, "cannot create output object file");
438 std::unique_ptr
<buffer_ostream
> BOS
=
439 std::make_unique
<buffer_ostream
>(TempOut
->os());
440 raw_pwrite_stream
*OS
= BOS
.get();
441 auto Streamer
= BC
->createStreamer(*OS
);
443 emitBinaryContext(*Streamer
, *BC
, getOrgSecPrefix());
446 std::unique_ptr
<MemoryBuffer
> ObjectMemBuffer
=
447 MemoryBuffer::getMemBuffer(BOS
->str(), "in-memory object file", false);
448 std::unique_ptr
<object::ObjectFile
> Obj
= cantFail(
449 object::ObjectFile::createObjectFile(ObjectMemBuffer
->getMemBufferRef()),
450 "error creating in-memory object");
451 assert(Obj
&& "createObjectFile cannot return nullptr");
453 auto EFMM
= std::make_unique
<ExecutableFileMemoryManager
>(*BC
);
454 EFMM
->setNewSecPrefix(getNewSecPrefix());
455 EFMM
->setOrgSecPrefix(getOrgSecPrefix());
457 Linker
= std::make_unique
<JITLinkLinker
>(*BC
, std::move(EFMM
));
458 Linker
->loadObject(ObjectMemBuffer
->getMemBufferRef(),
459 [this](auto MapSection
) {
460 // Assign addresses to all sections. If key corresponds
461 // to the object created by ourselves, call our regular
462 // mapping function. If we are loading additional objects
463 // as part of runtime libraries for instrumentation,
464 // treat them as extra sections.
465 mapCodeSections(MapSection
);
466 mapInstrumentationSection("__counters", MapSection
);
467 mapInstrumentationSection("__tables", MapSection
);
470 // TODO: Refactor addRuntimeLibSections to work properly on Mach-O
472 // if (auto *RtLibrary = BC->getRuntimeLibrary()) {
473 // RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) {
474 // mapInstrumentationSection("I__setup", MapSection);
475 // mapInstrumentationSection("I__fini", MapSection);
476 // mapInstrumentationSection("I__data", MapSection);
477 // mapInstrumentationSection("I__text", MapSection);
478 // mapInstrumentationSection("I__cstring", MapSection);
479 // mapInstrumentationSection("I__literal16", MapSection);
484 void MachORewriteInstance::writeInstrumentationSection(StringRef SectionName
,
485 raw_pwrite_stream
&OS
) {
486 if (!opts::Instrument
)
488 ErrorOr
<BinarySection
&> Section
= BC
->getUniqueSectionByName(SectionName
);
490 llvm::errs() << "Cannot find " + SectionName
+ " section\n";
493 if (!Section
->hasValidSectionID())
495 assert(Section
->getInputFileOffset() &&
496 "Section input offset cannot be zero");
497 assert(Section
->getAllocAddress() && "Section alloc address cannot be zero");
498 assert(Section
->getOutputSize() && "Section output size cannot be zero");
499 OS
.pwrite(reinterpret_cast<char *>(Section
->getAllocAddress()),
500 Section
->getOutputSize(), Section
->getInputFileOffset());
503 void MachORewriteInstance::rewriteFile() {
505 Out
= std::make_unique
<ToolOutputFile
>(opts::OutputFilename
, EC
,
507 check_error(EC
, "cannot create output executable file");
508 raw_fd_ostream
&OS
= Out
->os();
509 OS
<< InputFile
->getData();
511 for (auto &BFI
: BC
->getBinaryFunctions()) {
512 BinaryFunction
&Function
= BFI
.second
;
513 if (!Function
.isSimple())
515 assert(Function
.isEmitted() && "Simple function has not been emitted");
516 if (!opts::Instrument
&& (Function
.getImageSize() > Function
.getMaxSize()))
518 if (opts::Verbosity
>= 2)
519 outs() << "BOLT: rewriting function \"" << Function
<< "\"\n";
520 OS
.pwrite(reinterpret_cast<char *>(Function
.getImageAddress()),
521 Function
.getImageSize(), Function
.getFileOffset());
524 for (const BinaryFunction
*Function
: BC
->getInjectedBinaryFunctions()) {
525 OS
.pwrite(reinterpret_cast<char *>(Function
->getImageAddress()),
526 Function
->getImageSize(), Function
->getFileOffset());
529 writeInstrumentationSection("__counters", OS
);
530 writeInstrumentationSection("__tables", OS
);
532 // TODO: Refactor addRuntimeLibSections to work properly on Mach-O and
534 writeInstrumentationSection("I__setup", OS
);
535 writeInstrumentationSection("I__fini", OS
);
536 writeInstrumentationSection("I__data", OS
);
537 writeInstrumentationSection("I__text", OS
);
538 writeInstrumentationSection("I__cstring", OS
);
539 writeInstrumentationSection("I__literal16", OS
);
542 EC
= sys::fs::setPermissions(
543 opts::OutputFilename
,
544 static_cast<sys::fs::perms
>(sys::fs::perms::all_all
&
545 ~sys::fs::getUmask()));
546 check_error(EC
, "cannot set permissions of output file");
549 void MachORewriteInstance::adjustCommandLineOptions() {
550 //FIXME! Upstream change
551 // opts::CheckOverlappingElements = false;
552 if (!opts::AlignText
.getNumOccurrences())
553 opts::AlignText
= BC
->PageAlign
;
554 if (opts::Instrument
.getNumOccurrences())
555 opts::ForcePatch
= true;
556 opts::JumpTables
= JTS_MOVE
;
557 opts::InstrumentCalls
= false;
558 opts::RuntimeInstrumentationLib
= "libbolt_rt_instr_osx.a";
561 void MachORewriteInstance::run() {
562 adjustCommandLineOptions();
564 readSpecialSections();
566 discoverFileObjects();
568 preprocessProfileData();
570 disassembleFunctions();
572 processProfileDataPreCFG();
576 processProfileData();
578 postProcessFunctions();
580 runOptimizationPasses();
587 MachORewriteInstance::~MachORewriteInstance() {}