1 //===- bolt/Rewrite/MachORewriteInstance.cpp - MachO rewriter -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Rewrite/MachORewriteInstance.h"
10 #include "bolt/Core/BinaryContext.h"
11 #include "bolt/Core/BinaryEmitter.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "bolt/Core/JumpTable.h"
14 #include "bolt/Core/MCPlusBuilder.h"
15 #include "bolt/Passes/Instrumentation.h"
16 #include "bolt/Passes/PatchEntries.h"
17 #include "bolt/Profile/DataReader.h"
18 #include "bolt/Rewrite/BinaryPassManager.h"
19 #include "bolt/Rewrite/ExecutableFileMemoryManager.h"
20 #include "bolt/Rewrite/JITLinkLinker.h"
21 #include "bolt/Rewrite/RewriteInstance.h"
22 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
23 #include "bolt/Utils/Utils.h"
24 #include "llvm/MC/MCObjectStreamer.h"
25 #include "llvm/Support/Errc.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/ToolOutputFile.h"
34 extern cl::opt
<unsigned> AlignText
;
35 //FIXME! Upstream change
36 //extern cl::opt<bool> CheckOverlappingElements;
37 extern cl::opt
<bool> ForcePatch
;
38 extern cl::opt
<bool> Instrument
;
39 extern cl::opt
<bool> InstrumentCalls
;
40 extern cl::opt
<bolt::JumpTableSupportLevel
> JumpTables
;
41 extern cl::opt
<bool> KeepTmp
;
42 extern cl::opt
<bool> NeverPrint
;
43 extern cl::opt
<std::string
> OutputFilename
;
44 extern cl::opt
<bool> PrintAfterBranchFixup
;
45 extern cl::opt
<bool> PrintFinalized
;
46 extern cl::opt
<bool> PrintNormalized
;
47 extern cl::opt
<bool> PrintReordered
;
48 extern cl::opt
<bool> PrintSections
;
49 extern cl::opt
<bool> PrintDisasm
;
50 extern cl::opt
<bool> PrintCFG
;
51 extern cl::opt
<std::string
> RuntimeInstrumentationLib
;
52 extern cl::opt
<unsigned> Verbosity
;
58 #define DEBUG_TYPE "bolt"
60 Expected
<std::unique_ptr
<MachORewriteInstance
>>
61 MachORewriteInstance::create(object::MachOObjectFile
*InputFile
,
63 Error Err
= Error::success();
65 std::make_unique
<MachORewriteInstance
>(InputFile
, ToolPath
, Err
);
67 return std::move(Err
);
68 return std::move(MachORI
);
71 MachORewriteInstance::MachORewriteInstance(object::MachOObjectFile
*InputFile
,
72 StringRef ToolPath
, Error
&Err
)
73 : InputFile(InputFile
), ToolPath(ToolPath
) {
74 ErrorAsOutParameter
EAO(&Err
);
75 Relocation::Arch
= InputFile
->makeTriple().getArch();
76 auto BCOrErr
= BinaryContext::createBinaryContext(
77 InputFile
->makeTriple(), InputFile
->getFileName(), nullptr,
78 /* IsPIC */ true, DWARFContext::create(*InputFile
),
79 {llvm::outs(), llvm::errs()});
80 if (Error E
= BCOrErr
.takeError()) {
84 BC
= std::move(BCOrErr
.get());
85 BC
->initializeTarget(std::unique_ptr
<MCPlusBuilder
>(
86 createMCPlusBuilder(BC
->TheTriple
->getArch(), BC
->MIA
.get(),
87 BC
->MII
.get(), BC
->MRI
.get(), BC
->STI
.get())));
89 BC
->setRuntimeLibrary(std::make_unique
<InstrumentationRuntimeLibrary
>());
92 Error
MachORewriteInstance::setProfile(StringRef Filename
) {
93 if (!sys::fs::exists(Filename
))
94 return errorCodeToError(make_error_code(errc::no_such_file_or_directory
));
98 return make_error
<StringError
>(
99 Twine("multiple profiles specified: ") + ProfileReader
->getFilename() +
100 " and " + Filename
, inconvertibleErrorCode());
103 ProfileReader
= std::make_unique
<DataReader
>(Filename
);
104 return Error::success();
107 void MachORewriteInstance::preprocessProfileData() {
110 if (Error E
= ProfileReader
->preprocessProfile(*BC
.get()))
111 report_error("cannot pre-process profile", std::move(E
));
114 void MachORewriteInstance::processProfileDataPreCFG() {
117 if (Error E
= ProfileReader
->readProfilePreCFG(*BC
.get()))
118 report_error("cannot read profile pre-CFG", std::move(E
));
121 void MachORewriteInstance::processProfileData() {
124 if (Error E
= ProfileReader
->readProfile(*BC
.get()))
125 report_error("cannot read profile", std::move(E
));
128 void MachORewriteInstance::readSpecialSections() {
129 for (const object::SectionRef
&Section
: InputFile
->sections()) {
130 Expected
<StringRef
> SectionName
= Section
.getName();;
131 check_error(SectionName
.takeError(), "cannot get section name");
132 // Only register sections with names.
133 if (!SectionName
->empty()) {
134 BC
->registerSection(Section
);
136 dbgs() << "BOLT-DEBUG: registering section " << *SectionName
137 << " @ 0x" << Twine::utohexstr(Section
.getAddress()) << ":0x"
138 << Twine::utohexstr(Section
.getAddress() + Section
.getSize())
143 if (opts::PrintSections
) {
144 outs() << "BOLT-INFO: Sections from original binary:\n";
145 BC
->printSections(outs());
151 struct DataInCodeRegion
{
152 explicit DataInCodeRegion(DiceRef D
) {
163 std::vector
<DataInCodeRegion
> readDataInCode(const MachOObjectFile
&O
) {
164 const MachO::linkedit_data_command DataInCodeLC
=
165 O
.getDataInCodeLoadCommand();
166 const uint32_t NumberOfEntries
=
167 DataInCodeLC
.datasize
/ sizeof(MachO::data_in_code_entry
);
168 std::vector
<DataInCodeRegion
> DataInCode
;
169 DataInCode
.reserve(NumberOfEntries
);
170 for (auto I
= O
.begin_dices(), E
= O
.end_dices(); I
!= E
; ++I
)
171 DataInCode
.emplace_back(*I
);
172 llvm::stable_sort(DataInCode
, [](DataInCodeRegion LHS
, DataInCodeRegion RHS
) {
173 return LHS
.Offset
< RHS
.Offset
;
178 std::optional
<uint64_t> readStartAddress(const MachOObjectFile
&O
) {
179 std::optional
<uint64_t> StartOffset
;
180 std::optional
<uint64_t> TextVMAddr
;
181 for (const object::MachOObjectFile::LoadCommandInfo
&LC
: O
.load_commands()) {
183 case MachO::LC_MAIN
: {
184 MachO::entry_point_command LCMain
= O
.getEntryPointCommand(LC
);
185 StartOffset
= LCMain
.entryoff
;
188 case MachO::LC_SEGMENT
: {
189 MachO::segment_command LCSeg
= O
.getSegmentLoadCommand(LC
);
190 StringRef
SegmentName(LCSeg
.segname
,
191 strnlen(LCSeg
.segname
, sizeof(LCSeg
.segname
)));
192 if (SegmentName
== "__TEXT")
193 TextVMAddr
= LCSeg
.vmaddr
;
196 case MachO::LC_SEGMENT_64
: {
197 MachO::segment_command_64 LCSeg
= O
.getSegment64LoadCommand(LC
);
198 StringRef
SegmentName(LCSeg
.segname
,
199 strnlen(LCSeg
.segname
, sizeof(LCSeg
.segname
)));
200 if (SegmentName
== "__TEXT")
201 TextVMAddr
= LCSeg
.vmaddr
;
208 return (TextVMAddr
&& StartOffset
)
209 ? std::optional
<uint64_t>(*TextVMAddr
+ *StartOffset
)
213 } // anonymous namespace
215 void MachORewriteInstance::discoverFileObjects() {
216 std::vector
<SymbolRef
> FunctionSymbols
;
217 for (const SymbolRef
&S
: InputFile
->symbols()) {
218 SymbolRef::Type Type
= cantFail(S
.getType(), "cannot get symbol type");
219 if (Type
== SymbolRef::ST_Function
)
220 FunctionSymbols
.push_back(S
);
222 if (FunctionSymbols
.empty())
225 FunctionSymbols
, [](const SymbolRef
&LHS
, const SymbolRef
&RHS
) {
226 return cantFail(LHS
.getValue()) < cantFail(RHS
.getValue());
228 for (size_t Index
= 0; Index
< FunctionSymbols
.size(); ++Index
) {
229 const uint64_t Address
= cantFail(FunctionSymbols
[Index
].getValue());
230 ErrorOr
<BinarySection
&> Section
= BC
->getSectionForAddress(Address
);
231 // TODO: It happens for some symbols (e.g. __mh_execute_header).
232 // Add proper logic to handle them correctly.
234 errs() << "BOLT-WARNING: no section found for address " << Address
239 std::string SymbolName
=
240 cantFail(FunctionSymbols
[Index
].getName(), "cannot get symbol name")
242 // Uniquify names of local symbols.
243 if (!(cantFail(FunctionSymbols
[Index
].getFlags()) & SymbolRef::SF_Global
))
244 SymbolName
= NR
.uniquify(SymbolName
);
246 section_iterator S
= cantFail(FunctionSymbols
[Index
].getSection());
247 uint64_t EndAddress
= S
->getAddress() + S
->getSize();
249 size_t NFIndex
= Index
+ 1;
251 while (NFIndex
< FunctionSymbols
.size() &&
252 cantFail(FunctionSymbols
[NFIndex
].getValue()) == Address
)
254 if (NFIndex
< FunctionSymbols
.size() &&
255 S
== cantFail(FunctionSymbols
[NFIndex
].getSection()))
256 EndAddress
= cantFail(FunctionSymbols
[NFIndex
].getValue());
258 const uint64_t SymbolSize
= EndAddress
- Address
;
259 const auto It
= BC
->getBinaryFunctions().find(Address
);
260 if (It
== BC
->getBinaryFunctions().end()) {
261 BinaryFunction
*Function
= BC
->createBinaryFunction(
262 std::move(SymbolName
), *Section
, Address
, SymbolSize
);
263 if (!opts::Instrument
)
264 Function
->setOutputAddress(Function
->getAddress());
267 It
->second
.addAlternativeName(std::move(SymbolName
));
271 const std::vector
<DataInCodeRegion
> DataInCode
= readDataInCode(*InputFile
);
273 for (auto &BFI
: BC
->getBinaryFunctions()) {
274 BinaryFunction
&Function
= BFI
.second
;
275 Function
.setMaxSize(Function
.getSize());
277 ErrorOr
<ArrayRef
<uint8_t>> FunctionData
= Function
.getData();
279 errs() << "BOLT-ERROR: corresponding section is non-executable or "
280 << "empty for function " << Function
<< '\n';
284 // Treat zero-sized functions as non-simple ones.
285 if (Function
.getSize() == 0) {
286 Function
.setSimple(false);
290 // Offset of the function in the file.
291 const auto *FileBegin
=
292 reinterpret_cast<const uint8_t *>(InputFile
->getData().data());
293 Function
.setFileOffset(FunctionData
->begin() - FileBegin
);
295 // Treat functions which contain data in code as non-simple ones.
296 const auto It
= std::lower_bound(
297 DataInCode
.cbegin(), DataInCode
.cend(), Function
.getFileOffset(),
298 [](DataInCodeRegion D
, uint64_t Offset
) { return D
.Offset
< Offset
; });
299 if (It
!= DataInCode
.cend() &&
300 It
->Offset
+ It
->Length
<=
301 Function
.getFileOffset() + Function
.getMaxSize())
302 Function
.setSimple(false);
305 BC
->StartFunctionAddress
= readStartAddress(*InputFile
);
308 void MachORewriteInstance::disassembleFunctions() {
309 for (auto &BFI
: BC
->getBinaryFunctions()) {
310 BinaryFunction
&Function
= BFI
.second
;
311 if (!Function
.isSimple())
313 BC
->logBOLTErrorsAndQuitOnFatal(Function
.disassemble());
314 if (opts::PrintDisasm
)
315 Function
.print(outs(), "after disassembly");
319 void MachORewriteInstance::buildFunctionsCFG() {
320 for (auto &BFI
: BC
->getBinaryFunctions()) {
321 BinaryFunction
&Function
= BFI
.second
;
322 if (!Function
.isSimple())
324 BC
->logBOLTErrorsAndQuitOnFatal(Function
.buildCFG(/*AllocId*/ 0));
328 void MachORewriteInstance::postProcessFunctions() {
329 for (auto &BFI
: BC
->getBinaryFunctions()) {
330 BinaryFunction
&Function
= BFI
.second
;
331 if (Function
.empty())
333 Function
.postProcessCFG();
335 Function
.print(outs(), "after building cfg");
339 void MachORewriteInstance::runOptimizationPasses() {
340 BinaryFunctionPassManager
Manager(*BC
);
341 if (opts::Instrument
) {
342 Manager
.registerPass(std::make_unique
<PatchEntries
>());
343 Manager
.registerPass(std::make_unique
<Instrumentation
>(opts::NeverPrint
));
346 Manager
.registerPass(std::make_unique
<ShortenInstructions
>(opts::NeverPrint
));
348 Manager
.registerPass(std::make_unique
<RemoveNops
>(opts::NeverPrint
));
350 Manager
.registerPass(std::make_unique
<NormalizeCFG
>(opts::PrintNormalized
));
352 Manager
.registerPass(
353 std::make_unique
<ReorderBasicBlocks
>(opts::PrintReordered
));
354 Manager
.registerPass(
355 std::make_unique
<FixupBranches
>(opts::PrintAfterBranchFixup
));
356 // This pass should always run last.*
357 Manager
.registerPass(
358 std::make_unique
<FinalizeFunctions
>(opts::PrintFinalized
));
360 BC
->logBOLTErrorsAndQuitOnFatal(Manager
.runPasses());
363 void MachORewriteInstance::mapInstrumentationSection(
364 StringRef SectionName
, BOLTLinker::SectionMapper MapSection
) {
365 if (!opts::Instrument
)
367 ErrorOr
<BinarySection
&> Section
= BC
->getUniqueSectionByName(SectionName
);
369 llvm::errs() << "Cannot find " + SectionName
+ " section\n";
372 if (!Section
->hasValidSectionID())
374 MapSection(*Section
, Section
->getAddress());
377 void MachORewriteInstance::mapCodeSections(
378 BOLTLinker::SectionMapper MapSection
) {
379 for (BinaryFunction
*Function
: BC
->getAllBinaryFunctions()) {
380 if (!Function
->isEmitted())
382 if (Function
->getOutputAddress() == 0)
384 ErrorOr
<BinarySection
&> FuncSection
= Function
->getCodeSection();
387 (Twine("Cannot find section for function ") + Function
->getOneName())
389 FuncSection
.getError());
391 FuncSection
->setOutputAddress(Function
->getOutputAddress());
392 LLVM_DEBUG(dbgs() << "BOLT: mapping 0x"
393 << Twine::utohexstr(FuncSection
->getAllocAddress()) << " to 0x"
394 << Twine::utohexstr(Function
->getOutputAddress()) << '\n');
395 MapSection(*FuncSection
, Function
->getOutputAddress());
396 Function
->setImageAddress(FuncSection
->getAllocAddress());
397 Function
->setImageSize(FuncSection
->getOutputSize());
400 if (opts::Instrument
) {
401 ErrorOr
<BinarySection
&> BOLT
= BC
->getUniqueSectionByName("__bolt");
403 llvm::errs() << "Cannot find __bolt section\n";
406 uint64_t Addr
= BOLT
->getAddress();
407 for (BinaryFunction
*Function
: BC
->getAllBinaryFunctions()) {
408 if (!Function
->isEmitted())
410 if (Function
->getOutputAddress() != 0)
412 ErrorOr
<BinarySection
&> FuncSection
= Function
->getCodeSection();
413 assert(FuncSection
&& "cannot find section for function");
414 Addr
= llvm::alignTo(Addr
, 4);
415 FuncSection
->setOutputAddress(Addr
);
416 MapSection(*FuncSection
, Addr
);
417 Function
->setFileOffset(Addr
- BOLT
->getAddress() +
418 BOLT
->getInputFileOffset());
419 Function
->setImageAddress(FuncSection
->getAllocAddress());
420 Function
->setImageSize(FuncSection
->getOutputSize());
421 BC
->registerNameAtAddress(Function
->getOneName(), Addr
, 0, 0);
422 Addr
+= FuncSection
->getOutputSize();
427 void MachORewriteInstance::emitAndLink() {
429 std::unique_ptr
<::llvm::ToolOutputFile
> TempOut
=
430 std::make_unique
<::llvm::ToolOutputFile
>(
431 opts::OutputFilename
+ ".bolt.o", EC
, sys::fs::OF_None
);
432 check_error(EC
, "cannot create output object file");
437 std::unique_ptr
<buffer_ostream
> BOS
=
438 std::make_unique
<buffer_ostream
>(TempOut
->os());
439 raw_pwrite_stream
*OS
= BOS
.get();
440 auto Streamer
= BC
->createStreamer(*OS
);
442 emitBinaryContext(*Streamer
, *BC
, getOrgSecPrefix());
445 std::unique_ptr
<MemoryBuffer
> ObjectMemBuffer
=
446 MemoryBuffer::getMemBuffer(BOS
->str(), "in-memory object file", false);
447 std::unique_ptr
<object::ObjectFile
> Obj
= cantFail(
448 object::ObjectFile::createObjectFile(ObjectMemBuffer
->getMemBufferRef()),
449 "error creating in-memory object");
450 assert(Obj
&& "createObjectFile cannot return nullptr");
452 auto EFMM
= std::make_unique
<ExecutableFileMemoryManager
>(*BC
);
453 EFMM
->setNewSecPrefix(getNewSecPrefix());
454 EFMM
->setOrgSecPrefix(getOrgSecPrefix());
456 Linker
= std::make_unique
<JITLinkLinker
>(*BC
, std::move(EFMM
));
457 Linker
->loadObject(ObjectMemBuffer
->getMemBufferRef(),
458 [this](auto MapSection
) {
459 // Assign addresses to all sections. If key corresponds
460 // to the object created by ourselves, call our regular
461 // mapping function. If we are loading additional objects
462 // as part of runtime libraries for instrumentation,
463 // treat them as extra sections.
464 mapCodeSections(MapSection
);
465 mapInstrumentationSection("__counters", MapSection
);
466 mapInstrumentationSection("__tables", MapSection
);
469 // TODO: Refactor addRuntimeLibSections to work properly on Mach-O
471 // if (auto *RtLibrary = BC->getRuntimeLibrary()) {
472 // RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) {
473 // mapInstrumentationSection("I__setup", MapSection);
474 // mapInstrumentationSection("I__fini", MapSection);
475 // mapInstrumentationSection("I__data", MapSection);
476 // mapInstrumentationSection("I__text", MapSection);
477 // mapInstrumentationSection("I__cstring", MapSection);
478 // mapInstrumentationSection("I__literal16", MapSection);
483 void MachORewriteInstance::writeInstrumentationSection(StringRef SectionName
,
484 raw_pwrite_stream
&OS
) {
485 if (!opts::Instrument
)
487 ErrorOr
<BinarySection
&> Section
= BC
->getUniqueSectionByName(SectionName
);
489 llvm::errs() << "Cannot find " + SectionName
+ " section\n";
492 if (!Section
->hasValidSectionID())
494 assert(Section
->getInputFileOffset() &&
495 "Section input offset cannot be zero");
496 assert(Section
->getAllocAddress() && "Section alloc address cannot be zero");
497 assert(Section
->getOutputSize() && "Section output size cannot be zero");
498 OS
.pwrite(reinterpret_cast<char *>(Section
->getAllocAddress()),
499 Section
->getOutputSize(), Section
->getInputFileOffset());
502 void MachORewriteInstance::rewriteFile() {
504 Out
= std::make_unique
<ToolOutputFile
>(opts::OutputFilename
, EC
,
506 check_error(EC
, "cannot create output executable file");
507 raw_fd_ostream
&OS
= Out
->os();
508 OS
<< InputFile
->getData();
510 for (auto &BFI
: BC
->getBinaryFunctions()) {
511 BinaryFunction
&Function
= BFI
.second
;
512 if (!Function
.isSimple())
514 assert(Function
.isEmitted() && "Simple function has not been emitted");
515 if (!opts::Instrument
&& (Function
.getImageSize() > Function
.getMaxSize()))
517 if (opts::Verbosity
>= 2)
518 outs() << "BOLT: rewriting function \"" << Function
<< "\"\n";
519 OS
.pwrite(reinterpret_cast<char *>(Function
.getImageAddress()),
520 Function
.getImageSize(), Function
.getFileOffset());
523 for (const BinaryFunction
*Function
: BC
->getInjectedBinaryFunctions()) {
524 OS
.pwrite(reinterpret_cast<char *>(Function
->getImageAddress()),
525 Function
->getImageSize(), Function
->getFileOffset());
528 writeInstrumentationSection("__counters", OS
);
529 writeInstrumentationSection("__tables", OS
);
531 // TODO: Refactor addRuntimeLibSections to work properly on Mach-O and
533 writeInstrumentationSection("I__setup", OS
);
534 writeInstrumentationSection("I__fini", OS
);
535 writeInstrumentationSection("I__data", OS
);
536 writeInstrumentationSection("I__text", OS
);
537 writeInstrumentationSection("I__cstring", OS
);
538 writeInstrumentationSection("I__literal16", OS
);
541 EC
= sys::fs::setPermissions(
542 opts::OutputFilename
,
543 static_cast<sys::fs::perms
>(sys::fs::perms::all_all
&
544 ~sys::fs::getUmask()));
545 check_error(EC
, "cannot set permissions of output file");
548 void MachORewriteInstance::adjustCommandLineOptions() {
549 //FIXME! Upstream change
550 // opts::CheckOverlappingElements = false;
551 if (!opts::AlignText
.getNumOccurrences())
552 opts::AlignText
= BC
->PageAlign
;
553 if (opts::Instrument
.getNumOccurrences())
554 opts::ForcePatch
= true;
555 opts::JumpTables
= JTS_MOVE
;
556 opts::InstrumentCalls
= false;
557 opts::RuntimeInstrumentationLib
= "libbolt_rt_instr_osx.a";
560 void MachORewriteInstance::run() {
561 adjustCommandLineOptions();
563 readSpecialSections();
565 discoverFileObjects();
567 preprocessProfileData();
569 disassembleFunctions();
571 processProfileDataPreCFG();
575 processProfileData();
577 postProcessFunctions();
579 runOptimizationPasses();
586 MachORewriteInstance::~MachORewriteInstance() {}