[LLVM][IR] Use splat syntax when printing ConstantExpr based splats. (#116856)
[llvm-project.git] / bolt / lib / Rewrite / MachORewriteInstance.cpp
blobc328232de61a34182b02e4f8b9a1498702a9793b
1 //===- bolt/Rewrite/MachORewriteInstance.cpp - MachO rewriter -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Rewrite/MachORewriteInstance.h"
10 #include "bolt/Core/BinaryContext.h"
11 #include "bolt/Core/BinaryEmitter.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "bolt/Core/JumpTable.h"
14 #include "bolt/Core/MCPlusBuilder.h"
15 #include "bolt/Passes/Instrumentation.h"
16 #include "bolt/Passes/PatchEntries.h"
17 #include "bolt/Profile/DataReader.h"
18 #include "bolt/Rewrite/BinaryPassManager.h"
19 #include "bolt/Rewrite/ExecutableFileMemoryManager.h"
20 #include "bolt/Rewrite/JITLinkLinker.h"
21 #include "bolt/Rewrite/RewriteInstance.h"
22 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
23 #include "bolt/Utils/Utils.h"
24 #include "llvm/MC/MCObjectStreamer.h"
25 #include "llvm/Support/Errc.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/ToolOutputFile.h"
28 #include <memory>
29 #include <optional>
31 namespace opts {
33 using namespace llvm;
34 extern cl::opt<unsigned> AlignText;
35 //FIXME! Upstream change
36 //extern cl::opt<bool> CheckOverlappingElements;
37 extern cl::opt<bool> ForcePatch;
38 extern cl::opt<bool> Instrument;
39 extern cl::opt<bool> InstrumentCalls;
40 extern cl::opt<bolt::JumpTableSupportLevel> JumpTables;
41 extern cl::opt<bool> KeepTmp;
42 extern cl::opt<bool> NeverPrint;
43 extern cl::opt<std::string> OutputFilename;
44 extern cl::opt<bool> PrintAfterBranchFixup;
45 extern cl::opt<bool> PrintFinalized;
46 extern cl::opt<bool> PrintNormalized;
47 extern cl::opt<bool> PrintReordered;
48 extern cl::opt<bool> PrintSections;
49 extern cl::opt<bool> PrintDisasm;
50 extern cl::opt<bool> PrintCFG;
51 extern cl::opt<std::string> RuntimeInstrumentationLib;
52 extern cl::opt<unsigned> Verbosity;
53 } // namespace opts
55 namespace llvm {
56 namespace bolt {
58 #define DEBUG_TYPE "bolt"
60 Expected<std::unique_ptr<MachORewriteInstance>>
61 MachORewriteInstance::create(object::MachOObjectFile *InputFile,
62 StringRef ToolPath) {
63 Error Err = Error::success();
64 auto MachORI =
65 std::make_unique<MachORewriteInstance>(InputFile, ToolPath, Err);
66 if (Err)
67 return std::move(Err);
68 return std::move(MachORI);
71 MachORewriteInstance::MachORewriteInstance(object::MachOObjectFile *InputFile,
72 StringRef ToolPath, Error &Err)
73 : InputFile(InputFile), ToolPath(ToolPath) {
74 ErrorAsOutParameter EAO(&Err);
75 Relocation::Arch = InputFile->makeTriple().getArch();
76 auto BCOrErr = BinaryContext::createBinaryContext(
77 InputFile->makeTriple(), InputFile->getFileName(), nullptr,
78 /* IsPIC */ true, DWARFContext::create(*InputFile),
79 {llvm::outs(), llvm::errs()});
80 if (Error E = BCOrErr.takeError()) {
81 Err = std::move(E);
82 return;
84 BC = std::move(BCOrErr.get());
85 BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(
86 createMCPlusBuilder(BC->TheTriple->getArch(), BC->MIA.get(),
87 BC->MII.get(), BC->MRI.get(), BC->STI.get())));
88 if (opts::Instrument)
89 BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
92 Error MachORewriteInstance::setProfile(StringRef Filename) {
93 if (!sys::fs::exists(Filename))
94 return errorCodeToError(make_error_code(errc::no_such_file_or_directory));
96 if (ProfileReader) {
97 // Already exists
98 return make_error<StringError>(
99 Twine("multiple profiles specified: ") + ProfileReader->getFilename() +
100 " and " + Filename, inconvertibleErrorCode());
103 ProfileReader = std::make_unique<DataReader>(Filename);
104 return Error::success();
107 void MachORewriteInstance::preprocessProfileData() {
108 if (!ProfileReader)
109 return;
110 if (Error E = ProfileReader->preprocessProfile(*BC.get()))
111 report_error("cannot pre-process profile", std::move(E));
114 void MachORewriteInstance::processProfileDataPreCFG() {
115 if (!ProfileReader)
116 return;
117 if (Error E = ProfileReader->readProfilePreCFG(*BC.get()))
118 report_error("cannot read profile pre-CFG", std::move(E));
121 void MachORewriteInstance::processProfileData() {
122 if (!ProfileReader)
123 return;
124 if (Error E = ProfileReader->readProfile(*BC.get()))
125 report_error("cannot read profile", std::move(E));
128 void MachORewriteInstance::readSpecialSections() {
129 for (const object::SectionRef &Section : InputFile->sections()) {
130 Expected<StringRef> SectionName = Section.getName();;
131 check_error(SectionName.takeError(), "cannot get section name");
132 // Only register sections with names.
133 if (!SectionName->empty()) {
134 BC->registerSection(Section);
135 LLVM_DEBUG(
136 dbgs() << "BOLT-DEBUG: registering section " << *SectionName
137 << " @ 0x" << Twine::utohexstr(Section.getAddress()) << ":0x"
138 << Twine::utohexstr(Section.getAddress() + Section.getSize())
139 << "\n");
143 if (opts::PrintSections) {
144 outs() << "BOLT-INFO: Sections from original binary:\n";
145 BC->printSections(outs());
149 namespace {
151 struct DataInCodeRegion {
152 explicit DataInCodeRegion(DiceRef D) {
153 D.getOffset(Offset);
154 D.getLength(Length);
155 D.getKind(Kind);
158 uint32_t Offset;
159 uint16_t Length;
160 uint16_t Kind;
163 std::vector<DataInCodeRegion> readDataInCode(const MachOObjectFile &O) {
164 const MachO::linkedit_data_command DataInCodeLC =
165 O.getDataInCodeLoadCommand();
166 const uint32_t NumberOfEntries =
167 DataInCodeLC.datasize / sizeof(MachO::data_in_code_entry);
168 std::vector<DataInCodeRegion> DataInCode;
169 DataInCode.reserve(NumberOfEntries);
170 for (auto I = O.begin_dices(), E = O.end_dices(); I != E; ++I)
171 DataInCode.emplace_back(*I);
172 llvm::stable_sort(DataInCode, [](DataInCodeRegion LHS, DataInCodeRegion RHS) {
173 return LHS.Offset < RHS.Offset;
175 return DataInCode;
178 std::optional<uint64_t> readStartAddress(const MachOObjectFile &O) {
179 std::optional<uint64_t> StartOffset;
180 std::optional<uint64_t> TextVMAddr;
181 for (const object::MachOObjectFile::LoadCommandInfo &LC : O.load_commands()) {
182 switch (LC.C.cmd) {
183 case MachO::LC_MAIN: {
184 MachO::entry_point_command LCMain = O.getEntryPointCommand(LC);
185 StartOffset = LCMain.entryoff;
186 break;
188 case MachO::LC_SEGMENT: {
189 MachO::segment_command LCSeg = O.getSegmentLoadCommand(LC);
190 StringRef SegmentName(LCSeg.segname,
191 strnlen(LCSeg.segname, sizeof(LCSeg.segname)));
192 if (SegmentName == "__TEXT")
193 TextVMAddr = LCSeg.vmaddr;
194 break;
196 case MachO::LC_SEGMENT_64: {
197 MachO::segment_command_64 LCSeg = O.getSegment64LoadCommand(LC);
198 StringRef SegmentName(LCSeg.segname,
199 strnlen(LCSeg.segname, sizeof(LCSeg.segname)));
200 if (SegmentName == "__TEXT")
201 TextVMAddr = LCSeg.vmaddr;
202 break;
204 default:
205 continue;
208 return (TextVMAddr && StartOffset)
209 ? std::optional<uint64_t>(*TextVMAddr + *StartOffset)
210 : std::nullopt;
213 } // anonymous namespace
215 void MachORewriteInstance::discoverFileObjects() {
216 std::vector<SymbolRef> FunctionSymbols;
217 for (const SymbolRef &S : InputFile->symbols()) {
218 SymbolRef::Type Type = cantFail(S.getType(), "cannot get symbol type");
219 if (Type == SymbolRef::ST_Function)
220 FunctionSymbols.push_back(S);
222 if (FunctionSymbols.empty())
223 return;
224 llvm::stable_sort(
225 FunctionSymbols, [](const SymbolRef &LHS, const SymbolRef &RHS) {
226 return cantFail(LHS.getValue()) < cantFail(RHS.getValue());
228 for (size_t Index = 0; Index < FunctionSymbols.size(); ++Index) {
229 const uint64_t Address = cantFail(FunctionSymbols[Index].getValue());
230 ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
231 // TODO: It happens for some symbols (e.g. __mh_execute_header).
232 // Add proper logic to handle them correctly.
233 if (!Section) {
234 errs() << "BOLT-WARNING: no section found for address " << Address
235 << "\n";
236 continue;
239 std::string SymbolName =
240 cantFail(FunctionSymbols[Index].getName(), "cannot get symbol name")
241 .str();
242 // Uniquify names of local symbols.
243 if (!(cantFail(FunctionSymbols[Index].getFlags()) & SymbolRef::SF_Global))
244 SymbolName = NR.uniquify(SymbolName);
246 section_iterator S = cantFail(FunctionSymbols[Index].getSection());
247 uint64_t EndAddress = S->getAddress() + S->getSize();
249 size_t NFIndex = Index + 1;
250 // Skip aliases.
251 while (NFIndex < FunctionSymbols.size() &&
252 cantFail(FunctionSymbols[NFIndex].getValue()) == Address)
253 ++NFIndex;
254 if (NFIndex < FunctionSymbols.size() &&
255 S == cantFail(FunctionSymbols[NFIndex].getSection()))
256 EndAddress = cantFail(FunctionSymbols[NFIndex].getValue());
258 const uint64_t SymbolSize = EndAddress - Address;
259 const auto It = BC->getBinaryFunctions().find(Address);
260 if (It == BC->getBinaryFunctions().end()) {
261 BinaryFunction *Function = BC->createBinaryFunction(
262 std::move(SymbolName), *Section, Address, SymbolSize);
263 if (!opts::Instrument)
264 Function->setOutputAddress(Function->getAddress());
266 } else {
267 It->second.addAlternativeName(std::move(SymbolName));
271 const std::vector<DataInCodeRegion> DataInCode = readDataInCode(*InputFile);
273 for (auto &BFI : BC->getBinaryFunctions()) {
274 BinaryFunction &Function = BFI.second;
275 Function.setMaxSize(Function.getSize());
277 ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData();
278 if (!FunctionData) {
279 errs() << "BOLT-ERROR: corresponding section is non-executable or "
280 << "empty for function " << Function << '\n';
281 continue;
284 // Treat zero-sized functions as non-simple ones.
285 if (Function.getSize() == 0) {
286 Function.setSimple(false);
287 continue;
290 // Offset of the function in the file.
291 const auto *FileBegin =
292 reinterpret_cast<const uint8_t *>(InputFile->getData().data());
293 Function.setFileOffset(FunctionData->begin() - FileBegin);
295 // Treat functions which contain data in code as non-simple ones.
296 const auto It = std::lower_bound(
297 DataInCode.cbegin(), DataInCode.cend(), Function.getFileOffset(),
298 [](DataInCodeRegion D, uint64_t Offset) { return D.Offset < Offset; });
299 if (It != DataInCode.cend() &&
300 It->Offset + It->Length <=
301 Function.getFileOffset() + Function.getMaxSize())
302 Function.setSimple(false);
305 BC->StartFunctionAddress = readStartAddress(*InputFile);
308 void MachORewriteInstance::disassembleFunctions() {
309 for (auto &BFI : BC->getBinaryFunctions()) {
310 BinaryFunction &Function = BFI.second;
311 if (!Function.isSimple())
312 continue;
313 BC->logBOLTErrorsAndQuitOnFatal(Function.disassemble());
314 if (opts::PrintDisasm)
315 Function.print(outs(), "after disassembly");
319 void MachORewriteInstance::buildFunctionsCFG() {
320 for (auto &BFI : BC->getBinaryFunctions()) {
321 BinaryFunction &Function = BFI.second;
322 if (!Function.isSimple())
323 continue;
324 BC->logBOLTErrorsAndQuitOnFatal(Function.buildCFG(/*AllocId*/ 0));
328 void MachORewriteInstance::postProcessFunctions() {
329 for (auto &BFI : BC->getBinaryFunctions()) {
330 BinaryFunction &Function = BFI.second;
331 if (Function.empty())
332 continue;
333 Function.postProcessCFG();
334 if (opts::PrintCFG)
335 Function.print(outs(), "after building cfg");
339 void MachORewriteInstance::runOptimizationPasses() {
340 BinaryFunctionPassManager Manager(*BC);
341 if (opts::Instrument) {
342 Manager.registerPass(std::make_unique<PatchEntries>());
343 Manager.registerPass(std::make_unique<Instrumentation>(opts::NeverPrint));
346 Manager.registerPass(std::make_unique<ShortenInstructions>(opts::NeverPrint));
348 Manager.registerPass(std::make_unique<RemoveNops>(opts::NeverPrint));
350 Manager.registerPass(std::make_unique<NormalizeCFG>(opts::PrintNormalized));
352 Manager.registerPass(
353 std::make_unique<ReorderBasicBlocks>(opts::PrintReordered));
354 Manager.registerPass(
355 std::make_unique<FixupBranches>(opts::PrintAfterBranchFixup));
356 // This pass should always run last.*
357 Manager.registerPass(
358 std::make_unique<FinalizeFunctions>(opts::PrintFinalized));
360 BC->logBOLTErrorsAndQuitOnFatal(Manager.runPasses());
363 void MachORewriteInstance::mapInstrumentationSection(
364 StringRef SectionName, BOLTLinker::SectionMapper MapSection) {
365 if (!opts::Instrument)
366 return;
367 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
368 if (!Section) {
369 llvm::errs() << "Cannot find " + SectionName + " section\n";
370 exit(1);
372 if (!Section->hasValidSectionID())
373 return;
374 MapSection(*Section, Section->getAddress());
377 void MachORewriteInstance::mapCodeSections(
378 BOLTLinker::SectionMapper MapSection) {
379 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
380 if (!Function->isEmitted())
381 continue;
382 if (Function->getOutputAddress() == 0)
383 continue;
384 ErrorOr<BinarySection &> FuncSection = Function->getCodeSection();
385 if (!FuncSection)
386 report_error(
387 (Twine("Cannot find section for function ") + Function->getOneName())
388 .str(),
389 FuncSection.getError());
391 FuncSection->setOutputAddress(Function->getOutputAddress());
392 LLVM_DEBUG(dbgs() << "BOLT: mapping 0x"
393 << Twine::utohexstr(FuncSection->getAllocAddress()) << " to 0x"
394 << Twine::utohexstr(Function->getOutputAddress()) << '\n');
395 MapSection(*FuncSection, Function->getOutputAddress());
396 Function->setImageAddress(FuncSection->getAllocAddress());
397 Function->setImageSize(FuncSection->getOutputSize());
400 if (opts::Instrument) {
401 ErrorOr<BinarySection &> BOLT = BC->getUniqueSectionByName("__bolt");
402 if (!BOLT) {
403 llvm::errs() << "Cannot find __bolt section\n";
404 exit(1);
406 uint64_t Addr = BOLT->getAddress();
407 for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
408 if (!Function->isEmitted())
409 continue;
410 if (Function->getOutputAddress() != 0)
411 continue;
412 ErrorOr<BinarySection &> FuncSection = Function->getCodeSection();
413 assert(FuncSection && "cannot find section for function");
414 Addr = llvm::alignTo(Addr, 4);
415 FuncSection->setOutputAddress(Addr);
416 MapSection(*FuncSection, Addr);
417 Function->setFileOffset(Addr - BOLT->getAddress() +
418 BOLT->getInputFileOffset());
419 Function->setImageAddress(FuncSection->getAllocAddress());
420 Function->setImageSize(FuncSection->getOutputSize());
421 BC->registerNameAtAddress(Function->getOneName(), Addr, 0, 0);
422 Addr += FuncSection->getOutputSize();
427 void MachORewriteInstance::emitAndLink() {
428 std::error_code EC;
429 std::unique_ptr<::llvm::ToolOutputFile> TempOut =
430 std::make_unique<::llvm::ToolOutputFile>(
431 opts::OutputFilename + ".bolt.o", EC, sys::fs::OF_None);
432 check_error(EC, "cannot create output object file");
434 if (opts::KeepTmp)
435 TempOut->keep();
437 std::unique_ptr<buffer_ostream> BOS =
438 std::make_unique<buffer_ostream>(TempOut->os());
439 raw_pwrite_stream *OS = BOS.get();
440 auto Streamer = BC->createStreamer(*OS);
442 emitBinaryContext(*Streamer, *BC, getOrgSecPrefix());
443 Streamer->finish();
445 std::unique_ptr<MemoryBuffer> ObjectMemBuffer =
446 MemoryBuffer::getMemBuffer(BOS->str(), "in-memory object file", false);
447 std::unique_ptr<object::ObjectFile> Obj = cantFail(
448 object::ObjectFile::createObjectFile(ObjectMemBuffer->getMemBufferRef()),
449 "error creating in-memory object");
450 assert(Obj && "createObjectFile cannot return nullptr");
452 auto EFMM = std::make_unique<ExecutableFileMemoryManager>(*BC);
453 EFMM->setNewSecPrefix(getNewSecPrefix());
454 EFMM->setOrgSecPrefix(getOrgSecPrefix());
456 Linker = std::make_unique<JITLinkLinker>(*BC, std::move(EFMM));
457 Linker->loadObject(ObjectMemBuffer->getMemBufferRef(),
458 [this](auto MapSection) {
459 // Assign addresses to all sections. If key corresponds
460 // to the object created by ourselves, call our regular
461 // mapping function. If we are loading additional objects
462 // as part of runtime libraries for instrumentation,
463 // treat them as extra sections.
464 mapCodeSections(MapSection);
465 mapInstrumentationSection("__counters", MapSection);
466 mapInstrumentationSection("__tables", MapSection);
469 // TODO: Refactor addRuntimeLibSections to work properly on Mach-O
470 // and use it here.
471 // if (auto *RtLibrary = BC->getRuntimeLibrary()) {
472 // RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) {
473 // mapInstrumentationSection("I__setup", MapSection);
474 // mapInstrumentationSection("I__fini", MapSection);
475 // mapInstrumentationSection("I__data", MapSection);
476 // mapInstrumentationSection("I__text", MapSection);
477 // mapInstrumentationSection("I__cstring", MapSection);
478 // mapInstrumentationSection("I__literal16", MapSection);
479 // });
480 // }
483 void MachORewriteInstance::writeInstrumentationSection(StringRef SectionName,
484 raw_pwrite_stream &OS) {
485 if (!opts::Instrument)
486 return;
487 ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
488 if (!Section) {
489 llvm::errs() << "Cannot find " + SectionName + " section\n";
490 exit(1);
492 if (!Section->hasValidSectionID())
493 return;
494 assert(Section->getInputFileOffset() &&
495 "Section input offset cannot be zero");
496 assert(Section->getAllocAddress() && "Section alloc address cannot be zero");
497 assert(Section->getOutputSize() && "Section output size cannot be zero");
498 OS.pwrite(reinterpret_cast<char *>(Section->getAllocAddress()),
499 Section->getOutputSize(), Section->getInputFileOffset());
502 void MachORewriteInstance::rewriteFile() {
503 std::error_code EC;
504 Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC,
505 sys::fs::OF_None);
506 check_error(EC, "cannot create output executable file");
507 raw_fd_ostream &OS = Out->os();
508 OS << InputFile->getData();
510 for (auto &BFI : BC->getBinaryFunctions()) {
511 BinaryFunction &Function = BFI.second;
512 if (!Function.isSimple())
513 continue;
514 assert(Function.isEmitted() && "Simple function has not been emitted");
515 if (!opts::Instrument && (Function.getImageSize() > Function.getMaxSize()))
516 continue;
517 if (opts::Verbosity >= 2)
518 outs() << "BOLT: rewriting function \"" << Function << "\"\n";
519 OS.pwrite(reinterpret_cast<char *>(Function.getImageAddress()),
520 Function.getImageSize(), Function.getFileOffset());
523 for (const BinaryFunction *Function : BC->getInjectedBinaryFunctions()) {
524 OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()),
525 Function->getImageSize(), Function->getFileOffset());
528 writeInstrumentationSection("__counters", OS);
529 writeInstrumentationSection("__tables", OS);
531 // TODO: Refactor addRuntimeLibSections to work properly on Mach-O and
532 // use it here.
533 writeInstrumentationSection("I__setup", OS);
534 writeInstrumentationSection("I__fini", OS);
535 writeInstrumentationSection("I__data", OS);
536 writeInstrumentationSection("I__text", OS);
537 writeInstrumentationSection("I__cstring", OS);
538 writeInstrumentationSection("I__literal16", OS);
540 Out->keep();
541 EC = sys::fs::setPermissions(
542 opts::OutputFilename,
543 static_cast<sys::fs::perms>(sys::fs::perms::all_all &
544 ~sys::fs::getUmask()));
545 check_error(EC, "cannot set permissions of output file");
548 void MachORewriteInstance::adjustCommandLineOptions() {
549 //FIXME! Upstream change
550 // opts::CheckOverlappingElements = false;
551 if (!opts::AlignText.getNumOccurrences())
552 opts::AlignText = BC->PageAlign;
553 if (opts::Instrument.getNumOccurrences())
554 opts::ForcePatch = true;
555 opts::JumpTables = JTS_MOVE;
556 opts::InstrumentCalls = false;
557 opts::RuntimeInstrumentationLib = "libbolt_rt_instr_osx.a";
560 void MachORewriteInstance::run() {
561 adjustCommandLineOptions();
563 readSpecialSections();
565 discoverFileObjects();
567 preprocessProfileData();
569 disassembleFunctions();
571 processProfileDataPreCFG();
573 buildFunctionsCFG();
575 processProfileData();
577 postProcessFunctions();
579 runOptimizationPasses();
581 emitAndLink();
583 rewriteFile();
586 MachORewriteInstance::~MachORewriteInstance() {}
588 } // namespace bolt
589 } // namespace llvm