[MIParser] Set RegClassOrRegBank during instruction parsing
[llvm-complete.git] / tools / dsymutil / MachOUtils.cpp
blobec9df299ebb22bbabe2f4b3a21ce0729dfa3b429
1 //===-- MachOUtils.cpp - Mach-o specific helpers for dsymutil ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "MachOUtils.h"
10 #include "BinaryHolder.h"
11 #include "DebugMap.h"
12 #include "LinkUtils.h"
13 #include "NonRelocatableStringpool.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCMachObjectWriter.h"
16 #include "llvm/MC/MCObjectStreamer.h"
17 #include "llvm/MC/MCSectionMachO.h"
18 #include "llvm/MC/MCStreamer.h"
19 #include "llvm/Object/MachO.h"
20 #include "llvm/Support/FileUtilities.h"
21 #include "llvm/Support/Program.h"
22 #include "llvm/Support/WithColor.h"
23 #include "llvm/Support/raw_ostream.h"
25 namespace llvm {
26 namespace dsymutil {
27 namespace MachOUtils {
29 llvm::Error ArchAndFile::createTempFile() {
30 llvm::SmallString<128> TmpModel;
31 llvm::sys::path::system_temp_directory(true, TmpModel);
32 llvm::sys::path::append(TmpModel, "dsym.tmp%%%%%.dwarf");
33 Expected<sys::fs::TempFile> T = sys::fs::TempFile::create(TmpModel);
35 if (!T)
36 return T.takeError();
38 File = std::make_unique<sys::fs::TempFile>(std::move(*T));
39 return Error::success();
42 llvm::StringRef ArchAndFile::path() const { return File->TmpName; }
44 ArchAndFile::~ArchAndFile() {
45 if (File)
46 if (auto E = File->discard())
47 llvm::consumeError(std::move(E));
50 std::string getArchName(StringRef Arch) {
51 if (Arch.startswith("thumb"))
52 return (llvm::Twine("arm") + Arch.drop_front(5)).str();
53 return Arch;
56 static bool runLipo(StringRef SDKPath, SmallVectorImpl<StringRef> &Args) {
57 auto Path = sys::findProgramByName("lipo", makeArrayRef(SDKPath));
58 if (!Path)
59 Path = sys::findProgramByName("lipo");
61 if (!Path) {
62 WithColor::error() << "lipo: " << Path.getError().message() << "\n";
63 return false;
66 std::string ErrMsg;
67 int result = sys::ExecuteAndWait(*Path, Args, None, {}, 0, 0, &ErrMsg);
68 if (result) {
69 WithColor::error() << "lipo: " << ErrMsg << "\n";
70 return false;
73 return true;
76 bool generateUniversalBinary(SmallVectorImpl<ArchAndFile> &ArchFiles,
77 StringRef OutputFileName,
78 const LinkOptions &Options, StringRef SDKPath) {
79 // No need to merge one file into a universal fat binary.
80 if (ArchFiles.size() == 1) {
81 if (auto E = ArchFiles.front().File->keep(OutputFileName)) {
82 WithColor::error() << "while keeping " << ArchFiles.front().path()
83 << " as " << OutputFileName << ": "
84 << toString(std::move(E)) << "\n";
85 return false;
87 return true;
90 SmallVector<StringRef, 8> Args;
91 Args.push_back("lipo");
92 Args.push_back("-create");
94 for (auto &Thin : ArchFiles)
95 Args.push_back(Thin.path());
97 // Align segments to match dsymutil-classic alignment
98 for (auto &Thin : ArchFiles) {
99 Thin.Arch = getArchName(Thin.Arch);
100 Args.push_back("-segalign");
101 Args.push_back(Thin.Arch);
102 Args.push_back("20");
105 Args.push_back("-output");
106 Args.push_back(OutputFileName.data());
108 if (Options.Verbose) {
109 outs() << "Running lipo\n";
110 for (auto Arg : Args)
111 outs() << ' ' << Arg;
112 outs() << "\n";
115 return Options.NoOutput ? true : runLipo(SDKPath, Args);
118 // Return a MachO::segment_command_64 that holds the same values as the passed
119 // MachO::segment_command. We do that to avoid having to duplicate the logic
120 // for 32bits and 64bits segments.
121 struct MachO::segment_command_64 adaptFrom32bits(MachO::segment_command Seg) {
122 MachO::segment_command_64 Seg64;
123 Seg64.cmd = Seg.cmd;
124 Seg64.cmdsize = Seg.cmdsize;
125 memcpy(Seg64.segname, Seg.segname, sizeof(Seg.segname));
126 Seg64.vmaddr = Seg.vmaddr;
127 Seg64.vmsize = Seg.vmsize;
128 Seg64.fileoff = Seg.fileoff;
129 Seg64.filesize = Seg.filesize;
130 Seg64.maxprot = Seg.maxprot;
131 Seg64.initprot = Seg.initprot;
132 Seg64.nsects = Seg.nsects;
133 Seg64.flags = Seg.flags;
134 return Seg64;
137 // Iterate on all \a Obj segments, and apply \a Handler to them.
138 template <typename FunctionTy>
139 static void iterateOnSegments(const object::MachOObjectFile &Obj,
140 FunctionTy Handler) {
141 for (const auto &LCI : Obj.load_commands()) {
142 MachO::segment_command_64 Segment;
143 if (LCI.C.cmd == MachO::LC_SEGMENT)
144 Segment = adaptFrom32bits(Obj.getSegmentLoadCommand(LCI));
145 else if (LCI.C.cmd == MachO::LC_SEGMENT_64)
146 Segment = Obj.getSegment64LoadCommand(LCI);
147 else
148 continue;
150 Handler(Segment);
154 // Transfer the symbols described by \a NList to \a NewSymtab which is just the
155 // raw contents of the symbol table for the dSYM companion file. \returns
156 // whether the symbol was transferred or not.
157 template <typename NListTy>
158 static bool transferSymbol(NListTy NList, bool IsLittleEndian,
159 StringRef Strings, SmallVectorImpl<char> &NewSymtab,
160 NonRelocatableStringpool &NewStrings,
161 bool &InDebugNote) {
162 // Do not transfer undefined symbols, we want real addresses.
163 if ((NList.n_type & MachO::N_TYPE) == MachO::N_UNDF)
164 return false;
166 StringRef Name = StringRef(Strings.begin() + NList.n_strx);
167 if (InDebugNote) {
168 InDebugNote =
169 (NList.n_type != MachO::N_SO) || (!Name.empty() && Name[0] != '\0');
170 return false;
171 } else if (NList.n_type == MachO::N_SO) {
172 InDebugNote = true;
173 return false;
176 // FIXME: The + 1 is here to mimic dsymutil-classic that has 2 empty
177 // strings at the start of the generated string table (There is
178 // corresponding code in the string table emission).
179 NList.n_strx = NewStrings.getStringOffset(Name) + 1;
180 if (IsLittleEndian != sys::IsLittleEndianHost)
181 MachO::swapStruct(NList);
183 NewSymtab.append(reinterpret_cast<char *>(&NList),
184 reinterpret_cast<char *>(&NList + 1));
185 return true;
188 // Wrapper around transferSymbol to transfer all of \a Obj symbols
189 // to \a NewSymtab. This function does not write in the output file.
190 // \returns the number of symbols in \a NewSymtab.
191 static unsigned transferSymbols(const object::MachOObjectFile &Obj,
192 SmallVectorImpl<char> &NewSymtab,
193 NonRelocatableStringpool &NewStrings) {
194 unsigned Syms = 0;
195 StringRef Strings = Obj.getStringTableData();
196 bool IsLittleEndian = Obj.isLittleEndian();
197 bool InDebugNote = false;
199 if (Obj.is64Bit()) {
200 for (const object::SymbolRef &Symbol : Obj.symbols()) {
201 object::DataRefImpl DRI = Symbol.getRawDataRefImpl();
202 if (transferSymbol(Obj.getSymbol64TableEntry(DRI), IsLittleEndian,
203 Strings, NewSymtab, NewStrings, InDebugNote))
204 ++Syms;
206 } else {
207 for (const object::SymbolRef &Symbol : Obj.symbols()) {
208 object::DataRefImpl DRI = Symbol.getRawDataRefImpl();
209 if (transferSymbol(Obj.getSymbolTableEntry(DRI), IsLittleEndian, Strings,
210 NewSymtab, NewStrings, InDebugNote))
211 ++Syms;
214 return Syms;
217 static MachO::section
218 getSection(const object::MachOObjectFile &Obj,
219 const MachO::segment_command &Seg,
220 const object::MachOObjectFile::LoadCommandInfo &LCI, unsigned Idx) {
221 return Obj.getSection(LCI, Idx);
224 static MachO::section_64
225 getSection(const object::MachOObjectFile &Obj,
226 const MachO::segment_command_64 &Seg,
227 const object::MachOObjectFile::LoadCommandInfo &LCI, unsigned Idx) {
228 return Obj.getSection64(LCI, Idx);
231 // Transfer \a Segment from \a Obj to the output file. This calls into \a Writer
232 // to write these load commands directly in the output file at the current
233 // position.
234 // The function also tries to find a hole in the address map to fit the __DWARF
235 // segment of \a DwarfSegmentSize size. \a EndAddress is updated to point at the
236 // highest segment address.
237 // When the __LINKEDIT segment is transferred, its offset and size are set resp.
238 // to \a LinkeditOffset and \a LinkeditSize.
239 template <typename SegmentTy>
240 static void transferSegmentAndSections(
241 const object::MachOObjectFile::LoadCommandInfo &LCI, SegmentTy Segment,
242 const object::MachOObjectFile &Obj, MachObjectWriter &Writer,
243 uint64_t LinkeditOffset, uint64_t LinkeditSize, uint64_t DwarfSegmentSize,
244 uint64_t &GapForDwarf, uint64_t &EndAddress) {
245 if (StringRef("__DWARF") == Segment.segname)
246 return;
248 Segment.fileoff = Segment.filesize = 0;
250 if (StringRef("__LINKEDIT") == Segment.segname) {
251 Segment.fileoff = LinkeditOffset;
252 Segment.filesize = LinkeditSize;
253 // Resize vmsize by rounding to the page size.
254 Segment.vmsize = alignTo(LinkeditSize, 0x1000);
257 // Check if the end address of the last segment and our current
258 // start address leave a sufficient gap to store the __DWARF
259 // segment.
260 uint64_t PrevEndAddress = EndAddress;
261 EndAddress = alignTo(EndAddress, 0x1000);
262 if (GapForDwarf == UINT64_MAX && Segment.vmaddr > EndAddress &&
263 Segment.vmaddr - EndAddress >= DwarfSegmentSize)
264 GapForDwarf = EndAddress;
266 // The segments are not necessarily sorted by their vmaddr.
267 EndAddress =
268 std::max<uint64_t>(PrevEndAddress, Segment.vmaddr + Segment.vmsize);
269 unsigned nsects = Segment.nsects;
270 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
271 MachO::swapStruct(Segment);
272 Writer.W.OS.write(reinterpret_cast<char *>(&Segment), sizeof(Segment));
273 for (unsigned i = 0; i < nsects; ++i) {
274 auto Sect = getSection(Obj, Segment, LCI, i);
275 Sect.offset = Sect.reloff = Sect.nreloc = 0;
276 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
277 MachO::swapStruct(Sect);
278 Writer.W.OS.write(reinterpret_cast<char *>(&Sect), sizeof(Sect));
282 // Write the __DWARF segment load command to the output file.
283 static void createDwarfSegment(uint64_t VMAddr, uint64_t FileOffset,
284 uint64_t FileSize, unsigned NumSections,
285 MCAsmLayout &Layout, MachObjectWriter &Writer) {
286 Writer.writeSegmentLoadCommand("__DWARF", NumSections, VMAddr,
287 alignTo(FileSize, 0x1000), FileOffset,
288 FileSize, /* MaxProt */ 7,
289 /* InitProt =*/3);
291 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
292 MCSection *Sec = Layout.getSectionOrder()[i];
293 if (Sec->begin() == Sec->end() || !Layout.getSectionFileSize(Sec))
294 continue;
296 unsigned Align = Sec->getAlignment();
297 if (Align > 1) {
298 VMAddr = alignTo(VMAddr, Align);
299 FileOffset = alignTo(FileOffset, Align);
301 Writer.writeSection(Layout, *Sec, VMAddr, FileOffset, 0, 0, 0);
303 FileOffset += Layout.getSectionAddressSize(Sec);
304 VMAddr += Layout.getSectionAddressSize(Sec);
308 static bool isExecutable(const object::MachOObjectFile &Obj) {
309 if (Obj.is64Bit())
310 return Obj.getHeader64().filetype != MachO::MH_OBJECT;
311 else
312 return Obj.getHeader().filetype != MachO::MH_OBJECT;
315 static bool hasLinkEditSegment(const object::MachOObjectFile &Obj) {
316 bool HasLinkEditSegment = false;
317 iterateOnSegments(Obj, [&](const MachO::segment_command_64 &Segment) {
318 if (StringRef("__LINKEDIT") == Segment.segname)
319 HasLinkEditSegment = true;
321 return HasLinkEditSegment;
324 static unsigned segmentLoadCommandSize(bool Is64Bit, unsigned NumSections) {
325 if (Is64Bit)
326 return sizeof(MachO::segment_command_64) +
327 NumSections * sizeof(MachO::section_64);
329 return sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
332 // Stream a dSYM companion binary file corresponding to the binary referenced
333 // by \a DM to \a OutFile. The passed \a MS MCStreamer is setup to write to
334 // \a OutFile and it must be using a MachObjectWriter object to do so.
//
// The output is produced in this order:
//   1. Mach-O header (file type MH_DSYM) and load commands: LC_UUID and
//      LC_BUILD_VERSION copied from the input, an optional symtab command,
//      every input segment except __DWARF, then the new __DWARF segment.
//   2. Zero padding up to a 0x1000 boundary.
//   3. The transferred symbol table and its string table (when emitted).
//   4. Zero padding up to the next 0x1000 boundary.
//   5. The contents of the sections laid out by the assembler.
// The asserts on OutFile.tell() check that the sizes computed up front match
// what was actually written.
//
// \returns true once everything has been written. The failure paths return
// whatever error() yields (error() is defined outside this function).
335 bool generateDsymCompanion(const DebugMap &DM, SymbolMapTranslator &Translator,
336 MCStreamer &MS, raw_fd_ostream &OutFile) {
// Unchecked downcasts: they rely on the contract stated above.
337 auto &ObjectStreamer = static_cast<MCObjectStreamer &>(MS);
338 MCAssembler &MCAsm = ObjectStreamer.getAssembler();
339 auto &Writer = static_cast<MachObjectWriter &>(MCAsm.getWriter());
341 // Layout but don't emit.
342 ObjectStreamer.flushPendingLabels();
343 MCAsmLayout Layout(MCAsm);
344 MCAsm.layout(Layout);
// Open the binary named by the debug map and view it as a Mach-O object for
// the debug map's triple.
346 BinaryHolder InputBinaryHolder(false);
348 auto ObjectEntry = InputBinaryHolder.getObjectEntry(DM.getBinaryPath());
349 if (!ObjectEntry) {
350 auto Err = ObjectEntry.takeError();
351 return error(Twine("opening ") + DM.getBinaryPath() + ": " +
352 toString(std::move(Err)),
353 "output file streaming");
356 auto Object =
357 ObjectEntry->getObjectAs<object::MachOObjectFile>(DM.getTriple());
358 if (!Object) {
359 auto Err = Object.takeError();
360 return error(Twine("opening ") + DM.getBinaryPath() + ": " +
361 toString(std::move(Err)),
362 "output file streaming");
365 auto &InputBinary = *Object;
// Bitness comes from the writer, not from the input binary.
367 bool Is64Bit = Writer.is64Bit();
368 MachO::symtab_command SymtabCmd = InputBinary.getSymtabLoadCommand();
370 // Compute the number of load commands we will need.
371 unsigned LoadCommandSize = 0;
372 unsigned NumLoadCommands = 0;
374 // Get LC_UUID and LC_BUILD_VERSION.
// UUIDCmd is zeroed so that UUIDCmd.cmd == 0 means "no LC_UUID seen yet".
375 MachO::uuid_command UUIDCmd;
376 SmallVector<MachO::build_version_command, 2> BuildVersionCmd;
377 memset(&UUIDCmd, 0, sizeof(UUIDCmd));
378 for (auto &LCI : InputBinary.load_commands()) {
379 switch (LCI.C.cmd) {
380 case MachO::LC_UUID:
381 if (UUIDCmd.cmd)
382 return error("Binary contains more than one UUID");
383 UUIDCmd = InputBinary.getUuidCommand(LCI);
384 ++NumLoadCommands;
385 LoadCommandSize += sizeof(UUIDCmd);
386 break;
387 case MachO::LC_BUILD_VERSION: {
// NOTE(review): the memset is immediately overwritten by the assignment that
// follows it.
388 MachO::build_version_command Cmd;
389 memset(&Cmd, 0, sizeof(Cmd));
390 Cmd = InputBinary.getBuildVersionLoadCommand(LCI);
391 ++NumLoadCommands;
// Only the fixed-size part of the command is accounted for, which matches the
// ntools = 0 set below.
392 LoadCommandSize += sizeof(Cmd);
393 // LLDB doesn't care about the build tools for now.
394 Cmd.ntools = 0;
395 BuildVersionCmd.push_back(Cmd);
396 break;
398 default:
399 break;
403 // If we have a valid symtab to copy, do it.
// "Valid" here means: the input is not an MH_OBJECT file and it has a
// __LINKEDIT segment.
404 bool ShouldEmitSymtab =
405 isExecutable(InputBinary) && hasLinkEditSegment(InputBinary);
406 if (ShouldEmitSymtab) {
407 LoadCommandSize += sizeof(MachO::symtab_command);
408 ++NumLoadCommands;
411 unsigned HeaderSize =
412 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
413 // We will copy every segment that isn't __DWARF.
414 iterateOnSegments(InputBinary, [&](const MachO::segment_command_64 &Segment) {
415 if (StringRef("__DWARF") == Segment.segname)
416 return;
418 ++NumLoadCommands;
419 LoadCommandSize += segmentLoadCommandSize(Is64Bit, Segment.nsects);
422 // We will add our own brand new __DWARF segment if we have debug
423 // info.
// Only sections that have fragments and a non-zero file size are counted; the
// running segment size is aligned to each section's alignment before the
// section size is added.
424 unsigned NumDwarfSections = 0;
425 uint64_t DwarfSegmentSize = 0;
427 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
428 MCSection *Sec = Layout.getSectionOrder()[i];
429 if (Sec->begin() == Sec->end())
430 continue;
432 if (uint64_t Size = Layout.getSectionFileSize(Sec)) {
433 DwarfSegmentSize = alignTo(DwarfSegmentSize, Sec->getAlignment());
434 DwarfSegmentSize += Size;
435 ++NumDwarfSections;
439 if (NumDwarfSections) {
440 ++NumLoadCommands;
441 LoadCommandSize += segmentLoadCommandSize(Is64Bit, NumDwarfSections);
// Build the new symbol table and string pool in memory; nothing is written to
// OutFile yet.
444 SmallString<0> NewSymtab;
445 NonRelocatableStringpool NewStrings(Translator);
446 unsigned NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
447 unsigned NumSyms = 0;
448 uint64_t NewStringsSize = 0;
449 if (ShouldEmitSymtab) {
// NOTE(review): reserves half the size of the input symbol table - presumably
// a heuristic because not every symbol is transferred; confirm.
450 NewSymtab.reserve(SymtabCmd.nsyms * NListSize / 2);
451 NumSyms = transferSymbols(InputBinary, NewSymtab, NewStrings);
// The + 1 accounts for the extra leading NUL byte written in front of the
// string pool further down.
452 NewStringsSize = NewStrings.getSize() + 1;
// The symbol table starts on the first 0x1000 boundary after the header and
// the load commands.
455 uint64_t SymtabStart = LoadCommandSize;
456 SymtabStart += HeaderSize;
457 SymtabStart = alignTo(SymtabStart, 0x1000);
459 // We gathered all the information we need, start emitting the output file.
460 Writer.writeHeader(MachO::MH_DSYM, NumLoadCommands, LoadCommandSize, false);
462 // Write the load commands.
463 assert(OutFile.tell() == HeaderSize);
464 if (UUIDCmd.cmd != 0) {
465 Writer.W.write<uint32_t>(UUIDCmd.cmd);
466 Writer.W.write<uint32_t>(sizeof(UUIDCmd));
467 OutFile.write(reinterpret_cast<const char *>(UUIDCmd.uuid), 16);
468 assert(OutFile.tell() == HeaderSize + sizeof(UUIDCmd));
// Build version commands are written field by field, with cmdsize set to the
// fixed struct size (no tool entries follow).
470 for (auto Cmd : BuildVersionCmd) {
471 Writer.W.write<uint32_t>(Cmd.cmd);
472 Writer.W.write<uint32_t>(sizeof(Cmd));
473 Writer.W.write<uint32_t>(Cmd.platform);
474 Writer.W.write<uint32_t>(Cmd.minos);
475 Writer.W.write<uint32_t>(Cmd.sdk);
476 Writer.W.write<uint32_t>(Cmd.ntools);
479 assert(SymtabCmd.cmd && "No symbol table.");
// When no symtab is emitted, NumSyms and NewStringsSize are both 0, so
// StringStart equals SymtabStart.
480 uint64_t StringStart = SymtabStart + NumSyms * NListSize;
481 if (ShouldEmitSymtab)
482 Writer.writeSymtabLoadCommand(SymtabStart, NumSyms, StringStart,
483 NewStringsSize);
// The DWARF section contents start on the first 0x1000 boundary after the
// string table.
485 uint64_t DwarfSegmentStart = StringStart + NewStringsSize;
486 DwarfSegmentStart = alignTo(DwarfSegmentStart, 0x1000);
488 // Write the load commands for the segments and sections we 'import' from
489 // the original binary.
// __LINKEDIT is redirected to cover the new symbol table plus string table:
// offset SymtabStart, size (StringStart + NewStringsSize - SymtabStart).
// EndAddress and GapForDwarf are filled in as a side effect of the walk.
490 uint64_t EndAddress = 0;
491 uint64_t GapForDwarf = UINT64_MAX;
492 for (auto &LCI : InputBinary.load_commands()) {
493 if (LCI.C.cmd == MachO::LC_SEGMENT)
494 transferSegmentAndSections(LCI, InputBinary.getSegmentLoadCommand(LCI),
495 InputBinary, Writer, SymtabStart,
496 StringStart + NewStringsSize - SymtabStart,
497 DwarfSegmentSize, GapForDwarf, EndAddress);
498 else if (LCI.C.cmd == MachO::LC_SEGMENT_64)
499 transferSegmentAndSections(LCI, InputBinary.getSegment64LoadCommand(LCI),
500 InputBinary, Writer, SymtabStart,
501 StringStart + NewStringsSize - SymtabStart,
502 DwarfSegmentSize, GapForDwarf, EndAddress);
// Preferred VM placement of __DWARF: the first 0x1000 boundary after the
// highest segment end, provided it fits in the address width of the output.
505 uint64_t DwarfVMAddr = alignTo(EndAddress, 0x1000);
506 uint64_t DwarfVMMax = Is64Bit ? UINT64_MAX : UINT32_MAX;
507 if (DwarfVMAddr + DwarfSegmentSize > DwarfVMMax ||
508 DwarfVMAddr + DwarfSegmentSize < DwarfVMAddr /* Overflow */) {
509 // There is no room for the __DWARF segment at the end of the
510 // address space. Look through segments to find a gap.
// If no gap was recorded either, only a warning is issued and UINT64_MAX is
// used as the address.
511 DwarfVMAddr = GapForDwarf;
512 if (DwarfVMAddr == UINT64_MAX)
513 warn("not enough VM space for the __DWARF segment.",
514 "output file streaming");
517 // Write the load command for the __DWARF segment.
518 createDwarfSegment(DwarfVMAddr, DwarfSegmentStart, DwarfSegmentSize,
519 NumDwarfSections, Layout, Writer);
// All load commands are out; zero-fill up to the page-aligned symtab start.
521 assert(OutFile.tell() == LoadCommandSize + HeaderSize);
522 OutFile.write_zeros(SymtabStart - (LoadCommandSize + HeaderSize));
523 assert(OutFile.tell() == SymtabStart);
525 // Transfer symbols.
526 if (ShouldEmitSymtab) {
527 OutFile << NewSymtab.str();
528 assert(OutFile.tell() == StringStart);
530 // Transfer string table.
531 // FIXME: The NonRelocatableStringpool starts with an empty string, but
532 // dsymutil-classic starts the reconstructed string table with 2 of these.
533 // Reproduce that behavior for now (there is corresponding code in
534 // transferSymbol).
535 OutFile << '\0';
536 std::vector<DwarfStringPoolEntryRef> Strings =
537 NewStrings.getEntriesForEmission();
538 for (auto EntryRef : Strings) {
// size() + 1 so that the terminating NUL of each string is written as well.
539 OutFile.write(EntryRef.getString().data(),
540 EntryRef.getString().size() + 1);
544 assert(OutFile.tell() == StringStart + NewStringsSize);
546 // Pad till the Dwarf segment start.
547 OutFile.write_zeros(DwarfSegmentStart - (StringStart + NewStringsSize));
548 assert(OutFile.tell() == DwarfSegmentStart);
550 // Emit the Dwarf sections contents.
// Sections without fragments are skipped; every other section is preceded by
// zero padding up to its alignment.
551 for (const MCSection &Sec : MCAsm) {
552 if (Sec.begin() == Sec.end())
553 continue;
555 uint64_t Pos = OutFile.tell();
556 OutFile.write_zeros(alignTo(Pos, Sec.getAlignment()) - Pos);
557 MCAsm.writeSectionData(OutFile, &Sec, Layout);
560 return true;
562 } // namespace MachOUtils
563 } // namespace dsymutil
564 } // namespace llvm