1 //===-- MachOUtils.cpp - Mach-o specific helpers for dsymutil ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "MachOUtils.h"
10 #include "BinaryHolder.h"
12 #include "LinkUtils.h"
13 #include "NonRelocatableStringpool.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCMachObjectWriter.h"
16 #include "llvm/MC/MCObjectStreamer.h"
17 #include "llvm/MC/MCSectionMachO.h"
18 #include "llvm/MC/MCStreamer.h"
19 #include "llvm/Object/MachO.h"
20 #include "llvm/Support/FileUtilities.h"
21 #include "llvm/Support/Program.h"
22 #include "llvm/Support/WithColor.h"
23 #include "llvm/Support/raw_ostream.h"
27 namespace MachOUtils
{
29 llvm::Error
ArchAndFile::createTempFile() {
30 llvm::SmallString
<128> TmpModel
;
31 llvm::sys::path::system_temp_directory(true, TmpModel
);
32 llvm::sys::path::append(TmpModel
, "dsym.tmp%%%%%.dwarf");
33 Expected
<sys::fs::TempFile
> T
= sys::fs::TempFile::create(TmpModel
);
38 File
= std::make_unique
<sys::fs::TempFile
>(std::move(*T
));
39 return Error::success();
42 llvm::StringRef
ArchAndFile::path() const { return File
->TmpName
; }
44 ArchAndFile::~ArchAndFile() {
46 if (auto E
= File
->discard())
47 llvm::consumeError(std::move(E
));
50 std::string
getArchName(StringRef Arch
) {
51 if (Arch
.startswith("thumb"))
52 return (llvm::Twine("arm") + Arch
.drop_front(5)).str();
56 static bool runLipo(StringRef SDKPath
, SmallVectorImpl
<StringRef
> &Args
) {
57 auto Path
= sys::findProgramByName("lipo", makeArrayRef(SDKPath
));
59 Path
= sys::findProgramByName("lipo");
62 WithColor::error() << "lipo: " << Path
.getError().message() << "\n";
67 int result
= sys::ExecuteAndWait(*Path
, Args
, None
, {}, 0, 0, &ErrMsg
);
69 WithColor::error() << "lipo: " << ErrMsg
<< "\n";
76 bool generateUniversalBinary(SmallVectorImpl
<ArchAndFile
> &ArchFiles
,
77 StringRef OutputFileName
,
78 const LinkOptions
&Options
, StringRef SDKPath
) {
79 // No need to merge one file into a universal fat binary.
80 if (ArchFiles
.size() == 1) {
81 if (auto E
= ArchFiles
.front().File
->keep(OutputFileName
)) {
82 WithColor::error() << "while keeping " << ArchFiles
.front().path()
83 << " as " << OutputFileName
<< ": "
84 << toString(std::move(E
)) << "\n";
90 SmallVector
<StringRef
, 8> Args
;
91 Args
.push_back("lipo");
92 Args
.push_back("-create");
94 for (auto &Thin
: ArchFiles
)
95 Args
.push_back(Thin
.path());
97 // Align segments to match dsymutil-classic alignment
98 for (auto &Thin
: ArchFiles
) {
99 Thin
.Arch
= getArchName(Thin
.Arch
);
100 Args
.push_back("-segalign");
101 Args
.push_back(Thin
.Arch
);
102 Args
.push_back("20");
105 Args
.push_back("-output");
106 Args
.push_back(OutputFileName
.data());
108 if (Options
.Verbose
) {
109 outs() << "Running lipo\n";
110 for (auto Arg
: Args
)
111 outs() << ' ' << Arg
;
115 return Options
.NoOutput
? true : runLipo(SDKPath
, Args
);
118 // Return a MachO::segment_command_64 that holds the same values as the passed
119 // MachO::segment_command. We do that to avoid having to duplicate the logic
120 // for 32bits and 64bits segments.
121 struct MachO::segment_command_64
adaptFrom32bits(MachO::segment_command Seg
) {
122 MachO::segment_command_64 Seg64
;
124 Seg64
.cmdsize
= Seg
.cmdsize
;
125 memcpy(Seg64
.segname
, Seg
.segname
, sizeof(Seg
.segname
));
126 Seg64
.vmaddr
= Seg
.vmaddr
;
127 Seg64
.vmsize
= Seg
.vmsize
;
128 Seg64
.fileoff
= Seg
.fileoff
;
129 Seg64
.filesize
= Seg
.filesize
;
130 Seg64
.maxprot
= Seg
.maxprot
;
131 Seg64
.initprot
= Seg
.initprot
;
132 Seg64
.nsects
= Seg
.nsects
;
133 Seg64
.flags
= Seg
.flags
;
137 // Iterate on all \a Obj segments, and apply \a Handler to them.
138 template <typename FunctionTy
>
139 static void iterateOnSegments(const object::MachOObjectFile
&Obj
,
140 FunctionTy Handler
) {
141 for (const auto &LCI
: Obj
.load_commands()) {
142 MachO::segment_command_64 Segment
;
143 if (LCI
.C
.cmd
== MachO::LC_SEGMENT
)
144 Segment
= adaptFrom32bits(Obj
.getSegmentLoadCommand(LCI
));
145 else if (LCI
.C
.cmd
== MachO::LC_SEGMENT_64
)
146 Segment
= Obj
.getSegment64LoadCommand(LCI
);
154 // Transfer the symbols described by \a NList to \a NewSymtab which is just the
155 // raw contents of the symbol table for the dSYM companion file. \returns
156 // whether the symbol was transferred or not.
157 template <typename NListTy
>
158 static bool transferSymbol(NListTy NList
, bool IsLittleEndian
,
159 StringRef Strings
, SmallVectorImpl
<char> &NewSymtab
,
160 NonRelocatableStringpool
&NewStrings
,
162 // Do not transfer undefined symbols, we want real addresses.
163 if ((NList
.n_type
& MachO::N_TYPE
) == MachO::N_UNDF
)
166 StringRef Name
= StringRef(Strings
.begin() + NList
.n_strx
);
169 (NList
.n_type
!= MachO::N_SO
) || (!Name
.empty() && Name
[0] != '\0');
171 } else if (NList
.n_type
== MachO::N_SO
) {
176 // FIXME: The + 1 is here to mimic dsymutil-classic that has 2 empty
177 // strings at the start of the generated string table (There is
178 // corresponding code in the string table emission).
179 NList
.n_strx
= NewStrings
.getStringOffset(Name
) + 1;
180 if (IsLittleEndian
!= sys::IsLittleEndianHost
)
181 MachO::swapStruct(NList
);
183 NewSymtab
.append(reinterpret_cast<char *>(&NList
),
184 reinterpret_cast<char *>(&NList
+ 1));
188 // Wrapper around transferSymbol to transfer all of \a Obj symbols
189 // to \a NewSymtab. This function does not write in the output file.
190 // \returns the number of symbols in \a NewSymtab.
191 static unsigned transferSymbols(const object::MachOObjectFile
&Obj
,
192 SmallVectorImpl
<char> &NewSymtab
,
193 NonRelocatableStringpool
&NewStrings
) {
195 StringRef Strings
= Obj
.getStringTableData();
196 bool IsLittleEndian
= Obj
.isLittleEndian();
197 bool InDebugNote
= false;
200 for (const object::SymbolRef
&Symbol
: Obj
.symbols()) {
201 object::DataRefImpl DRI
= Symbol
.getRawDataRefImpl();
202 if (transferSymbol(Obj
.getSymbol64TableEntry(DRI
), IsLittleEndian
,
203 Strings
, NewSymtab
, NewStrings
, InDebugNote
))
207 for (const object::SymbolRef
&Symbol
: Obj
.symbols()) {
208 object::DataRefImpl DRI
= Symbol
.getRawDataRefImpl();
209 if (transferSymbol(Obj
.getSymbolTableEntry(DRI
), IsLittleEndian
, Strings
,
210 NewSymtab
, NewStrings
, InDebugNote
))
217 static MachO::section
218 getSection(const object::MachOObjectFile
&Obj
,
219 const MachO::segment_command
&Seg
,
220 const object::MachOObjectFile::LoadCommandInfo
&LCI
, unsigned Idx
) {
221 return Obj
.getSection(LCI
, Idx
);
224 static MachO::section_64
225 getSection(const object::MachOObjectFile
&Obj
,
226 const MachO::segment_command_64
&Seg
,
227 const object::MachOObjectFile::LoadCommandInfo
&LCI
, unsigned Idx
) {
228 return Obj
.getSection64(LCI
, Idx
);
231 // Transfer \a Segment from \a Obj to the output file. This calls into \a Writer
232 // to write these load commands directly in the output file at the current
234 // The function also tries to find a hole in the address map to fit the __DWARF
235 // segment of \a DwarfSegmentSize size. \a EndAddress is updated to point at the
236 // highest segment address.
237 // When the __LINKEDIT segment is transferred, its offset and size are set resp.
238 // to \a LinkeditOffset and \a LinkeditSize.
239 template <typename SegmentTy
>
240 static void transferSegmentAndSections(
241 const object::MachOObjectFile::LoadCommandInfo
&LCI
, SegmentTy Segment
,
242 const object::MachOObjectFile
&Obj
, MachObjectWriter
&Writer
,
243 uint64_t LinkeditOffset
, uint64_t LinkeditSize
, uint64_t DwarfSegmentSize
,
244 uint64_t &GapForDwarf
, uint64_t &EndAddress
) {
245 if (StringRef("__DWARF") == Segment
.segname
)
248 Segment
.fileoff
= Segment
.filesize
= 0;
250 if (StringRef("__LINKEDIT") == Segment
.segname
) {
251 Segment
.fileoff
= LinkeditOffset
;
252 Segment
.filesize
= LinkeditSize
;
253 // Resize vmsize by rounding to the page size.
254 Segment
.vmsize
= alignTo(LinkeditSize
, 0x1000);
257 // Check if the end address of the last segment and our current
258 // start address leave a sufficient gap to store the __DWARF
260 uint64_t PrevEndAddress
= EndAddress
;
261 EndAddress
= alignTo(EndAddress
, 0x1000);
262 if (GapForDwarf
== UINT64_MAX
&& Segment
.vmaddr
> EndAddress
&&
263 Segment
.vmaddr
- EndAddress
>= DwarfSegmentSize
)
264 GapForDwarf
= EndAddress
;
266 // The segments are not necessarily sorted by their vmaddr.
268 std::max
<uint64_t>(PrevEndAddress
, Segment
.vmaddr
+ Segment
.vmsize
);
269 unsigned nsects
= Segment
.nsects
;
270 if (Obj
.isLittleEndian() != sys::IsLittleEndianHost
)
271 MachO::swapStruct(Segment
);
272 Writer
.W
.OS
.write(reinterpret_cast<char *>(&Segment
), sizeof(Segment
));
273 for (unsigned i
= 0; i
< nsects
; ++i
) {
274 auto Sect
= getSection(Obj
, Segment
, LCI
, i
);
275 Sect
.offset
= Sect
.reloff
= Sect
.nreloc
= 0;
276 if (Obj
.isLittleEndian() != sys::IsLittleEndianHost
)
277 MachO::swapStruct(Sect
);
278 Writer
.W
.OS
.write(reinterpret_cast<char *>(&Sect
), sizeof(Sect
));
282 // Write the __DWARF segment load command to the output file.
283 static void createDwarfSegment(uint64_t VMAddr
, uint64_t FileOffset
,
284 uint64_t FileSize
, unsigned NumSections
,
285 MCAsmLayout
&Layout
, MachObjectWriter
&Writer
) {
286 Writer
.writeSegmentLoadCommand("__DWARF", NumSections
, VMAddr
,
287 alignTo(FileSize
, 0x1000), FileOffset
,
288 FileSize
, /* MaxProt */ 7,
291 for (unsigned int i
= 0, n
= Layout
.getSectionOrder().size(); i
!= n
; ++i
) {
292 MCSection
*Sec
= Layout
.getSectionOrder()[i
];
293 if (Sec
->begin() == Sec
->end() || !Layout
.getSectionFileSize(Sec
))
296 unsigned Align
= Sec
->getAlignment();
298 VMAddr
= alignTo(VMAddr
, Align
);
299 FileOffset
= alignTo(FileOffset
, Align
);
301 Writer
.writeSection(Layout
, *Sec
, VMAddr
, FileOffset
, 0, 0, 0);
303 FileOffset
+= Layout
.getSectionAddressSize(Sec
);
304 VMAddr
+= Layout
.getSectionAddressSize(Sec
);
308 static bool isExecutable(const object::MachOObjectFile
&Obj
) {
310 return Obj
.getHeader64().filetype
!= MachO::MH_OBJECT
;
312 return Obj
.getHeader().filetype
!= MachO::MH_OBJECT
;
315 static bool hasLinkEditSegment(const object::MachOObjectFile
&Obj
) {
316 bool HasLinkEditSegment
= false;
317 iterateOnSegments(Obj
, [&](const MachO::segment_command_64
&Segment
) {
318 if (StringRef("__LINKEDIT") == Segment
.segname
)
319 HasLinkEditSegment
= true;
321 return HasLinkEditSegment
;
324 static unsigned segmentLoadCommandSize(bool Is64Bit
, unsigned NumSections
) {
326 return sizeof(MachO::segment_command_64
) +
327 NumSections
* sizeof(MachO::section_64
);
329 return sizeof(MachO::segment_command
) + NumSections
* sizeof(MachO::section
);
332 // Stream a dSYM companion binary file corresponding to the binary referenced
333 // by \a DM to \a OutFile. The passed \a MS MCStreamer is setup to write to
334 // \a OutFile and it must be using a MachObjectWriter object to do so.
335 bool generateDsymCompanion(const DebugMap
&DM
, SymbolMapTranslator
&Translator
,
336 MCStreamer
&MS
, raw_fd_ostream
&OutFile
) {
337 auto &ObjectStreamer
= static_cast<MCObjectStreamer
&>(MS
);
338 MCAssembler
&MCAsm
= ObjectStreamer
.getAssembler();
339 auto &Writer
= static_cast<MachObjectWriter
&>(MCAsm
.getWriter());
341 // Layout but don't emit.
342 ObjectStreamer
.flushPendingLabels();
343 MCAsmLayout
Layout(MCAsm
);
344 MCAsm
.layout(Layout
);
346 BinaryHolder
InputBinaryHolder(false);
348 auto ObjectEntry
= InputBinaryHolder
.getObjectEntry(DM
.getBinaryPath());
350 auto Err
= ObjectEntry
.takeError();
351 return error(Twine("opening ") + DM
.getBinaryPath() + ": " +
352 toString(std::move(Err
)),
353 "output file streaming");
357 ObjectEntry
->getObjectAs
<object::MachOObjectFile
>(DM
.getTriple());
359 auto Err
= Object
.takeError();
360 return error(Twine("opening ") + DM
.getBinaryPath() + ": " +
361 toString(std::move(Err
)),
362 "output file streaming");
365 auto &InputBinary
= *Object
;
367 bool Is64Bit
= Writer
.is64Bit();
368 MachO::symtab_command SymtabCmd
= InputBinary
.getSymtabLoadCommand();
370 // Compute the number of load commands we will need.
371 unsigned LoadCommandSize
= 0;
372 unsigned NumLoadCommands
= 0;
374 // Get LC_UUID and LC_BUILD_VERSION.
375 MachO::uuid_command UUIDCmd
;
376 SmallVector
<MachO::build_version_command
, 2> BuildVersionCmd
;
377 memset(&UUIDCmd
, 0, sizeof(UUIDCmd
));
378 for (auto &LCI
: InputBinary
.load_commands()) {
382 return error("Binary contains more than one UUID");
383 UUIDCmd
= InputBinary
.getUuidCommand(LCI
);
385 LoadCommandSize
+= sizeof(UUIDCmd
);
387 case MachO::LC_BUILD_VERSION
: {
388 MachO::build_version_command Cmd
;
389 memset(&Cmd
, 0, sizeof(Cmd
));
390 Cmd
= InputBinary
.getBuildVersionLoadCommand(LCI
);
392 LoadCommandSize
+= sizeof(Cmd
);
393 // LLDB doesn't care about the build tools for now.
395 BuildVersionCmd
.push_back(Cmd
);
403 // If we have a valid symtab to copy, do it.
404 bool ShouldEmitSymtab
=
405 isExecutable(InputBinary
) && hasLinkEditSegment(InputBinary
);
406 if (ShouldEmitSymtab
) {
407 LoadCommandSize
+= sizeof(MachO::symtab_command
);
411 unsigned HeaderSize
=
412 Is64Bit
? sizeof(MachO::mach_header_64
) : sizeof(MachO::mach_header
);
413 // We will copy every segment that isn't __DWARF.
414 iterateOnSegments(InputBinary
, [&](const MachO::segment_command_64
&Segment
) {
415 if (StringRef("__DWARF") == Segment
.segname
)
419 LoadCommandSize
+= segmentLoadCommandSize(Is64Bit
, Segment
.nsects
);
422 // We will add our own brand new __DWARF segment if we have debug
424 unsigned NumDwarfSections
= 0;
425 uint64_t DwarfSegmentSize
= 0;
427 for (unsigned int i
= 0, n
= Layout
.getSectionOrder().size(); i
!= n
; ++i
) {
428 MCSection
*Sec
= Layout
.getSectionOrder()[i
];
429 if (Sec
->begin() == Sec
->end())
432 if (uint64_t Size
= Layout
.getSectionFileSize(Sec
)) {
433 DwarfSegmentSize
= alignTo(DwarfSegmentSize
, Sec
->getAlignment());
434 DwarfSegmentSize
+= Size
;
439 if (NumDwarfSections
) {
441 LoadCommandSize
+= segmentLoadCommandSize(Is64Bit
, NumDwarfSections
);
444 SmallString
<0> NewSymtab
;
445 NonRelocatableStringpool
NewStrings(Translator
);
446 unsigned NListSize
= Is64Bit
? sizeof(MachO::nlist_64
) : sizeof(MachO::nlist
);
447 unsigned NumSyms
= 0;
448 uint64_t NewStringsSize
= 0;
449 if (ShouldEmitSymtab
) {
450 NewSymtab
.reserve(SymtabCmd
.nsyms
* NListSize
/ 2);
451 NumSyms
= transferSymbols(InputBinary
, NewSymtab
, NewStrings
);
452 NewStringsSize
= NewStrings
.getSize() + 1;
455 uint64_t SymtabStart
= LoadCommandSize
;
456 SymtabStart
+= HeaderSize
;
457 SymtabStart
= alignTo(SymtabStart
, 0x1000);
459 // We gathered all the information we need, start emitting the output file.
460 Writer
.writeHeader(MachO::MH_DSYM
, NumLoadCommands
, LoadCommandSize
, false);
462 // Write the load commands.
463 assert(OutFile
.tell() == HeaderSize
);
464 if (UUIDCmd
.cmd
!= 0) {
465 Writer
.W
.write
<uint32_t>(UUIDCmd
.cmd
);
466 Writer
.W
.write
<uint32_t>(sizeof(UUIDCmd
));
467 OutFile
.write(reinterpret_cast<const char *>(UUIDCmd
.uuid
), 16);
468 assert(OutFile
.tell() == HeaderSize
+ sizeof(UUIDCmd
));
470 for (auto Cmd
: BuildVersionCmd
) {
471 Writer
.W
.write
<uint32_t>(Cmd
.cmd
);
472 Writer
.W
.write
<uint32_t>(sizeof(Cmd
));
473 Writer
.W
.write
<uint32_t>(Cmd
.platform
);
474 Writer
.W
.write
<uint32_t>(Cmd
.minos
);
475 Writer
.W
.write
<uint32_t>(Cmd
.sdk
);
476 Writer
.W
.write
<uint32_t>(Cmd
.ntools
);
479 assert(SymtabCmd
.cmd
&& "No symbol table.");
480 uint64_t StringStart
= SymtabStart
+ NumSyms
* NListSize
;
481 if (ShouldEmitSymtab
)
482 Writer
.writeSymtabLoadCommand(SymtabStart
, NumSyms
, StringStart
,
485 uint64_t DwarfSegmentStart
= StringStart
+ NewStringsSize
;
486 DwarfSegmentStart
= alignTo(DwarfSegmentStart
, 0x1000);
488 // Write the load commands for the segments and sections we 'import' from
489 // the original binary.
490 uint64_t EndAddress
= 0;
491 uint64_t GapForDwarf
= UINT64_MAX
;
492 for (auto &LCI
: InputBinary
.load_commands()) {
493 if (LCI
.C
.cmd
== MachO::LC_SEGMENT
)
494 transferSegmentAndSections(LCI
, InputBinary
.getSegmentLoadCommand(LCI
),
495 InputBinary
, Writer
, SymtabStart
,
496 StringStart
+ NewStringsSize
- SymtabStart
,
497 DwarfSegmentSize
, GapForDwarf
, EndAddress
);
498 else if (LCI
.C
.cmd
== MachO::LC_SEGMENT_64
)
499 transferSegmentAndSections(LCI
, InputBinary
.getSegment64LoadCommand(LCI
),
500 InputBinary
, Writer
, SymtabStart
,
501 StringStart
+ NewStringsSize
- SymtabStart
,
502 DwarfSegmentSize
, GapForDwarf
, EndAddress
);
505 uint64_t DwarfVMAddr
= alignTo(EndAddress
, 0x1000);
506 uint64_t DwarfVMMax
= Is64Bit
? UINT64_MAX
: UINT32_MAX
;
507 if (DwarfVMAddr
+ DwarfSegmentSize
> DwarfVMMax
||
508 DwarfVMAddr
+ DwarfSegmentSize
< DwarfVMAddr
/* Overflow */) {
509 // There is no room for the __DWARF segment at the end of the
510 // address space. Look through segments to find a gap.
511 DwarfVMAddr
= GapForDwarf
;
512 if (DwarfVMAddr
== UINT64_MAX
)
513 warn("not enough VM space for the __DWARF segment.",
514 "output file streaming");
517 // Write the load command for the __DWARF segment.
518 createDwarfSegment(DwarfVMAddr
, DwarfSegmentStart
, DwarfSegmentSize
,
519 NumDwarfSections
, Layout
, Writer
);
521 assert(OutFile
.tell() == LoadCommandSize
+ HeaderSize
);
522 OutFile
.write_zeros(SymtabStart
- (LoadCommandSize
+ HeaderSize
));
523 assert(OutFile
.tell() == SymtabStart
);
526 if (ShouldEmitSymtab
) {
527 OutFile
<< NewSymtab
.str();
528 assert(OutFile
.tell() == StringStart
);
530 // Transfer string table.
531 // FIXME: The NonRelocatableStringpool starts with an empty string, but
532 // dsymutil-classic starts the reconstructed string table with 2 of these.
533 // Reproduce that behavior for now (there is corresponding code in
536 std::vector
<DwarfStringPoolEntryRef
> Strings
=
537 NewStrings
.getEntriesForEmission();
538 for (auto EntryRef
: Strings
) {
539 OutFile
.write(EntryRef
.getString().data(),
540 EntryRef
.getString().size() + 1);
544 assert(OutFile
.tell() == StringStart
+ NewStringsSize
);
546 // Pad till the Dwarf segment start.
547 OutFile
.write_zeros(DwarfSegmentStart
- (StringStart
+ NewStringsSize
));
548 assert(OutFile
.tell() == DwarfSegmentStart
);
550 // Emit the Dwarf sections contents.
551 for (const MCSection
&Sec
: MCAsm
) {
552 if (Sec
.begin() == Sec
.end())
555 uint64_t Pos
= OutFile
.tell();
556 OutFile
.write_zeros(alignTo(Pos
, Sec
.getAlignment()) - Pos
);
557 MCAsm
.writeSectionData(OutFile
, &Sec
, Layout
);
562 } // namespace MachOUtils
563 } // namespace dsymutil