//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MachOLayoutBuilder.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SystemZ/zOSSupport.h"

using namespace llvm;
using namespace llvm::objcopy::macho;

18 StringTableBuilder::Kind
19 MachOLayoutBuilder::getStringTableBuilderKind(const Object
&O
, bool Is64Bit
) {
20 if (O
.Header
.FileType
== MachO::HeaderFileType::MH_OBJECT
)
21 return Is64Bit
? StringTableBuilder::MachO64
: StringTableBuilder::MachO
;
22 return Is64Bit
? StringTableBuilder::MachO64Linked
23 : StringTableBuilder::MachOLinked
;
26 uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
28 for (const LoadCommand
&LC
: O
.LoadCommands
) {
29 const MachO::macho_load_command
&MLC
= LC
.MachOLoadCommand
;
30 auto cmd
= MLC
.load_command_data
.cmd
;
32 case MachO::LC_SEGMENT
:
33 Size
+= sizeof(MachO::segment_command
) +
34 sizeof(MachO::section
) * LC
.Sections
.size();
36 case MachO::LC_SEGMENT_64
:
37 Size
+= sizeof(MachO::segment_command_64
) +
38 sizeof(MachO::section_64
) * LC
.Sections
.size();
43 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
45 Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \
47 #include "llvm/BinaryFormat/MachO.def"
48 #undef HANDLE_LOAD_COMMAND
55 void MachOLayoutBuilder::constructStringTable() {
56 for (std::unique_ptr
<SymbolEntry
> &Sym
: O
.SymTable
.Symbols
)
57 StrTableBuilder
.add(Sym
->Name
);
58 StrTableBuilder
.finalize();
61 void MachOLayoutBuilder::updateSymbolIndexes() {
63 for (auto &Symbol
: O
.SymTable
.Symbols
)
64 Symbol
->Index
= Index
++;
67 // Updates the index and the number of local/external/undefined symbols.
68 void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command
&MLC
) {
69 assert(MLC
.load_command_data
.cmd
== MachO::LC_DYSYMTAB
);
70 // Make sure that nlist entries in the symbol table are sorted by the those
71 // types. The order is: local < defined external < undefined external.
72 assert(llvm::is_sorted(O
.SymTable
.Symbols
,
73 [](const std::unique_ptr
<SymbolEntry
> &A
,
74 const std::unique_ptr
<SymbolEntry
> &B
) {
75 bool AL
= A
->isLocalSymbol(),
76 BL
= B
->isLocalSymbol();
79 return !AL
&& !A
->isUndefinedSymbol() &&
80 B
->isUndefinedSymbol();
82 "Symbols are not sorted by their types.");
84 uint32_t NumLocalSymbols
= 0;
85 auto Iter
= O
.SymTable
.Symbols
.begin();
86 auto End
= O
.SymTable
.Symbols
.end();
87 for (; Iter
!= End
; ++Iter
) {
88 if ((*Iter
)->isExternalSymbol())
94 uint32_t NumExtDefSymbols
= 0;
95 for (; Iter
!= End
; ++Iter
) {
96 if ((*Iter
)->isUndefinedSymbol())
102 MLC
.dysymtab_command_data
.ilocalsym
= 0;
103 MLC
.dysymtab_command_data
.nlocalsym
= NumLocalSymbols
;
104 MLC
.dysymtab_command_data
.iextdefsym
= NumLocalSymbols
;
105 MLC
.dysymtab_command_data
.nextdefsym
= NumExtDefSymbols
;
106 MLC
.dysymtab_command_data
.iundefsym
= NumLocalSymbols
+ NumExtDefSymbols
;
107 MLC
.dysymtab_command_data
.nundefsym
=
108 O
.SymTable
.Symbols
.size() - (NumLocalSymbols
+ NumExtDefSymbols
);
111 // Recomputes and updates offset and size fields in load commands and sections
112 // since they could be modified.
113 uint64_t MachOLayoutBuilder::layoutSegments() {
115 Is64Bit
? sizeof(MachO::mach_header_64
) : sizeof(MachO::mach_header
);
116 const bool IsObjectFile
=
117 O
.Header
.FileType
== MachO::HeaderFileType::MH_OBJECT
;
118 uint64_t Offset
= IsObjectFile
? (HeaderSize
+ O
.Header
.SizeOfCmds
) : 0;
119 for (LoadCommand
&LC
: O
.LoadCommands
) {
120 auto &MLC
= LC
.MachOLoadCommand
;
122 uint64_t SegmentVmAddr
;
123 uint64_t SegmentVmSize
;
124 switch (MLC
.load_command_data
.cmd
) {
125 case MachO::LC_SEGMENT
:
126 SegmentVmAddr
= MLC
.segment_command_data
.vmaddr
;
127 SegmentVmSize
= MLC
.segment_command_data
.vmsize
;
128 Segname
= StringRef(MLC
.segment_command_data
.segname
,
129 strnlen(MLC
.segment_command_data
.segname
,
130 sizeof(MLC
.segment_command_data
.segname
)));
132 case MachO::LC_SEGMENT_64
:
133 SegmentVmAddr
= MLC
.segment_command_64_data
.vmaddr
;
134 SegmentVmSize
= MLC
.segment_command_64_data
.vmsize
;
135 Segname
= StringRef(MLC
.segment_command_64_data
.segname
,
136 strnlen(MLC
.segment_command_64_data
.segname
,
137 sizeof(MLC
.segment_command_64_data
.segname
)));
143 if (Segname
== "__LINKEDIT") {
144 // We update the __LINKEDIT segment later (in layoutTail).
145 assert(LC
.Sections
.empty() && "__LINKEDIT segment has sections");
146 LinkEditLoadCommand
= &MLC
;
150 // Update file offsets and sizes of sections.
151 uint64_t SegOffset
= Offset
;
152 uint64_t SegFileSize
= 0;
154 for (std::unique_ptr
<Section
> &Sec
: LC
.Sections
) {
155 assert(SegmentVmAddr
<= Sec
->Addr
&&
156 "Section's address cannot be smaller than Segment's one");
157 uint32_t SectOffset
= Sec
->Addr
- SegmentVmAddr
;
159 if (!Sec
->hasValidOffset()) {
162 uint64_t PaddingSize
=
163 offsetToAlignment(SegFileSize
, Align(1ull << Sec
->Align
));
164 Sec
->Offset
= SegOffset
+ SegFileSize
+ PaddingSize
;
165 Sec
->Size
= Sec
->Content
.size();
166 SegFileSize
+= PaddingSize
+ Sec
->Size
;
169 if (!Sec
->hasValidOffset()) {
172 Sec
->Offset
= SegOffset
+ SectOffset
;
173 Sec
->Size
= Sec
->Content
.size();
174 SegFileSize
= std::max(SegFileSize
, SectOffset
+ Sec
->Size
);
177 VMSize
= std::max(VMSize
, SectOffset
+ Sec
->Size
);
181 Offset
+= SegFileSize
;
183 Offset
= alignTo(Offset
+ SegFileSize
, PageSize
);
184 SegFileSize
= alignTo(SegFileSize
, PageSize
);
185 // Use the original vmsize if the segment is __PAGEZERO.
187 Segname
== "__PAGEZERO" ? SegmentVmSize
: alignTo(VMSize
, PageSize
);
190 switch (MLC
.load_command_data
.cmd
) {
191 case MachO::LC_SEGMENT
:
192 MLC
.segment_command_data
.cmdsize
=
193 sizeof(MachO::segment_command
) +
194 sizeof(MachO::section
) * LC
.Sections
.size();
195 MLC
.segment_command_data
.nsects
= LC
.Sections
.size();
196 MLC
.segment_command_data
.fileoff
= SegOffset
;
197 MLC
.segment_command_data
.vmsize
= VMSize
;
198 MLC
.segment_command_data
.filesize
= SegFileSize
;
200 case MachO::LC_SEGMENT_64
:
201 MLC
.segment_command_64_data
.cmdsize
=
202 sizeof(MachO::segment_command_64
) +
203 sizeof(MachO::section_64
) * LC
.Sections
.size();
204 MLC
.segment_command_64_data
.nsects
= LC
.Sections
.size();
205 MLC
.segment_command_64_data
.fileoff
= SegOffset
;
206 MLC
.segment_command_64_data
.vmsize
= VMSize
;
207 MLC
.segment_command_64_data
.filesize
= SegFileSize
;
215 uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset
) {
216 for (LoadCommand
&LC
: O
.LoadCommands
)
217 for (std::unique_ptr
<Section
> &Sec
: LC
.Sections
) {
218 Sec
->RelOff
= Sec
->Relocations
.empty() ? 0 : Offset
;
219 Sec
->NReloc
= Sec
->Relocations
.size();
220 Offset
+= sizeof(MachO::any_relocation_info
) * Sec
->NReloc
;
226 Error
MachOLayoutBuilder::layoutTail(uint64_t Offset
) {
227 // If we are building the layout of an executable or dynamic library
228 // which does not have any segments other than __LINKEDIT,
229 // the Offset can be equal to zero by this time. It happens because of the
230 // convention that in such cases the file offsets specified by LC_SEGMENT
231 // start with zero (unlike the case of a relocatable object file).
232 const uint64_t HeaderSize
=
233 Is64Bit
? sizeof(MachO::mach_header_64
) : sizeof(MachO::mach_header
);
234 assert((!(O
.Header
.FileType
== MachO::HeaderFileType::MH_OBJECT
) ||
235 Offset
>= HeaderSize
+ O
.Header
.SizeOfCmds
) &&
236 "Incorrect tail offset");
237 Offset
= std::max(Offset
, HeaderSize
+ O
.Header
.SizeOfCmds
);
239 // The exports trie can be in either LC_DYLD_INFO or in
240 // LC_DYLD_EXPORTS_TRIE, but not both.
241 size_t DyldInfoExportsTrieSize
= 0;
242 size_t DyldExportsTrieSize
= 0;
243 for (const auto &LC
: O
.LoadCommands
) {
244 switch (LC
.MachOLoadCommand
.load_command_data
.cmd
) {
245 case MachO::LC_DYLD_INFO
:
246 case MachO::LC_DYLD_INFO_ONLY
:
247 DyldInfoExportsTrieSize
= O
.Exports
.Trie
.size();
249 case MachO::LC_DYLD_EXPORTS_TRIE
:
250 DyldExportsTrieSize
= O
.Exports
.Trie
.size();
256 assert((DyldInfoExportsTrieSize
== 0 || DyldExportsTrieSize
== 0) &&
257 "Export trie in both LCs");
259 uint64_t NListSize
= Is64Bit
? sizeof(MachO::nlist_64
) : sizeof(MachO::nlist
);
260 uint64_t StartOfLinkEdit
= Offset
;
262 // The order of LINKEDIT elements is as follows:
263 // rebase info, binding info, weak binding info, lazy binding info, export
264 // trie, chained fixups, dyld exports trie, function starts, data-in-code,
265 // symbol table, indirect symbol table, symbol table strings,
266 // dylib codesign drs, and code signature.
267 auto updateOffset
= [&Offset
](size_t Size
) {
268 uint64_t PreviousOffset
= Offset
;
270 return PreviousOffset
;
273 uint64_t StartOfRebaseInfo
= updateOffset(O
.Rebases
.Opcodes
.size());
274 uint64_t StartOfBindingInfo
= updateOffset(O
.Binds
.Opcodes
.size());
275 uint64_t StartOfWeakBindingInfo
= updateOffset(O
.WeakBinds
.Opcodes
.size());
276 uint64_t StartOfLazyBindingInfo
= updateOffset(O
.LazyBinds
.Opcodes
.size());
277 uint64_t StartOfExportTrie
= updateOffset(DyldInfoExportsTrieSize
);
278 uint64_t StartOfChainedFixups
= updateOffset(O
.ChainedFixups
.Data
.size());
279 uint64_t StartOfDyldExportsTrie
= updateOffset(DyldExportsTrieSize
);
280 uint64_t StartOfFunctionStarts
= updateOffset(O
.FunctionStarts
.Data
.size());
281 uint64_t StartOfDataInCode
= updateOffset(O
.DataInCode
.Data
.size());
282 uint64_t StartOfLinkerOptimizationHint
=
283 updateOffset(O
.LinkerOptimizationHint
.Data
.size());
284 uint64_t StartOfSymbols
= updateOffset(NListSize
* O
.SymTable
.Symbols
.size());
285 uint64_t StartOfIndirectSymbols
=
286 updateOffset(sizeof(uint32_t) * O
.IndirectSymTable
.Symbols
.size());
287 uint64_t StartOfSymbolStrings
= updateOffset(StrTableBuilder
.getSize());
288 uint64_t StartOfDylibCodeSignDRs
= updateOffset(O
.DylibCodeSignDRs
.Data
.size());
290 uint64_t StartOfCodeSignature
= Offset
;
291 uint32_t CodeSignatureSize
= 0;
292 if (O
.CodeSignatureCommandIndex
) {
293 StartOfCodeSignature
= alignTo(StartOfCodeSignature
, 16);
295 // Note: These calculations are to be kept in sync with the same
296 // calculations performed in LLD's CodeSignatureSection.
297 const uint32_t AllHeadersSize
=
298 alignTo(CodeSignature
.FixedHeadersSize
+ OutputFileName
.size() + 1,
299 CodeSignature
.Align
);
300 const uint32_t BlockCount
=
301 (StartOfCodeSignature
+ CodeSignature
.BlockSize
- 1) /
302 CodeSignature
.BlockSize
;
303 const uint32_t Size
=
304 alignTo(AllHeadersSize
+ BlockCount
* CodeSignature
.HashSize
,
305 CodeSignature
.Align
);
307 CodeSignature
.StartOffset
= StartOfCodeSignature
;
308 CodeSignature
.AllHeadersSize
= AllHeadersSize
;
309 CodeSignature
.BlockCount
= BlockCount
;
310 CodeSignature
.OutputFileName
= OutputFileName
;
311 CodeSignature
.Size
= Size
;
312 CodeSignatureSize
= Size
;
314 uint64_t LinkEditSize
=
315 StartOfCodeSignature
+ CodeSignatureSize
- StartOfLinkEdit
;
317 // Now we have determined the layout of the contents of the __LINKEDIT
318 // segment. Update its load command.
319 if (LinkEditLoadCommand
) {
320 MachO::macho_load_command
*MLC
= LinkEditLoadCommand
;
321 switch (LinkEditLoadCommand
->load_command_data
.cmd
) {
322 case MachO::LC_SEGMENT
:
323 MLC
->segment_command_data
.cmdsize
= sizeof(MachO::segment_command
);
324 MLC
->segment_command_data
.fileoff
= StartOfLinkEdit
;
325 MLC
->segment_command_data
.vmsize
= alignTo(LinkEditSize
, PageSize
);
326 MLC
->segment_command_data
.filesize
= LinkEditSize
;
328 case MachO::LC_SEGMENT_64
:
329 MLC
->segment_command_64_data
.cmdsize
= sizeof(MachO::segment_command_64
);
330 MLC
->segment_command_64_data
.fileoff
= StartOfLinkEdit
;
331 MLC
->segment_command_64_data
.vmsize
= alignTo(LinkEditSize
, PageSize
);
332 MLC
->segment_command_64_data
.filesize
= LinkEditSize
;
337 for (LoadCommand
&LC
: O
.LoadCommands
) {
338 auto &MLC
= LC
.MachOLoadCommand
;
339 auto cmd
= MLC
.load_command_data
.cmd
;
341 case MachO::LC_CODE_SIGNATURE
:
342 MLC
.linkedit_data_command_data
.dataoff
= StartOfCodeSignature
;
343 MLC
.linkedit_data_command_data
.datasize
= CodeSignatureSize
;
345 case MachO::LC_DYLIB_CODE_SIGN_DRS
:
346 MLC
.linkedit_data_command_data
.dataoff
= StartOfDylibCodeSignDRs
;
347 MLC
.linkedit_data_command_data
.datasize
= O
.DylibCodeSignDRs
.Data
.size();
349 case MachO::LC_SYMTAB
:
350 MLC
.symtab_command_data
.symoff
= StartOfSymbols
;
351 MLC
.symtab_command_data
.nsyms
= O
.SymTable
.Symbols
.size();
352 MLC
.symtab_command_data
.stroff
= StartOfSymbolStrings
;
353 MLC
.symtab_command_data
.strsize
= StrTableBuilder
.getSize();
355 case MachO::LC_DYSYMTAB
: {
356 if (MLC
.dysymtab_command_data
.ntoc
!= 0 ||
357 MLC
.dysymtab_command_data
.nmodtab
!= 0 ||
358 MLC
.dysymtab_command_data
.nextrefsyms
!= 0 ||
359 MLC
.dysymtab_command_data
.nlocrel
!= 0 ||
360 MLC
.dysymtab_command_data
.nextrel
!= 0)
361 return createStringError(llvm::errc::not_supported
,
362 "shared library is not yet supported");
363 MLC
.dysymtab_command_data
.indirectsymoff
=
364 O
.IndirectSymTable
.Symbols
.size() ? StartOfIndirectSymbols
: 0;
365 MLC
.dysymtab_command_data
.nindirectsyms
=
366 O
.IndirectSymTable
.Symbols
.size();
370 case MachO::LC_DATA_IN_CODE
:
371 MLC
.linkedit_data_command_data
.dataoff
= StartOfDataInCode
;
372 MLC
.linkedit_data_command_data
.datasize
= O
.DataInCode
.Data
.size();
374 case MachO::LC_LINKER_OPTIMIZATION_HINT
:
375 MLC
.linkedit_data_command_data
.dataoff
= StartOfLinkerOptimizationHint
;
376 MLC
.linkedit_data_command_data
.datasize
=
377 O
.LinkerOptimizationHint
.Data
.size();
379 case MachO::LC_FUNCTION_STARTS
:
380 MLC
.linkedit_data_command_data
.dataoff
= StartOfFunctionStarts
;
381 MLC
.linkedit_data_command_data
.datasize
= O
.FunctionStarts
.Data
.size();
383 case MachO::LC_DYLD_CHAINED_FIXUPS
:
384 MLC
.linkedit_data_command_data
.dataoff
= StartOfChainedFixups
;
385 MLC
.linkedit_data_command_data
.datasize
= O
.ChainedFixups
.Data
.size();
387 case MachO::LC_DYLD_EXPORTS_TRIE
:
388 MLC
.linkedit_data_command_data
.dataoff
= StartOfDyldExportsTrie
;
389 MLC
.linkedit_data_command_data
.datasize
= DyldExportsTrieSize
;
391 case MachO::LC_DYLD_INFO
:
392 case MachO::LC_DYLD_INFO_ONLY
:
393 MLC
.dyld_info_command_data
.rebase_off
=
394 O
.Rebases
.Opcodes
.empty() ? 0 : StartOfRebaseInfo
;
395 MLC
.dyld_info_command_data
.rebase_size
= O
.Rebases
.Opcodes
.size();
396 MLC
.dyld_info_command_data
.bind_off
=
397 O
.Binds
.Opcodes
.empty() ? 0 : StartOfBindingInfo
;
398 MLC
.dyld_info_command_data
.bind_size
= O
.Binds
.Opcodes
.size();
399 MLC
.dyld_info_command_data
.weak_bind_off
=
400 O
.WeakBinds
.Opcodes
.empty() ? 0 : StartOfWeakBindingInfo
;
401 MLC
.dyld_info_command_data
.weak_bind_size
= O
.WeakBinds
.Opcodes
.size();
402 MLC
.dyld_info_command_data
.lazy_bind_off
=
403 O
.LazyBinds
.Opcodes
.empty() ? 0 : StartOfLazyBindingInfo
;
404 MLC
.dyld_info_command_data
.lazy_bind_size
= O
.LazyBinds
.Opcodes
.size();
405 MLC
.dyld_info_command_data
.export_off
=
406 O
.Exports
.Trie
.empty() ? 0 : StartOfExportTrie
;
407 MLC
.dyld_info_command_data
.export_size
= DyldInfoExportsTrieSize
;
409 // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
410 // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
411 // relative virtual address. At the moment modification of the __TEXT
412 // segment of executables isn't supported anyway (e.g. data in code entries
413 // are not recalculated). Moreover, in general
414 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
415 // without making additional assumptions (e.g. that the entire __TEXT
416 // segment should be encrypted) we do not know how to recalculate the
417 // boundaries of the encrypted part. For now just copy over these load
418 // commands until we encounter a real world usecase where
419 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
420 case MachO::LC_ENCRYPTION_INFO
:
421 case MachO::LC_ENCRYPTION_INFO_64
:
422 case MachO::LC_LOAD_DYLINKER
:
424 case MachO::LC_RPATH
:
425 case MachO::LC_SEGMENT
:
426 case MachO::LC_SEGMENT_64
:
427 case MachO::LC_VERSION_MIN_MACOSX
:
428 case MachO::LC_VERSION_MIN_IPHONEOS
:
429 case MachO::LC_VERSION_MIN_TVOS
:
430 case MachO::LC_VERSION_MIN_WATCHOS
:
431 case MachO::LC_BUILD_VERSION
:
432 case MachO::LC_ID_DYLIB
:
433 case MachO::LC_LOAD_DYLIB
:
434 case MachO::LC_LOAD_WEAK_DYLIB
:
436 case MachO::LC_SOURCE_VERSION
:
437 case MachO::LC_THREAD
:
438 case MachO::LC_UNIXTHREAD
:
439 case MachO::LC_SUB_FRAMEWORK
:
440 case MachO::LC_SUB_UMBRELLA
:
441 case MachO::LC_SUB_CLIENT
:
442 case MachO::LC_SUB_LIBRARY
:
443 case MachO::LC_LINKER_OPTION
:
444 // Nothing to update.
447 // Abort if it's unsupported in order to prevent corrupting the object.
448 return createStringError(llvm::errc::not_supported
,
449 "unsupported load command (cmd=0x%x)", cmd
);
453 return Error::success();
456 Error
MachOLayoutBuilder::layout() {
457 O
.Header
.NCmds
= O
.LoadCommands
.size();
458 O
.Header
.SizeOfCmds
= computeSizeOfCmds();
459 constructStringTable();
460 updateSymbolIndexes();
461 uint64_t Offset
= layoutSegments();
462 Offset
= layoutRelocations(Offset
);
463 return layoutTail(Offset
);