//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MachOLayoutBuilder.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;
using namespace llvm::objcopy::macho;

17 StringTableBuilder::Kind
18 MachOLayoutBuilder::getStringTableBuilderKind(const Object
&O
, bool Is64Bit
) {
19 if (O
.Header
.FileType
== MachO::HeaderFileType::MH_OBJECT
)
20 return Is64Bit
? StringTableBuilder::MachO64
: StringTableBuilder::MachO
;
21 return Is64Bit
? StringTableBuilder::MachO64Linked
22 : StringTableBuilder::MachOLinked
;
25 uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
27 for (const LoadCommand
&LC
: O
.LoadCommands
) {
28 const MachO::macho_load_command
&MLC
= LC
.MachOLoadCommand
;
29 auto cmd
= MLC
.load_command_data
.cmd
;
31 case MachO::LC_SEGMENT
:
32 Size
+= sizeof(MachO::segment_command
) +
33 sizeof(MachO::section
) * LC
.Sections
.size();
35 case MachO::LC_SEGMENT_64
:
36 Size
+= sizeof(MachO::segment_command_64
) +
37 sizeof(MachO::section_64
) * LC
.Sections
.size();
42 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
44 Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \
46 #include "llvm/BinaryFormat/MachO.def"
47 #undef HANDLE_LOAD_COMMAND
54 void MachOLayoutBuilder::constructStringTable() {
55 for (std::unique_ptr
<SymbolEntry
> &Sym
: O
.SymTable
.Symbols
)
56 StrTableBuilder
.add(Sym
->Name
);
57 StrTableBuilder
.finalize();
60 void MachOLayoutBuilder::updateSymbolIndexes() {
62 for (auto &Symbol
: O
.SymTable
.Symbols
)
63 Symbol
->Index
= Index
++;
66 // Updates the index and the number of local/external/undefined symbols.
67 void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command
&MLC
) {
68 assert(MLC
.load_command_data
.cmd
== MachO::LC_DYSYMTAB
);
69 // Make sure that nlist entries in the symbol table are sorted by the those
70 // types. The order is: local < defined external < undefined external.
71 assert(llvm::is_sorted(O
.SymTable
.Symbols
,
72 [](const std::unique_ptr
<SymbolEntry
> &A
,
73 const std::unique_ptr
<SymbolEntry
> &B
) {
74 bool AL
= A
->isLocalSymbol(),
75 BL
= B
->isLocalSymbol();
78 return !AL
&& !A
->isUndefinedSymbol() &&
79 B
->isUndefinedSymbol();
81 "Symbols are not sorted by their types.");
83 uint32_t NumLocalSymbols
= 0;
84 auto Iter
= O
.SymTable
.Symbols
.begin();
85 auto End
= O
.SymTable
.Symbols
.end();
86 for (; Iter
!= End
; ++Iter
) {
87 if ((*Iter
)->isExternalSymbol())
93 uint32_t NumExtDefSymbols
= 0;
94 for (; Iter
!= End
; ++Iter
) {
95 if ((*Iter
)->isUndefinedSymbol())
101 MLC
.dysymtab_command_data
.ilocalsym
= 0;
102 MLC
.dysymtab_command_data
.nlocalsym
= NumLocalSymbols
;
103 MLC
.dysymtab_command_data
.iextdefsym
= NumLocalSymbols
;
104 MLC
.dysymtab_command_data
.nextdefsym
= NumExtDefSymbols
;
105 MLC
.dysymtab_command_data
.iundefsym
= NumLocalSymbols
+ NumExtDefSymbols
;
106 MLC
.dysymtab_command_data
.nundefsym
=
107 O
.SymTable
.Symbols
.size() - (NumLocalSymbols
+ NumExtDefSymbols
);
110 // Recomputes and updates offset and size fields in load commands and sections
111 // since they could be modified.
112 uint64_t MachOLayoutBuilder::layoutSegments() {
114 Is64Bit
? sizeof(MachO::mach_header_64
) : sizeof(MachO::mach_header
);
115 const bool IsObjectFile
=
116 O
.Header
.FileType
== MachO::HeaderFileType::MH_OBJECT
;
117 uint64_t Offset
= IsObjectFile
? (HeaderSize
+ O
.Header
.SizeOfCmds
) : 0;
118 for (LoadCommand
&LC
: O
.LoadCommands
) {
119 auto &MLC
= LC
.MachOLoadCommand
;
121 uint64_t SegmentVmAddr
;
122 uint64_t SegmentVmSize
;
123 switch (MLC
.load_command_data
.cmd
) {
124 case MachO::LC_SEGMENT
:
125 SegmentVmAddr
= MLC
.segment_command_data
.vmaddr
;
126 SegmentVmSize
= MLC
.segment_command_data
.vmsize
;
127 Segname
= StringRef(MLC
.segment_command_data
.segname
,
128 strnlen(MLC
.segment_command_data
.segname
,
129 sizeof(MLC
.segment_command_data
.segname
)));
131 case MachO::LC_SEGMENT_64
:
132 SegmentVmAddr
= MLC
.segment_command_64_data
.vmaddr
;
133 SegmentVmSize
= MLC
.segment_command_64_data
.vmsize
;
134 Segname
= StringRef(MLC
.segment_command_64_data
.segname
,
135 strnlen(MLC
.segment_command_64_data
.segname
,
136 sizeof(MLC
.segment_command_64_data
.segname
)));
142 if (Segname
== "__LINKEDIT") {
143 // We update the __LINKEDIT segment later (in layoutTail).
144 assert(LC
.Sections
.empty() && "__LINKEDIT segment has sections");
145 LinkEditLoadCommand
= &MLC
;
149 // Update file offsets and sizes of sections.
150 uint64_t SegOffset
= Offset
;
151 uint64_t SegFileSize
= 0;
153 for (std::unique_ptr
<Section
> &Sec
: LC
.Sections
) {
154 assert(SegmentVmAddr
<= Sec
->Addr
&&
155 "Section's address cannot be smaller than Segment's one");
156 uint32_t SectOffset
= Sec
->Addr
- SegmentVmAddr
;
158 if (!Sec
->hasValidOffset()) {
161 uint64_t PaddingSize
=
162 offsetToAlignment(SegFileSize
, Align(1ull << Sec
->Align
));
163 Sec
->Offset
= SegOffset
+ SegFileSize
+ PaddingSize
;
164 Sec
->Size
= Sec
->Content
.size();
165 SegFileSize
+= PaddingSize
+ Sec
->Size
;
168 if (!Sec
->hasValidOffset()) {
171 Sec
->Offset
= SegOffset
+ SectOffset
;
172 Sec
->Size
= Sec
->Content
.size();
173 SegFileSize
= std::max(SegFileSize
, SectOffset
+ Sec
->Size
);
176 VMSize
= std::max(VMSize
, SectOffset
+ Sec
->Size
);
180 Offset
+= SegFileSize
;
182 Offset
= alignTo(Offset
+ SegFileSize
, PageSize
);
183 SegFileSize
= alignTo(SegFileSize
, PageSize
);
184 // Use the original vmsize if the segment is __PAGEZERO.
186 Segname
== "__PAGEZERO" ? SegmentVmSize
: alignTo(VMSize
, PageSize
);
189 switch (MLC
.load_command_data
.cmd
) {
190 case MachO::LC_SEGMENT
:
191 MLC
.segment_command_data
.cmdsize
=
192 sizeof(MachO::segment_command
) +
193 sizeof(MachO::section
) * LC
.Sections
.size();
194 MLC
.segment_command_data
.nsects
= LC
.Sections
.size();
195 MLC
.segment_command_data
.fileoff
= SegOffset
;
196 MLC
.segment_command_data
.vmsize
= VMSize
;
197 MLC
.segment_command_data
.filesize
= SegFileSize
;
199 case MachO::LC_SEGMENT_64
:
200 MLC
.segment_command_64_data
.cmdsize
=
201 sizeof(MachO::segment_command_64
) +
202 sizeof(MachO::section_64
) * LC
.Sections
.size();
203 MLC
.segment_command_64_data
.nsects
= LC
.Sections
.size();
204 MLC
.segment_command_64_data
.fileoff
= SegOffset
;
205 MLC
.segment_command_64_data
.vmsize
= VMSize
;
206 MLC
.segment_command_64_data
.filesize
= SegFileSize
;
214 uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset
) {
215 for (LoadCommand
&LC
: O
.LoadCommands
)
216 for (std::unique_ptr
<Section
> &Sec
: LC
.Sections
) {
217 Sec
->RelOff
= Sec
->Relocations
.empty() ? 0 : Offset
;
218 Sec
->NReloc
= Sec
->Relocations
.size();
219 Offset
+= sizeof(MachO::any_relocation_info
) * Sec
->NReloc
;
225 Error
MachOLayoutBuilder::layoutTail(uint64_t Offset
) {
226 // If we are building the layout of an executable or dynamic library
227 // which does not have any segments other than __LINKEDIT,
228 // the Offset can be equal to zero by this time. It happens because of the
229 // convention that in such cases the file offsets specified by LC_SEGMENT
230 // start with zero (unlike the case of a relocatable object file).
231 const uint64_t HeaderSize
=
232 Is64Bit
? sizeof(MachO::mach_header_64
) : sizeof(MachO::mach_header
);
233 assert((!(O
.Header
.FileType
== MachO::HeaderFileType::MH_OBJECT
) ||
234 Offset
>= HeaderSize
+ O
.Header
.SizeOfCmds
) &&
235 "Incorrect tail offset");
236 Offset
= std::max(Offset
, HeaderSize
+ O
.Header
.SizeOfCmds
);
238 // The order of LINKEDIT elements is as follows:
239 // rebase info, binding info, weak binding info, lazy binding info, export
240 // trie, data-in-code, symbol table, indirect symbol table, symbol table
241 // strings, code signature.
242 uint64_t NListSize
= Is64Bit
? sizeof(MachO::nlist_64
) : sizeof(MachO::nlist
);
243 uint64_t StartOfLinkEdit
= Offset
;
244 uint64_t StartOfRebaseInfo
= StartOfLinkEdit
;
245 uint64_t StartOfBindingInfo
= StartOfRebaseInfo
+ O
.Rebases
.Opcodes
.size();
246 uint64_t StartOfWeakBindingInfo
= StartOfBindingInfo
+ O
.Binds
.Opcodes
.size();
247 uint64_t StartOfLazyBindingInfo
=
248 StartOfWeakBindingInfo
+ O
.WeakBinds
.Opcodes
.size();
249 uint64_t StartOfExportTrie
=
250 StartOfLazyBindingInfo
+ O
.LazyBinds
.Opcodes
.size();
251 uint64_t StartOfFunctionStarts
= StartOfExportTrie
+ O
.Exports
.Trie
.size();
252 uint64_t StartOfDyldExportsTrie
=
253 StartOfFunctionStarts
+ O
.FunctionStarts
.Data
.size();
254 uint64_t StartOfChainedFixups
=
255 StartOfDyldExportsTrie
+ O
.ExportsTrie
.Data
.size();
256 uint64_t StartOfDataInCode
=
257 StartOfChainedFixups
+ O
.ChainedFixups
.Data
.size();
258 uint64_t StartOfLinkerOptimizationHint
=
259 StartOfDataInCode
+ O
.DataInCode
.Data
.size();
260 uint64_t StartOfSymbols
=
261 StartOfLinkerOptimizationHint
+ O
.LinkerOptimizationHint
.Data
.size();
262 uint64_t StartOfIndirectSymbols
=
263 StartOfSymbols
+ NListSize
* O
.SymTable
.Symbols
.size();
264 uint64_t StartOfSymbolStrings
=
265 StartOfIndirectSymbols
+
266 sizeof(uint32_t) * O
.IndirectSymTable
.Symbols
.size();
267 uint64_t StartOfCodeSignature
=
268 StartOfSymbolStrings
+ StrTableBuilder
.getSize();
269 uint32_t CodeSignatureSize
= 0;
270 if (O
.CodeSignatureCommandIndex
) {
271 StartOfCodeSignature
= alignTo(StartOfCodeSignature
, 16);
273 // Note: These calculations are to be kept in sync with the same
274 // calculations performed in LLD's CodeSignatureSection.
275 const uint32_t AllHeadersSize
=
276 alignTo(CodeSignature
.FixedHeadersSize
+ OutputFileName
.size() + 1,
277 CodeSignature
.Align
);
278 const uint32_t BlockCount
=
279 (StartOfCodeSignature
+ CodeSignature
.BlockSize
- 1) /
280 CodeSignature
.BlockSize
;
281 const uint32_t Size
=
282 alignTo(AllHeadersSize
+ BlockCount
* CodeSignature
.HashSize
,
283 CodeSignature
.Align
);
285 CodeSignature
.StartOffset
= StartOfCodeSignature
;
286 CodeSignature
.AllHeadersSize
= AllHeadersSize
;
287 CodeSignature
.BlockCount
= BlockCount
;
288 CodeSignature
.OutputFileName
= OutputFileName
;
289 CodeSignature
.Size
= Size
;
290 CodeSignatureSize
= Size
;
292 uint64_t LinkEditSize
=
293 StartOfCodeSignature
+ CodeSignatureSize
- StartOfLinkEdit
;
295 // Now we have determined the layout of the contents of the __LINKEDIT
296 // segment. Update its load command.
297 if (LinkEditLoadCommand
) {
298 MachO::macho_load_command
*MLC
= LinkEditLoadCommand
;
299 switch (LinkEditLoadCommand
->load_command_data
.cmd
) {
300 case MachO::LC_SEGMENT
:
301 MLC
->segment_command_data
.cmdsize
= sizeof(MachO::segment_command
);
302 MLC
->segment_command_data
.fileoff
= StartOfLinkEdit
;
303 MLC
->segment_command_data
.vmsize
= alignTo(LinkEditSize
, PageSize
);
304 MLC
->segment_command_data
.filesize
= LinkEditSize
;
306 case MachO::LC_SEGMENT_64
:
307 MLC
->segment_command_64_data
.cmdsize
= sizeof(MachO::segment_command_64
);
308 MLC
->segment_command_64_data
.fileoff
= StartOfLinkEdit
;
309 MLC
->segment_command_64_data
.vmsize
= alignTo(LinkEditSize
, PageSize
);
310 MLC
->segment_command_64_data
.filesize
= LinkEditSize
;
315 for (LoadCommand
&LC
: O
.LoadCommands
) {
316 auto &MLC
= LC
.MachOLoadCommand
;
317 auto cmd
= MLC
.load_command_data
.cmd
;
319 case MachO::LC_CODE_SIGNATURE
:
320 MLC
.linkedit_data_command_data
.dataoff
= StartOfCodeSignature
;
321 MLC
.linkedit_data_command_data
.datasize
= CodeSignatureSize
;
323 case MachO::LC_SYMTAB
:
324 MLC
.symtab_command_data
.symoff
= StartOfSymbols
;
325 MLC
.symtab_command_data
.nsyms
= O
.SymTable
.Symbols
.size();
326 MLC
.symtab_command_data
.stroff
= StartOfSymbolStrings
;
327 MLC
.symtab_command_data
.strsize
= StrTableBuilder
.getSize();
329 case MachO::LC_DYSYMTAB
: {
330 if (MLC
.dysymtab_command_data
.ntoc
!= 0 ||
331 MLC
.dysymtab_command_data
.nmodtab
!= 0 ||
332 MLC
.dysymtab_command_data
.nextrefsyms
!= 0 ||
333 MLC
.dysymtab_command_data
.nlocrel
!= 0 ||
334 MLC
.dysymtab_command_data
.nextrel
!= 0)
335 return createStringError(llvm::errc::not_supported
,
336 "shared library is not yet supported");
338 if (!O
.IndirectSymTable
.Symbols
.empty()) {
339 MLC
.dysymtab_command_data
.indirectsymoff
= StartOfIndirectSymbols
;
340 MLC
.dysymtab_command_data
.nindirectsyms
=
341 O
.IndirectSymTable
.Symbols
.size();
347 case MachO::LC_DATA_IN_CODE
:
348 MLC
.linkedit_data_command_data
.dataoff
= StartOfDataInCode
;
349 MLC
.linkedit_data_command_data
.datasize
= O
.DataInCode
.Data
.size();
351 case MachO::LC_LINKER_OPTIMIZATION_HINT
:
352 MLC
.linkedit_data_command_data
.dataoff
= StartOfLinkerOptimizationHint
;
353 MLC
.linkedit_data_command_data
.datasize
=
354 O
.LinkerOptimizationHint
.Data
.size();
356 case MachO::LC_FUNCTION_STARTS
:
357 MLC
.linkedit_data_command_data
.dataoff
= StartOfFunctionStarts
;
358 MLC
.linkedit_data_command_data
.datasize
= O
.FunctionStarts
.Data
.size();
360 case MachO::LC_DYLD_CHAINED_FIXUPS
:
361 MLC
.linkedit_data_command_data
.dataoff
= StartOfChainedFixups
;
362 MLC
.linkedit_data_command_data
.datasize
= O
.ChainedFixups
.Data
.size();
364 case MachO::LC_DYLD_EXPORTS_TRIE
:
365 MLC
.linkedit_data_command_data
.dataoff
= StartOfDyldExportsTrie
;
366 MLC
.linkedit_data_command_data
.datasize
= O
.ExportsTrie
.Data
.size();
368 case MachO::LC_DYLD_INFO
:
369 case MachO::LC_DYLD_INFO_ONLY
:
370 MLC
.dyld_info_command_data
.rebase_off
=
371 O
.Rebases
.Opcodes
.empty() ? 0 : StartOfRebaseInfo
;
372 MLC
.dyld_info_command_data
.rebase_size
= O
.Rebases
.Opcodes
.size();
373 MLC
.dyld_info_command_data
.bind_off
=
374 O
.Binds
.Opcodes
.empty() ? 0 : StartOfBindingInfo
;
375 MLC
.dyld_info_command_data
.bind_size
= O
.Binds
.Opcodes
.size();
376 MLC
.dyld_info_command_data
.weak_bind_off
=
377 O
.WeakBinds
.Opcodes
.empty() ? 0 : StartOfWeakBindingInfo
;
378 MLC
.dyld_info_command_data
.weak_bind_size
= O
.WeakBinds
.Opcodes
.size();
379 MLC
.dyld_info_command_data
.lazy_bind_off
=
380 O
.LazyBinds
.Opcodes
.empty() ? 0 : StartOfLazyBindingInfo
;
381 MLC
.dyld_info_command_data
.lazy_bind_size
= O
.LazyBinds
.Opcodes
.size();
382 MLC
.dyld_info_command_data
.export_off
=
383 O
.Exports
.Trie
.empty() ? 0 : StartOfExportTrie
;
384 MLC
.dyld_info_command_data
.export_size
= O
.Exports
.Trie
.size();
386 // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
387 // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
388 // relative virtual address. At the moment modification of the __TEXT
389 // segment of executables isn't supported anyway (e.g. data in code entries
390 // are not recalculated). Moreover, in general
391 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
392 // without making additional assumptions (e.g. that the entire __TEXT
393 // segment should be encrypted) we do not know how to recalculate the
394 // boundaries of the encrypted part. For now just copy over these load
395 // commands until we encounter a real world usecase where
396 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
397 case MachO::LC_ENCRYPTION_INFO
:
398 case MachO::LC_ENCRYPTION_INFO_64
:
399 case MachO::LC_LOAD_DYLINKER
:
401 case MachO::LC_RPATH
:
402 case MachO::LC_SEGMENT
:
403 case MachO::LC_SEGMENT_64
:
404 case MachO::LC_VERSION_MIN_MACOSX
:
405 case MachO::LC_VERSION_MIN_IPHONEOS
:
406 case MachO::LC_VERSION_MIN_TVOS
:
407 case MachO::LC_VERSION_MIN_WATCHOS
:
408 case MachO::LC_BUILD_VERSION
:
409 case MachO::LC_ID_DYLIB
:
410 case MachO::LC_LOAD_DYLIB
:
411 case MachO::LC_LOAD_WEAK_DYLIB
:
413 case MachO::LC_SOURCE_VERSION
:
414 case MachO::LC_THREAD
:
415 case MachO::LC_UNIXTHREAD
:
416 case MachO::LC_SUB_FRAMEWORK
:
417 case MachO::LC_SUB_UMBRELLA
:
418 case MachO::LC_SUB_CLIENT
:
419 case MachO::LC_SUB_LIBRARY
:
420 // Nothing to update.
423 // Abort if it's unsupported in order to prevent corrupting the object.
424 return createStringError(llvm::errc::not_supported
,
425 "unsupported load command (cmd=0x%x)", cmd
);
429 return Error::success();
432 Error
MachOLayoutBuilder::layout() {
433 O
.Header
.NCmds
= O
.LoadCommands
.size();
434 O
.Header
.SizeOfCmds
= computeSizeOfCmds();
435 constructStringTable();
436 updateSymbolIndexes();
437 uint64_t Offset
= layoutSegments();
438 Offset
= layoutRelocations(Offset
);
439 return layoutTail(Offset
);