1 //===- Writer.cpp ---------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "ConcatOutputSection.h"
12 #include "InputFiles.h"
13 #include "InputSection.h"
15 #include "OutputSection.h"
16 #include "OutputSegment.h"
17 #include "SectionPriorities.h"
18 #include "SymbolTable.h"
20 #include "SyntheticSections.h"
22 #include "UnwindInfoSection.h"
23 #include "llvm/Support/Parallel.h"
25 #include "lld/Common/Arrays.h"
26 #include "lld/Common/CommonLinkerContext.h"
27 #include "llvm/BinaryFormat/MachO.h"
28 #include "llvm/Config/llvm-config.h"
29 #include "llvm/Support/LEB128.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Support/Parallel.h"
32 #include "llvm/Support/Path.h"
33 #include "llvm/Support/ThreadPool.h"
34 #include "llvm/Support/TimeProfiler.h"
35 #include "llvm/Support/xxhash.h"
40 using namespace llvm::MachO
;
41 using namespace llvm::sys
;
43 using namespace lld::macho
;
50 Writer() : buffer(errorHandler().outputBuffer
) {}
52 void treatSpecialUndefineds();
53 void scanRelocations();
55 template <class LP
> void createOutputSections();
56 template <class LP
> void createLoadCommands();
57 void finalizeAddresses();
58 void finalizeLinkEditSegment();
59 void assignAddresses(OutputSegment
*);
64 void writeCodeSignature();
65 void writeOutputFile();
67 template <class LP
> void run();
69 ThreadPool threadPool
;
70 std::unique_ptr
<FileOutputBuffer
> &buffer
;
73 MachHeaderSection
*header
= nullptr;
74 StringTableSection
*stringTableSection
= nullptr;
75 SymtabSection
*symtabSection
= nullptr;
76 IndirectSymtabSection
*indirectSymtabSection
= nullptr;
77 CodeSignatureSection
*codeSignatureSection
= nullptr;
78 DataInCodeSection
*dataInCodeSection
= nullptr;
79 FunctionStartsSection
*functionStartsSection
= nullptr;
81 LCUuid
*uuidCommand
= nullptr;
82 OutputSegment
*linkEditSegment
= nullptr;
85 // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
86 class LCDyldInfo final
: public LoadCommand
{
88 LCDyldInfo(RebaseSection
*rebaseSection
, BindingSection
*bindingSection
,
89 WeakBindingSection
*weakBindingSection
,
90 LazyBindingSection
*lazyBindingSection
,
91 ExportSection
*exportSection
)
92 : rebaseSection(rebaseSection
), bindingSection(bindingSection
),
93 weakBindingSection(weakBindingSection
),
94 lazyBindingSection(lazyBindingSection
), exportSection(exportSection
) {}
96 uint32_t getSize() const override
{ return sizeof(dyld_info_command
); }
98 void writeTo(uint8_t *buf
) const override
{
99 auto *c
= reinterpret_cast<dyld_info_command
*>(buf
);
100 c
->cmd
= LC_DYLD_INFO_ONLY
;
101 c
->cmdsize
= getSize();
102 if (rebaseSection
->isNeeded()) {
103 c
->rebase_off
= rebaseSection
->fileOff
;
104 c
->rebase_size
= rebaseSection
->getFileSize();
106 if (bindingSection
->isNeeded()) {
107 c
->bind_off
= bindingSection
->fileOff
;
108 c
->bind_size
= bindingSection
->getFileSize();
110 if (weakBindingSection
->isNeeded()) {
111 c
->weak_bind_off
= weakBindingSection
->fileOff
;
112 c
->weak_bind_size
= weakBindingSection
->getFileSize();
114 if (lazyBindingSection
->isNeeded()) {
115 c
->lazy_bind_off
= lazyBindingSection
->fileOff
;
116 c
->lazy_bind_size
= lazyBindingSection
->getFileSize();
118 if (exportSection
->isNeeded()) {
119 c
->export_off
= exportSection
->fileOff
;
120 c
->export_size
= exportSection
->getFileSize();
124 RebaseSection
*rebaseSection
;
125 BindingSection
*bindingSection
;
126 WeakBindingSection
*weakBindingSection
;
127 LazyBindingSection
*lazyBindingSection
;
128 ExportSection
*exportSection
;
131 class LCSubFramework final
: public LoadCommand
{
133 LCSubFramework(StringRef umbrella
) : umbrella(umbrella
) {}
135 uint32_t getSize() const override
{
136 return alignTo(sizeof(sub_framework_command
) + umbrella
.size() + 1,
140 void writeTo(uint8_t *buf
) const override
{
141 auto *c
= reinterpret_cast<sub_framework_command
*>(buf
);
142 buf
+= sizeof(sub_framework_command
);
144 c
->cmd
= LC_SUB_FRAMEWORK
;
145 c
->cmdsize
= getSize();
146 c
->umbrella
= sizeof(sub_framework_command
);
148 memcpy(buf
, umbrella
.data(), umbrella
.size());
149 buf
[umbrella
.size()] = '\0';
153 const StringRef umbrella
;
156 class LCFunctionStarts final
: public LoadCommand
{
158 explicit LCFunctionStarts(FunctionStartsSection
*functionStartsSection
)
159 : functionStartsSection(functionStartsSection
) {}
161 uint32_t getSize() const override
{ return sizeof(linkedit_data_command
); }
163 void writeTo(uint8_t *buf
) const override
{
164 auto *c
= reinterpret_cast<linkedit_data_command
*>(buf
);
165 c
->cmd
= LC_FUNCTION_STARTS
;
166 c
->cmdsize
= getSize();
167 c
->dataoff
= functionStartsSection
->fileOff
;
168 c
->datasize
= functionStartsSection
->getFileSize();
172 FunctionStartsSection
*functionStartsSection
;
175 class LCDataInCode final
: public LoadCommand
{
177 explicit LCDataInCode(DataInCodeSection
*dataInCodeSection
)
178 : dataInCodeSection(dataInCodeSection
) {}
180 uint32_t getSize() const override
{ return sizeof(linkedit_data_command
); }
182 void writeTo(uint8_t *buf
) const override
{
183 auto *c
= reinterpret_cast<linkedit_data_command
*>(buf
);
184 c
->cmd
= LC_DATA_IN_CODE
;
185 c
->cmdsize
= getSize();
186 c
->dataoff
= dataInCodeSection
->fileOff
;
187 c
->datasize
= dataInCodeSection
->getFileSize();
191 DataInCodeSection
*dataInCodeSection
;
194 class LCDysymtab final
: public LoadCommand
{
196 LCDysymtab(SymtabSection
*symtabSection
,
197 IndirectSymtabSection
*indirectSymtabSection
)
198 : symtabSection(symtabSection
),
199 indirectSymtabSection(indirectSymtabSection
) {}
201 uint32_t getSize() const override
{ return sizeof(dysymtab_command
); }
203 void writeTo(uint8_t *buf
) const override
{
204 auto *c
= reinterpret_cast<dysymtab_command
*>(buf
);
205 c
->cmd
= LC_DYSYMTAB
;
206 c
->cmdsize
= getSize();
209 c
->iextdefsym
= c
->nlocalsym
= symtabSection
->getNumLocalSymbols();
210 c
->nextdefsym
= symtabSection
->getNumExternalSymbols();
211 c
->iundefsym
= c
->iextdefsym
+ c
->nextdefsym
;
212 c
->nundefsym
= symtabSection
->getNumUndefinedSymbols();
214 c
->indirectsymoff
= indirectSymtabSection
->fileOff
;
215 c
->nindirectsyms
= indirectSymtabSection
->getNumSymbols();
218 SymtabSection
*symtabSection
;
219 IndirectSymtabSection
*indirectSymtabSection
;
222 template <class LP
> class LCSegment final
: public LoadCommand
{
224 LCSegment(StringRef name
, OutputSegment
*seg
) : name(name
), seg(seg
) {}
226 uint32_t getSize() const override
{
227 return sizeof(typename
LP::segment_command
) +
228 seg
->numNonHiddenSections() * sizeof(typename
LP::section
);
231 void writeTo(uint8_t *buf
) const override
{
232 using SegmentCommand
= typename
LP::segment_command
;
233 using SectionHeader
= typename
LP::section
;
235 auto *c
= reinterpret_cast<SegmentCommand
*>(buf
);
236 buf
+= sizeof(SegmentCommand
);
238 c
->cmd
= LP::segmentLCType
;
239 c
->cmdsize
= getSize();
240 memcpy(c
->segname
, name
.data(), name
.size());
241 c
->fileoff
= seg
->fileOff
;
242 c
->maxprot
= seg
->maxProt
;
243 c
->initprot
= seg
->initProt
;
245 c
->vmaddr
= seg
->addr
;
246 c
->vmsize
= seg
->vmSize
;
247 c
->filesize
= seg
->fileSize
;
248 c
->nsects
= seg
->numNonHiddenSections();
250 for (const OutputSection
*osec
: seg
->getSections()) {
251 if (osec
->isHidden())
254 auto *sectHdr
= reinterpret_cast<SectionHeader
*>(buf
);
255 buf
+= sizeof(SectionHeader
);
257 memcpy(sectHdr
->sectname
, osec
->name
.data(), osec
->name
.size());
258 memcpy(sectHdr
->segname
, name
.data(), name
.size());
260 sectHdr
->addr
= osec
->addr
;
261 sectHdr
->offset
= osec
->fileOff
;
262 sectHdr
->align
= Log2_32(osec
->align
);
263 sectHdr
->flags
= osec
->flags
;
264 sectHdr
->size
= osec
->getSize();
265 sectHdr
->reserved1
= osec
->reserved1
;
266 sectHdr
->reserved2
= osec
->reserved2
;
275 class LCMain final
: public LoadCommand
{
276 uint32_t getSize() const override
{
277 return sizeof(structs::entry_point_command
);
280 void writeTo(uint8_t *buf
) const override
{
281 auto *c
= reinterpret_cast<structs::entry_point_command
*>(buf
);
283 c
->cmdsize
= getSize();
285 if (config
->entry
->isInStubs())
287 in
.stubs
->fileOff
+ config
->entry
->stubsIndex
* target
->stubSize
;
289 c
->entryoff
= config
->entry
->getVA() - in
.header
->addr
;
295 class LCSymtab final
: public LoadCommand
{
297 LCSymtab(SymtabSection
*symtabSection
, StringTableSection
*stringTableSection
)
298 : symtabSection(symtabSection
), stringTableSection(stringTableSection
) {}
300 uint32_t getSize() const override
{ return sizeof(symtab_command
); }
302 void writeTo(uint8_t *buf
) const override
{
303 auto *c
= reinterpret_cast<symtab_command
*>(buf
);
305 c
->cmdsize
= getSize();
306 c
->symoff
= symtabSection
->fileOff
;
307 c
->nsyms
= symtabSection
->getNumSymbols();
308 c
->stroff
= stringTableSection
->fileOff
;
309 c
->strsize
= stringTableSection
->getFileSize();
312 SymtabSection
*symtabSection
= nullptr;
313 StringTableSection
*stringTableSection
= nullptr;
316 // There are several dylib load commands that share the same structure:
319 // * LC_REEXPORT_DYLIB
320 class LCDylib final
: public LoadCommand
{
322 LCDylib(LoadCommandType type
, StringRef path
,
323 uint32_t compatibilityVersion
= 0, uint32_t currentVersion
= 0)
324 : type(type
), path(path
), compatibilityVersion(compatibilityVersion
),
325 currentVersion(currentVersion
) {
329 uint32_t getSize() const override
{
330 return alignTo(sizeof(dylib_command
) + path
.size() + 1, 8);
333 void writeTo(uint8_t *buf
) const override
{
334 auto *c
= reinterpret_cast<dylib_command
*>(buf
);
335 buf
+= sizeof(dylib_command
);
338 c
->cmdsize
= getSize();
339 c
->dylib
.name
= sizeof(dylib_command
);
340 c
->dylib
.timestamp
= 0;
341 c
->dylib
.compatibility_version
= compatibilityVersion
;
342 c
->dylib
.current_version
= currentVersion
;
344 memcpy(buf
, path
.data(), path
.size());
345 buf
[path
.size()] = '\0';
348 static uint32_t getInstanceCount() { return instanceCount
; }
349 static void resetInstanceCount() { instanceCount
= 0; }
352 LoadCommandType type
;
354 uint32_t compatibilityVersion
;
355 uint32_t currentVersion
;
356 static uint32_t instanceCount
;
359 uint32_t LCDylib::instanceCount
= 0;
361 class LCLoadDylinker final
: public LoadCommand
{
363 uint32_t getSize() const override
{
364 return alignTo(sizeof(dylinker_command
) + path
.size() + 1, 8);
367 void writeTo(uint8_t *buf
) const override
{
368 auto *c
= reinterpret_cast<dylinker_command
*>(buf
);
369 buf
+= sizeof(dylinker_command
);
371 c
->cmd
= LC_LOAD_DYLINKER
;
372 c
->cmdsize
= getSize();
373 c
->name
= sizeof(dylinker_command
);
375 memcpy(buf
, path
.data(), path
.size());
376 buf
[path
.size()] = '\0';
380 // Recent versions of Darwin won't run any binary that has dyld at a
381 // different location.
382 const StringRef path
= "/usr/lib/dyld";
385 class LCRPath final
: public LoadCommand
{
387 explicit LCRPath(StringRef path
) : path(path
) {}
389 uint32_t getSize() const override
{
390 return alignTo(sizeof(rpath_command
) + path
.size() + 1, target
->wordSize
);
393 void writeTo(uint8_t *buf
) const override
{
394 auto *c
= reinterpret_cast<rpath_command
*>(buf
);
395 buf
+= sizeof(rpath_command
);
398 c
->cmdsize
= getSize();
399 c
->path
= sizeof(rpath_command
);
401 memcpy(buf
, path
.data(), path
.size());
402 buf
[path
.size()] = '\0';
409 class LCMinVersion final
: public LoadCommand
{
411 explicit LCMinVersion(const PlatformInfo
&platformInfo
)
412 : platformInfo(platformInfo
) {}
414 uint32_t getSize() const override
{ return sizeof(version_min_command
); }
416 void writeTo(uint8_t *buf
) const override
{
417 auto *c
= reinterpret_cast<version_min_command
*>(buf
);
418 switch (platformInfo
.target
.Platform
) {
420 c
->cmd
= LC_VERSION_MIN_MACOSX
;
423 case PLATFORM_IOSSIMULATOR
:
424 c
->cmd
= LC_VERSION_MIN_IPHONEOS
;
427 case PLATFORM_TVOSSIMULATOR
:
428 c
->cmd
= LC_VERSION_MIN_TVOS
;
430 case PLATFORM_WATCHOS
:
431 case PLATFORM_WATCHOSSIMULATOR
:
432 c
->cmd
= LC_VERSION_MIN_WATCHOS
;
435 llvm_unreachable("invalid platform");
438 c
->cmdsize
= getSize();
439 c
->version
= encodeVersion(platformInfo
.minimum
);
440 c
->sdk
= encodeVersion(platformInfo
.sdk
);
444 const PlatformInfo
&platformInfo
;
447 class LCBuildVersion final
: public LoadCommand
{
449 explicit LCBuildVersion(const PlatformInfo
&platformInfo
)
450 : platformInfo(platformInfo
) {}
452 const int ntools
= 1;
454 uint32_t getSize() const override
{
455 return sizeof(build_version_command
) + ntools
* sizeof(build_tool_version
);
458 void writeTo(uint8_t *buf
) const override
{
459 auto *c
= reinterpret_cast<build_version_command
*>(buf
);
460 c
->cmd
= LC_BUILD_VERSION
;
461 c
->cmdsize
= getSize();
463 c
->platform
= static_cast<uint32_t>(platformInfo
.target
.Platform
);
464 c
->minos
= encodeVersion(platformInfo
.minimum
);
465 c
->sdk
= encodeVersion(platformInfo
.sdk
);
468 auto *t
= reinterpret_cast<build_tool_version
*>(&c
[1]);
470 t
->version
= encodeVersion(VersionTuple(
471 LLVM_VERSION_MAJOR
, LLVM_VERSION_MINOR
, LLVM_VERSION_PATCH
));
475 const PlatformInfo
&platformInfo
;
478 // Stores a unique identifier for the output file based on a hash (xxhash,
479 // not MD5) of its contents. In order to hash the contents, we must first
480 // write them, but LC_UUID itself must be part of the written contents in
481 // order for all the offsets to be calculated correctly. We resolve this
482 // circular dependency by first writing an LC_UUID with an all-zero UUID,
483 // then updating the UUID with its real value later.
484 class LCUuid final
: public LoadCommand
{
486 uint32_t getSize() const override
{ return sizeof(uuid_command
); }
488 void writeTo(uint8_t *buf
) const override
{
489 auto *c
= reinterpret_cast<uuid_command
*>(buf
);
491 c
->cmdsize
= getSize();
495 void writeUuid(uint64_t digest
) const {
496 // xxhash only gives us 8 bytes, so put some fixed data in the other half.
497 static_assert(sizeof(uuid_command::uuid
) == 16, "unexpected uuid size");
498 memcpy(uuidBuf
, "LLD\xa1UU1D", 8);
499 memcpy(uuidBuf
+ 8, &digest
, 8);
501 // RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in
502 // byte 8. Byte 6 is already fine due to the fixed data we put in. We don't
503 // want to lose bits of the digest in byte 8, so swap that with a byte of
504 // fixed data that happens to have the right bits set.
505 std::swap(uuidBuf
[3], uuidBuf
[8]);
507 // Claim that this is an MD5-based hash. It isn't, but this signals that
508 // this is not a time-based and not a random hash. MD5 seems like the least
509 // bad lie we can put here.
510 assert((uuidBuf
[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3");
511 assert((uuidBuf
[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2");
514 mutable uint8_t *uuidBuf
;
517 template <class LP
> class LCEncryptionInfo final
: public LoadCommand
{
519 uint32_t getSize() const override
{
520 return sizeof(typename
LP::encryption_info_command
);
523 void writeTo(uint8_t *buf
) const override
{
524 using EncryptionInfo
= typename
LP::encryption_info_command
;
525 auto *c
= reinterpret_cast<EncryptionInfo
*>(buf
);
526 buf
+= sizeof(EncryptionInfo
);
527 c
->cmd
= LP::encryptionInfoLCType
;
528 c
->cmdsize
= getSize();
529 c
->cryptoff
= in
.header
->getSize();
530 auto it
= find_if(outputSegments
, [](const OutputSegment
*seg
) {
531 return seg
->name
== segment_names::text
;
533 assert(it
!= outputSegments
.end());
534 c
->cryptsize
= (*it
)->fileSize
- c
->cryptoff
;
538 class LCCodeSignature final
: public LoadCommand
{
540 LCCodeSignature(CodeSignatureSection
*section
) : section(section
) {}
542 uint32_t getSize() const override
{ return sizeof(linkedit_data_command
); }
544 void writeTo(uint8_t *buf
) const override
{
545 auto *c
= reinterpret_cast<linkedit_data_command
*>(buf
);
546 c
->cmd
= LC_CODE_SIGNATURE
;
547 c
->cmdsize
= getSize();
548 c
->dataoff
= static_cast<uint32_t>(section
->fileOff
);
549 c
->datasize
= section
->getSize();
552 CodeSignatureSection
*section
;
557 void Writer::treatSpecialUndefineds() {
559 if (auto *undefined
= dyn_cast
<Undefined
>(config
->entry
))
560 treatUndefinedSymbol(*undefined
, "the entry point");
562 // FIXME: This prints symbols that are undefined both in input files and
563 // via -u flag twice.
564 for (const Symbol
*sym
: config
->explicitUndefineds
) {
565 if (const auto *undefined
= dyn_cast
<Undefined
>(sym
))
566 treatUndefinedSymbol(*undefined
, "-u");
568 // Literal exported-symbol names must be defined, but glob
569 // patterns need not match.
570 for (const CachedHashStringRef
&cachedName
:
571 config
->exportedSymbols
.literals
) {
572 if (const Symbol
*sym
= symtab
->find(cachedName
))
573 if (const auto *undefined
= dyn_cast
<Undefined
>(sym
))
574 treatUndefinedSymbol(*undefined
, "-exported_symbol(s_list)");
578 // Add stubs and bindings where necessary (e.g. if the symbol is a
580 static void prepareBranchTarget(Symbol
*sym
) {
581 if (auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
582 if (in
.stubs
->addEntry(dysym
)) {
583 if (sym
->isWeakDef()) {
584 in
.binding
->addEntry(dysym
, in
.lazyPointers
->isec
,
585 sym
->stubsIndex
* target
->wordSize
);
586 in
.weakBinding
->addEntry(sym
, in
.lazyPointers
->isec
,
587 sym
->stubsIndex
* target
->wordSize
);
589 in
.lazyBinding
->addEntry(dysym
);
592 } else if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
593 if (defined
->isExternalWeakDef()) {
594 if (in
.stubs
->addEntry(sym
)) {
595 in
.rebase
->addEntry(in
.lazyPointers
->isec
,
596 sym
->stubsIndex
* target
->wordSize
);
597 in
.weakBinding
->addEntry(sym
, in
.lazyPointers
->isec
,
598 sym
->stubsIndex
* target
->wordSize
);
600 } else if (defined
->interposable
) {
601 if (in
.stubs
->addEntry(sym
))
602 in
.lazyBinding
->addEntry(sym
);
605 llvm_unreachable("invalid branch target symbol type");
609 // Can a symbol's address only be resolved at runtime?
610 static bool needsBinding(const Symbol
*sym
) {
611 if (isa
<DylibSymbol
>(sym
))
613 if (const auto *defined
= dyn_cast
<Defined
>(sym
))
614 return defined
->isExternalWeakDef() || defined
->interposable
;
618 static void prepareSymbolRelocation(Symbol
*sym
, const InputSection
*isec
,
619 const lld::macho::Reloc
&r
) {
620 assert(sym
->isLive());
621 const RelocAttrs
&relocAttrs
= target
->getRelocAttrs(r
.type
);
623 if (relocAttrs
.hasAttr(RelocAttrBits::BRANCH
)) {
624 prepareBranchTarget(sym
);
625 } else if (relocAttrs
.hasAttr(RelocAttrBits::GOT
)) {
626 if (relocAttrs
.hasAttr(RelocAttrBits::POINTER
) || needsBinding(sym
))
627 in
.got
->addEntry(sym
);
628 } else if (relocAttrs
.hasAttr(RelocAttrBits::TLV
)) {
629 if (needsBinding(sym
))
630 in
.tlvPointers
->addEntry(sym
);
631 } else if (relocAttrs
.hasAttr(RelocAttrBits::UNSIGNED
)) {
632 // References from thread-local variable sections are treated as offsets
633 // relative to the start of the referent section, and therefore have no
634 // need of rebase opcodes.
635 if (!(isThreadLocalVariables(isec
->getFlags()) && isa
<Defined
>(sym
)))
636 addNonLazyBindingEntries(sym
, isec
, r
.offset
, r
.addend
);
640 void Writer::scanRelocations() {
641 TimeTraceScope
timeScope("Scan relocations");
643 // This can't use a for-each loop: It calls treatUndefinedSymbol(), which can
644 // add to inputSections, which invalidates inputSections's iterators.
645 for (size_t i
= 0; i
< inputSections
.size(); ++i
) {
646 ConcatInputSection
*isec
= inputSections
[i
];
648 if (isec
->shouldOmitFromOutput())
651 for (auto it
= isec
->relocs
.begin(); it
!= isec
->relocs
.end(); ++it
) {
652 lld::macho::Reloc
&r
= *it
;
653 if (target
->hasAttr(r
.type
, RelocAttrBits::SUBTRAHEND
)) {
654 // Skip over the following UNSIGNED relocation -- it's just there as the
655 // minuend, and doesn't have the usual UNSIGNED semantics. We don't want
656 // to emit rebase opcodes for it.
660 if (auto *sym
= r
.referent
.dyn_cast
<Symbol
*>()) {
661 if (auto *undefined
= dyn_cast
<Undefined
>(sym
))
662 treatUndefinedSymbol(*undefined
, isec
, r
.offset
);
663 // treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check.
664 if (!isa
<Undefined
>(sym
) && validateSymbolRelocation(sym
, isec
, r
))
665 prepareSymbolRelocation(sym
, isec
, r
);
667 // Canonicalize the referent so that later accesses in Writer won't
668 // have to worry about it. Perhaps we should do this for Defined::isec
670 auto *referentIsec
= r
.referent
.get
<InputSection
*>();
671 r
.referent
= referentIsec
->canonical();
673 in
.rebase
->addEntry(isec
, r
.offset
);
678 in
.unwindInfo
->prepareRelocations();
681 void Writer::scanSymbols() {
682 TimeTraceScope
timeScope("Scan symbols");
683 for (Symbol
*sym
: symtab
->getSymbols()) {
684 if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
685 if (!defined
->isLive())
687 defined
->canonicalize();
688 if (defined
->overridesWeakDef
)
689 in
.weakBinding
->addNonWeakDefinition(defined
);
690 if (!defined
->isAbsolute() && isCodeSection(defined
->isec
))
691 in
.unwindInfo
->addSymbol(defined
);
692 } else if (const auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
693 // This branch intentionally doesn't check isLive().
694 if (dysym
->isDynamicLookup())
696 dysym
->getFile()->refState
=
697 std::max(dysym
->getFile()->refState
, dysym
->getRefState());
701 for (const InputFile
*file
: inputFiles
) {
702 if (auto *objFile
= dyn_cast
<ObjFile
>(file
))
703 for (Symbol
*sym
: objFile
->symbols
) {
704 if (auto *defined
= dyn_cast_or_null
<Defined
>(sym
)) {
705 if (!defined
->isLive())
707 defined
->canonicalize();
708 if (!defined
->isExternal() && !defined
->isAbsolute() &&
709 isCodeSection(defined
->isec
))
710 in
.unwindInfo
->addSymbol(defined
);
716 // TODO: ld64 enforces the old load commands in a few other cases.
717 static bool useLCBuildVersion(const PlatformInfo
&platformInfo
) {
718 static const std::vector
<std::pair
<PlatformType
, VersionTuple
>> minVersion
= {
719 {PLATFORM_MACOS
, VersionTuple(10, 14)},
720 {PLATFORM_IOS
, VersionTuple(12, 0)},
721 {PLATFORM_IOSSIMULATOR
, VersionTuple(13, 0)},
722 {PLATFORM_TVOS
, VersionTuple(12, 0)},
723 {PLATFORM_TVOSSIMULATOR
, VersionTuple(13, 0)},
724 {PLATFORM_WATCHOS
, VersionTuple(5, 0)},
725 {PLATFORM_WATCHOSSIMULATOR
, VersionTuple(6, 0)}};
726 auto it
= llvm::find_if(minVersion
, [&](const auto &p
) {
727 return p
.first
== platformInfo
.target
.Platform
;
729 return it
== minVersion
.end() ? true : platformInfo
.minimum
>= it
->second
;
732 template <class LP
> void Writer::createLoadCommands() {
733 uint8_t segIndex
= 0;
734 for (OutputSegment
*seg
: outputSegments
) {
735 in
.header
->addLoadCommand(make
<LCSegment
<LP
>>(seg
->name
, seg
));
736 seg
->index
= segIndex
++;
739 in
.header
->addLoadCommand(make
<LCDyldInfo
>(
740 in
.rebase
, in
.binding
, in
.weakBinding
, in
.lazyBinding
, in
.exports
));
741 in
.header
->addLoadCommand(make
<LCSymtab
>(symtabSection
, stringTableSection
));
742 in
.header
->addLoadCommand(
743 make
<LCDysymtab
>(symtabSection
, indirectSymtabSection
));
744 if (!config
->umbrella
.empty())
745 in
.header
->addLoadCommand(make
<LCSubFramework
>(config
->umbrella
));
746 if (config
->emitEncryptionInfo
)
747 in
.header
->addLoadCommand(make
<LCEncryptionInfo
<LP
>>());
748 for (StringRef path
: config
->runtimePaths
)
749 in
.header
->addLoadCommand(make
<LCRPath
>(path
));
751 switch (config
->outputType
) {
753 in
.header
->addLoadCommand(make
<LCLoadDylinker
>());
756 in
.header
->addLoadCommand(make
<LCDylib
>(LC_ID_DYLIB
, config
->installName
,
757 config
->dylibCompatibilityVersion
,
758 config
->dylibCurrentVersion
));
763 llvm_unreachable("unhandled output file type");
766 uuidCommand
= make
<LCUuid
>();
767 in
.header
->addLoadCommand(uuidCommand
);
769 if (useLCBuildVersion(config
->platformInfo
))
770 in
.header
->addLoadCommand(make
<LCBuildVersion
>(config
->platformInfo
));
772 in
.header
->addLoadCommand(make
<LCMinVersion
>(config
->platformInfo
));
774 if (config
->secondaryPlatformInfo
) {
775 in
.header
->addLoadCommand(
776 make
<LCBuildVersion
>(*config
->secondaryPlatformInfo
));
779 // This is down here to match ld64's load command order.
780 if (config
->outputType
== MH_EXECUTE
)
781 in
.header
->addLoadCommand(make
<LCMain
>());
783 // See ld64's OutputFile::buildDylibOrdinalMapping for the corresponding
784 // library ordinal computation code in ld64.
785 int64_t dylibOrdinal
= 1;
786 DenseMap
<StringRef
, int64_t> ordinalForInstallName
;
788 std::vector
<DylibFile
*> dylibFiles
;
789 for (InputFile
*file
: inputFiles
) {
790 if (auto *dylibFile
= dyn_cast
<DylibFile
>(file
))
791 dylibFiles
.push_back(dylibFile
);
793 for (size_t i
= 0; i
< dylibFiles
.size(); ++i
)
794 dylibFiles
.insert(dylibFiles
.end(), dylibFiles
[i
]->extraDylibs
.begin(),
795 dylibFiles
[i
]->extraDylibs
.end());
797 for (DylibFile
*dylibFile
: dylibFiles
) {
798 if (dylibFile
->isBundleLoader
) {
799 dylibFile
->ordinal
= BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE
;
800 // Shortcut since bundle-loader does not re-export the symbols.
802 dylibFile
->reexport
= false;
806 // Don't emit load commands for a dylib that is not referenced if:
807 // - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER --
808 // if it's on the linker command line, it's explicit)
809 // - or it's marked MH_DEAD_STRIPPABLE_DYLIB
810 // - or the flag -dead_strip_dylibs is used
811 // FIXME: `isReferenced()` is currently computed before dead code
812 // stripping, so references from dead code keep a dylib alive. This
813 // matches ld64, but it's something we should do better.
814 if (!dylibFile
->isReferenced() && !dylibFile
->forceNeeded
&&
815 (!dylibFile
->isExplicitlyLinked() || dylibFile
->deadStrippable
||
816 config
->deadStripDylibs
))
819 // Several DylibFiles can have the same installName. Only emit a single
820 // load command for that installName and give all these DylibFiles the
822 // This can happen in several cases:
823 // - a new framework could change its installName to an older
824 // framework name via an $ld$ symbol depending on platform_version
825 // - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd;
826 // Foo.framework/Foo.tbd is usually a symlink to
827 // Foo.framework/Versions/Current/Foo.tbd, where
828 // Foo.framework/Versions/Current is usually a symlink to
829 // Foo.framework/Versions/A)
830 // - a framework can be linked both explicitly on the linker
831 // command line and implicitly as a reexport from a different
832 // framework. The re-export will usually point to the tbd file
833 // in Foo.framework/Versions/A/Foo.tbd, while the explicit link will
834 // usually find Foo.framework/Foo.tbd. These are usually symlinks,
835 // but in a --reproduce archive they will be identical but distinct
837 // In the first case, *semantically distinct* DylibFiles will have the
839 int64_t &ordinal
= ordinalForInstallName
[dylibFile
->installName
];
841 dylibFile
->ordinal
= ordinal
;
845 ordinal
= dylibFile
->ordinal
= dylibOrdinal
++;
846 LoadCommandType lcType
=
847 dylibFile
->forceWeakImport
|| dylibFile
->refState
== RefState::Weak
850 in
.header
->addLoadCommand(make
<LCDylib
>(lcType
, dylibFile
->installName
,
851 dylibFile
->compatibilityVersion
,
852 dylibFile
->currentVersion
));
854 if (dylibFile
->reexport
)
855 in
.header
->addLoadCommand(
856 make
<LCDylib
>(LC_REEXPORT_DYLIB
, dylibFile
->installName
));
859 if (functionStartsSection
)
860 in
.header
->addLoadCommand(make
<LCFunctionStarts
>(functionStartsSection
));
861 if (dataInCodeSection
)
862 in
.header
->addLoadCommand(make
<LCDataInCode
>(dataInCodeSection
));
863 if (codeSignatureSection
)
864 in
.header
->addLoadCommand(make
<LCCodeSignature
>(codeSignatureSection
));
866 const uint32_t MACOS_MAXPATHLEN
= 1024;
867 config
->headerPad
= std::max(
868 config
->headerPad
, (config
->headerPadMaxInstallNames
869 ? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN
873 // Sorting only can happen once all outputs have been collected. Here we sort
874 // segments, output sections within each segment, and input sections within each
876 static void sortSegmentsAndSections() {
877 TimeTraceScope
timeScope("Sort segments and sections");
878 sortOutputSegments();
880 DenseMap
<const InputSection
*, size_t> isecPriorities
=
881 priorityBuilder
.buildInputSectionPriorities();
883 uint32_t sectionIndex
= 0;
884 for (OutputSegment
*seg
: outputSegments
) {
885 seg
->sortOutputSections();
886 // References from thread-local variable sections are treated as offsets
887 // relative to the start of the thread-local data memory area, which
888 // is initialized via copying all the TLV data sections (which are all
889 // contiguous). If later data sections require a greater alignment than
890 // earlier ones, the offsets of data within those sections won't be
891 // guaranteed to aligned unless we normalize alignments. We therefore use
892 // the largest alignment for all TLV data sections.
893 uint32_t tlvAlign
= 0;
894 for (const OutputSection
*osec
: seg
->getSections())
895 if (isThreadLocalData(osec
->flags
) && osec
->align
> tlvAlign
)
896 tlvAlign
= osec
->align
;
898 for (OutputSection
*osec
: seg
->getSections()) {
899 // Now that the output sections are sorted, assign the final
900 // output section indices.
901 if (!osec
->isHidden())
902 osec
->index
= ++sectionIndex
;
903 if (isThreadLocalData(osec
->flags
)) {
904 if (!firstTLVDataSection
)
905 firstTLVDataSection
= osec
;
906 osec
->align
= tlvAlign
;
909 if (!isecPriorities
.empty()) {
910 if (auto *merged
= dyn_cast
<ConcatOutputSection
>(osec
)) {
911 llvm::stable_sort(merged
->inputs
,
912 [&](InputSection
*a
, InputSection
*b
) {
913 return isecPriorities
[a
] > isecPriorities
[b
];
921 template <class LP
> void Writer::createOutputSections() {
922 TimeTraceScope
timeScope("Create output sections");
923 // First, create hidden sections
924 stringTableSection
= make
<StringTableSection
>();
925 symtabSection
= makeSymtabSection
<LP
>(*stringTableSection
);
926 indirectSymtabSection
= make
<IndirectSymtabSection
>();
927 if (config
->adhocCodesign
)
928 codeSignatureSection
= make
<CodeSignatureSection
>();
929 if (config
->emitDataInCodeInfo
)
930 dataInCodeSection
= make
<DataInCodeSection
>();
931 if (config
->emitFunctionStarts
)
932 functionStartsSection
= make
<FunctionStartsSection
>();
933 if (config
->emitBitcodeBundle
)
934 make
<BitcodeBundleSection
>();
936 switch (config
->outputType
) {
938 make
<PageZeroSection
>();
944 llvm_unreachable("unhandled output file type");
947 // Then add input sections to output sections.
948 for (ConcatInputSection
*isec
: inputSections
) {
949 if (isec
->shouldOmitFromOutput())
951 ConcatOutputSection
*osec
= cast
<ConcatOutputSection
>(isec
->parent
);
952 osec
->addInput(isec
);
954 std::min(osec
->inputOrder
, static_cast<int>(isec
->outSecOff
));
957 // Once all the inputs are added, we can finalize the output section
958 // properties and create the corresponding output segments.
959 for (const auto &it
: concatOutputSections
) {
960 StringRef segname
= it
.first
.first
;
961 ConcatOutputSection
*osec
= it
.second
;
962 assert(segname
!= segment_names::ld
);
963 if (osec
->isNeeded()) {
964 // See comment in ObjFile::splitEhFrames()
965 if (osec
->name
== section_names::ehFrame
&&
966 segname
== segment_names::text
)
967 osec
->align
= target
->wordSize
;
969 getOrCreateOutputSegment(segname
)->addOutputSection(osec
);
973 for (SyntheticSection
*ssec
: syntheticSections
) {
974 auto it
= concatOutputSections
.find({ssec
->segname
, ssec
->name
});
975 // We add all LinkEdit sections here because we don't know if they are
976 // needed until their finalizeContents() methods get called later. While
977 // this means that we add some redundant sections to __LINKEDIT, there is
    // no redundancy in the output, as we do not emit section headers for
979 // any LinkEdit sections.
980 if (ssec
->isNeeded() || ssec
->segname
== segment_names::linkEdit
) {
981 if (it
== concatOutputSections
.end()) {
982 getOrCreateOutputSegment(ssec
->segname
)->addOutputSection(ssec
);
984 fatal("section from " +
985 toString(it
->second
->firstSection()->getFile()) +
986 " conflicts with synthetic section " + ssec
->segname
+ "," +
992 // dyld requires __LINKEDIT segment to always exist (even if empty).
993 linkEditSegment
= getOrCreateOutputSegment(segment_names::linkEdit
);
996 void Writer::finalizeAddresses() {
997 TimeTraceScope
timeScope("Finalize addresses");
998 uint64_t pageSize
= target
->getPageSize();
1000 // We could parallelize this loop, but local benchmarking indicates it is
1001 // faster to do it all in the main thread.
1002 for (OutputSegment
*seg
: outputSegments
) {
1003 if (seg
== linkEditSegment
)
1005 for (OutputSection
*osec
: seg
->getSections()) {
1006 if (!osec
->isNeeded())
1008 // Other kinds of OutputSections have already been finalized.
1009 if (auto concatOsec
= dyn_cast
<ConcatOutputSection
>(osec
))
1010 concatOsec
->finalizeContents();
1014 // Ensure that segments (and the sections they contain) are allocated
1015 // addresses in ascending order, which dyld requires.
1017 // Note that at this point, __LINKEDIT sections are empty, but we need to
1018 // determine addresses of other segments/sections before generating its
1020 for (OutputSegment
*seg
: outputSegments
) {
1021 if (seg
== linkEditSegment
)
1024 assignAddresses(seg
);
1025 // codesign / libstuff checks for segment ordering by verifying that
1026 // `fileOff + fileSize == next segment fileOff`. So we call alignTo() before
1027 // (instead of after) computing fileSize to ensure that the segments are
1028 // contiguous. We handle addr / vmSize similarly for the same reason.
1029 fileOff
= alignTo(fileOff
, pageSize
);
1030 addr
= alignTo(addr
, pageSize
);
1031 seg
->vmSize
= addr
- seg
->addr
;
1032 seg
->fileSize
= fileOff
- seg
->fileOff
;
1033 seg
->assignAddressesToStartEndSymbols();
1037 void Writer::finalizeLinkEditSegment() {
1038 TimeTraceScope
timeScope("Finalize __LINKEDIT segment");
1039 // Fill __LINKEDIT contents.
1040 std::vector
<LinkEditSection
*> linkEditSections
{
1047 indirectSymtabSection
,
1049 functionStartsSection
,
1051 SmallVector
<std::shared_future
<void>> threadFutures
;
1052 threadFutures
.reserve(linkEditSections
.size());
1053 for (LinkEditSection
*osec
: linkEditSections
)
1055 threadFutures
.emplace_back(threadPool
.async(
1056 [](LinkEditSection
*osec
) { osec
->finalizeContents(); }, osec
));
1057 for (std::shared_future
<void> &future
: threadFutures
)
1060 // Now that __LINKEDIT is filled out, do a proper calculation of its
1061 // addresses and offsets.
1062 linkEditSegment
->addr
= addr
;
1063 assignAddresses(linkEditSegment
);
1064 // No need to page-align fileOff / addr here since this is the last segment.
1065 linkEditSegment
->vmSize
= addr
- linkEditSegment
->addr
;
1066 linkEditSegment
->fileSize
= fileOff
- linkEditSegment
->fileOff
;
1069 void Writer::assignAddresses(OutputSegment
*seg
) {
1070 seg
->fileOff
= fileOff
;
1072 for (OutputSection
*osec
: seg
->getSections()) {
1073 if (!osec
->isNeeded())
1075 addr
= alignTo(addr
, osec
->align
);
1076 fileOff
= alignTo(fileOff
, osec
->align
);
1078 osec
->fileOff
= isZeroFill(osec
->flags
) ? 0 : fileOff
;
1080 osec
->assignAddressesToStartEndSymbols();
1082 addr
+= osec
->getSize();
1083 fileOff
+= osec
->getFileSize();
1087 void Writer::openFile() {
1088 Expected
<std::unique_ptr
<FileOutputBuffer
>> bufferOrErr
=
1089 FileOutputBuffer::create(config
->outputFile
, fileOff
,
1090 FileOutputBuffer::F_executable
);
1093 fatal("failed to open " + config
->outputFile
+ ": " +
1094 llvm::toString(bufferOrErr
.takeError()));
1095 buffer
= std::move(*bufferOrErr
);
1096 in
.bufferStart
= buffer
->getBufferStart();
1099 void Writer::writeSections() {
1100 uint8_t *buf
= buffer
->getBufferStart();
1101 std::vector
<const OutputSection
*> osecs
;
1102 for (const OutputSegment
*seg
: outputSegments
)
1103 append_range(osecs
, seg
->getSections());
1105 parallelForEach(osecs
.begin(), osecs
.end(), [&](const OutputSection
*osec
) {
1106 osec
->writeTo(buf
+ osec
->fileOff
);
1110 // In order to utilize multiple cores, we first split the buffer into chunks,
1111 // compute a hash for each chunk, and then compute a hash value of the hash
1113 void Writer::writeUuid() {
1114 TimeTraceScope
timeScope("Computing UUID");
1116 ArrayRef
<uint8_t> data
{buffer
->getBufferStart(), buffer
->getBufferEnd()};
1117 unsigned chunkCount
= parallel::strategy
.compute_thread_count() * 10;
1118 // Round-up integer division
1119 size_t chunkSize
= (data
.size() + chunkCount
- 1) / chunkCount
;
1120 std::vector
<ArrayRef
<uint8_t>> chunks
= split(data
, chunkSize
);
1121 // Leave one slot for filename
1122 std::vector
<uint64_t> hashes(chunks
.size() + 1);
1123 SmallVector
<std::shared_future
<void>> threadFutures
;
1124 threadFutures
.reserve(chunks
.size());
1125 for (size_t i
= 0; i
< chunks
.size(); ++i
)
1126 threadFutures
.emplace_back(threadPool
.async(
1127 [&](size_t j
) { hashes
[j
] = xxHash64(chunks
[j
]); }, i
));
1128 for (std::shared_future
<void> &future
: threadFutures
)
1130 // Append the output filename so that identical binaries with different names
1131 // don't get the same UUID.
1132 hashes
[chunks
.size()] = xxHash64(sys::path::filename(config
->finalOutput
));
1133 uint64_t digest
= xxHash64({reinterpret_cast<uint8_t *>(hashes
.data()),
1134 hashes
.size() * sizeof(uint64_t)});
1135 uuidCommand
->writeUuid(digest
);
1138 void Writer::writeCodeSignature() {
1139 if (codeSignatureSection
) {
1140 TimeTraceScope
timeScope("Write code signature");
1141 codeSignatureSection
->writeHashes(buffer
->getBufferStart());
1145 void Writer::writeOutputFile() {
1146 TimeTraceScope
timeScope("Write output file");
1148 reportPendingUndefinedSymbols();
1153 writeCodeSignature();
1155 if (auto e
= buffer
->commit())
1156 error("failed to write to the output file: " + toString(std::move(e
)));
1159 template <class LP
> void Writer::run() {
1160 treatSpecialUndefineds();
1161 if (config
->entry
&& !isa
<Undefined
>(config
->entry
))
1162 prepareBranchTarget(config
->entry
);
1164 // Canonicalization of all pointers to InputSections should be handled by
1165 // these two scan* methods. I.e. from this point onward, for all live
1166 // InputSections, we should have `isec->canonical() == isec`.
1170 // Do not proceed if there was an undefined symbol.
1171 reportPendingUndefinedSymbols();
1175 if (in
.stubHelper
->isNeeded())
1176 in
.stubHelper
->setup();
1178 if (in
.objCImageInfo
->isNeeded())
1179 in
.objCImageInfo
->finalizeContents();
1181 // At this point, we should know exactly which output sections are needed,
1182 // courtesy of scanSymbols() and scanRelocations().
1183 createOutputSections
<LP
>();
1185 // After this point, we create no new segments; HOWEVER, we might
1186 // yet create branch-range extension thunks for architectures whose
1187 // hardware call instructions have limited range, e.g., ARM(64).
1188 // The thunks are created as InputSections interspersed among
1189 // the ordinary __TEXT,_text InputSections.
1190 sortSegmentsAndSections();
1191 createLoadCommands
<LP
>();
1192 finalizeAddresses();
1193 threadPool
.async([&] {
1194 if (LLVM_ENABLE_THREADS
&& config
->timeTraceEnabled
)
1195 timeTraceProfilerInitialize(config
->timeTraceGranularity
, "writeMapFile");
1197 if (LLVM_ENABLE_THREADS
&& config
->timeTraceEnabled
)
1198 timeTraceProfilerFinishThread();
1200 finalizeLinkEditSegment();
1204 template <class LP
> void macho::writeResult() { Writer().run
<LP
>(); }
1206 void macho::resetWriter() { LCDylib::resetInstanceCount(); }
1208 void macho::createSyntheticSections() {
1209 in
.header
= make
<MachHeaderSection
>();
1210 if (config
->dedupLiterals
)
1211 in
.cStringSection
= make
<DeduplicatedCStringSection
>();
1213 in
.cStringSection
= make
<CStringSection
>();
1214 in
.wordLiteralSection
=
1215 config
->dedupLiterals
? make
<WordLiteralSection
>() : nullptr;
1216 in
.rebase
= make
<RebaseSection
>();
1217 in
.binding
= make
<BindingSection
>();
1218 in
.weakBinding
= make
<WeakBindingSection
>();
1219 in
.lazyBinding
= make
<LazyBindingSection
>();
1220 in
.exports
= make
<ExportSection
>();
1221 in
.got
= make
<GotSection
>();
1222 in
.tlvPointers
= make
<TlvPointerSection
>();
1223 in
.lazyPointers
= make
<LazyPointerSection
>();
1224 in
.stubs
= make
<StubsSection
>();
1225 in
.stubHelper
= make
<StubHelperSection
>();
1226 in
.unwindInfo
= makeUnwindInfoSection();
1227 in
.objCImageInfo
= make
<ObjCImageInfoSection
>();
1229 // This section contains space for just a single word, and will be used by
1230 // dyld to cache an address to the image loader it uses.
1231 uint8_t *arr
= bAlloc().Allocate
<uint8_t>(target
->wordSize
);
1232 memset(arr
, 0, target
->wordSize
);
1233 in
.imageLoaderCache
= makeSyntheticInputSection(
1234 segment_names::data
, section_names::data
, S_REGULAR
,
1235 ArrayRef
<uint8_t>{arr
, target
->wordSize
},
1236 /*align=*/target
->wordSize
);
1237 // References from dyld are not visible to us, so ensure this section is
1238 // always treated as live.
1239 in
.imageLoaderCache
->live
= true;
// Definition of macho::firstTLVDataSection; it starts out as nullptr.
OutputSection *macho::firstTLVDataSection = nullptr;

// Explicit instantiations of writeResult() for the LP64 and ILP32 type
// parameters.
template void macho::writeResult<LP64>();
template void macho::writeResult<ILP32>();