1 //===- SyntheticSections.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "SyntheticSections.h"
10 #include "ConcatOutputSection.h"
12 #include "ExportTrie.h"
13 #include "InputFiles.h"
14 #include "MachOStructs.h"
15 #include "OutputSegment.h"
16 #include "SymbolTable.h"
19 #include "lld/Common/CommonLinkerContext.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/EndianStream.h"
23 #include "llvm/Support/FileSystem.h"
24 #include "llvm/Support/LEB128.h"
25 #include "llvm/Support/Parallel.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/xxhash.h"
29 #if defined(__APPLE__)
32 #define COMMON_DIGEST_FOR_OPENSSL
33 #include <CommonCrypto/CommonDigest.h>
35 #include "llvm/Support/SHA256.h"
39 using namespace llvm::MachO
;
40 using namespace llvm::support
;
41 using namespace llvm::support::endian
;
43 using namespace lld::macho
;
45 // Reads `len` bytes at data and writes the 32-byte SHA256 checksum to `output`.
46 static void sha256(const uint8_t *data
, size_t len
, uint8_t *output
) {
47 #if defined(__APPLE__)
48 // FIXME: Make LLVM's SHA256 faster and use it unconditionally. See PR56121
49 // for some notes on this.
50 CC_SHA256(data
, len
, output
);
52 ArrayRef
<uint8_t> block(data
, len
);
53 std::array
<uint8_t, 32> hash
= SHA256::hash(block
);
54 static_assert(hash
.size() == CodeSignatureSection::hashSize
);
55 memcpy(output
, hash
.data(), hash
.size());
60 std::vector
<SyntheticSection
*> macho::syntheticSections
;
62 SyntheticSection::SyntheticSection(const char *segname
, const char *name
)
63 : OutputSection(SyntheticKind
, name
) {
64 std::tie(this->segname
, this->name
) = maybeRenameSection({segname
, name
});
65 isec
= makeSyntheticInputSection(segname
, name
);
67 syntheticSections
.push_back(this);
70 // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
71 // from the beginning of the file (i.e. the header).
72 MachHeaderSection::MachHeaderSection()
73 : SyntheticSection(segment_names::text
, section_names::header
) {
74 // XXX: This is a hack. (See D97007)
75 // Setting the index to 1 to pretend that this section is the text
81 void MachHeaderSection::addLoadCommand(LoadCommand
*lc
) {
82 loadCommands
.push_back(lc
);
83 sizeOfCmds
+= lc
->getSize();
86 uint64_t MachHeaderSection::getSize() const {
87 uint64_t size
= target
->headerSize
+ sizeOfCmds
+ config
->headerPad
;
88 // If we are emitting an encryptable binary, our load commands must have a
89 // separate (non-encrypted) page to themselves.
90 if (config
->emitEncryptionInfo
)
91 size
= alignToPowerOf2(size
, target
->getPageSize());
95 static uint32_t cpuSubtype() {
96 uint32_t subtype
= target
->cpuSubtype
;
98 if (config
->outputType
== MH_EXECUTE
&& !config
->staticLink
&&
99 target
->cpuSubtype
== CPU_SUBTYPE_X86_64_ALL
&&
100 config
->platform() == PLATFORM_MACOS
&&
101 config
->platformInfo
.target
.MinDeployment
>= VersionTuple(10, 5))
102 subtype
|= CPU_SUBTYPE_LIB64
;
107 static bool hasWeakBinding() {
108 return config
->emitChainedFixups
? in
.chainedFixups
->hasWeakBinding()
109 : in
.weakBinding
->hasEntry();
112 static bool hasNonWeakDefinition() {
113 return config
->emitChainedFixups
? in
.chainedFixups
->hasNonWeakDefinition()
114 : in
.weakBinding
->hasNonWeakDefinition();
117 void MachHeaderSection::writeTo(uint8_t *buf
) const {
118 auto *hdr
= reinterpret_cast<mach_header
*>(buf
);
119 hdr
->magic
= target
->magic
;
120 hdr
->cputype
= target
->cpuType
;
121 hdr
->cpusubtype
= cpuSubtype();
122 hdr
->filetype
= config
->outputType
;
123 hdr
->ncmds
= loadCommands
.size();
124 hdr
->sizeofcmds
= sizeOfCmds
;
125 hdr
->flags
= MH_DYLDLINK
;
127 if (config
->namespaceKind
== NamespaceKind::twolevel
)
128 hdr
->flags
|= MH_NOUNDEFS
| MH_TWOLEVEL
;
130 if (config
->outputType
== MH_DYLIB
&& !config
->hasReexports
)
131 hdr
->flags
|= MH_NO_REEXPORTED_DYLIBS
;
133 if (config
->markDeadStrippableDylib
)
134 hdr
->flags
|= MH_DEAD_STRIPPABLE_DYLIB
;
136 if (config
->outputType
== MH_EXECUTE
&& config
->isPic
)
137 hdr
->flags
|= MH_PIE
;
139 if (config
->outputType
== MH_DYLIB
&& config
->applicationExtension
)
140 hdr
->flags
|= MH_APP_EXTENSION_SAFE
;
142 if (in
.exports
->hasWeakSymbol
|| hasNonWeakDefinition())
143 hdr
->flags
|= MH_WEAK_DEFINES
;
145 if (in
.exports
->hasWeakSymbol
|| hasWeakBinding())
146 hdr
->flags
|= MH_BINDS_TO_WEAK
;
148 for (const OutputSegment
*seg
: outputSegments
) {
149 for (const OutputSection
*osec
: seg
->getSections()) {
150 if (isThreadLocalVariables(osec
->flags
)) {
151 hdr
->flags
|= MH_HAS_TLV_DESCRIPTORS
;
157 uint8_t *p
= reinterpret_cast<uint8_t *>(hdr
) + target
->headerSize
;
158 for (const LoadCommand
*lc
: loadCommands
) {
164 PageZeroSection::PageZeroSection()
165 : SyntheticSection(segment_names::pageZero
, section_names::pageZero
) {}
167 RebaseSection::RebaseSection()
168 : LinkEditSection(segment_names::linkEdit
, section_names::rebase
) {}
172 uint64_t sequenceLength
;
177 static void emitIncrement(uint64_t incr
, raw_svector_ostream
&os
) {
180 if ((incr
>> target
->p2WordSize
) <= REBASE_IMMEDIATE_MASK
&&
181 (incr
% target
->wordSize
) == 0) {
182 os
<< static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED
|
183 (incr
>> target
->p2WordSize
));
185 os
<< static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB
);
186 encodeULEB128(incr
, os
);
190 static void flushRebase(const RebaseState
&state
, raw_svector_ostream
&os
) {
191 assert(state
.sequenceLength
> 0);
193 if (state
.skipLength
== target
->wordSize
) {
194 if (state
.sequenceLength
<= REBASE_IMMEDIATE_MASK
) {
195 os
<< static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES
|
196 state
.sequenceLength
);
198 os
<< static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES
);
199 encodeULEB128(state
.sequenceLength
, os
);
201 } else if (state
.sequenceLength
== 1) {
202 os
<< static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
);
203 encodeULEB128(state
.skipLength
- target
->wordSize
, os
);
205 os
<< static_cast<uint8_t>(
206 REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
);
207 encodeULEB128(state
.sequenceLength
, os
);
208 encodeULEB128(state
.skipLength
- target
->wordSize
, os
);
212 // Rebases are communicated to dyld using a bytecode, whose opcodes cause the
213 // memory location at a specific address to be rebased and/or the address to be
216 // Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic
217 // one, encoding a series of evenly spaced addresses. This algorithm works by
218 // splitting up the sorted list of addresses into such chunks. If the locations
219 // are consecutive or the sequence consists of a single location, flushRebase
220 // will use a smaller, more specialized encoding.
221 static void encodeRebases(const OutputSegment
*seg
,
222 MutableArrayRef
<Location
> locations
,
223 raw_svector_ostream
&os
) {
224 // dyld operates on segments. Translate section offsets into segment offsets.
225 for (Location
&loc
: locations
)
227 loc
.isec
->parent
->getSegmentOffset() + loc
.isec
->getOffset(loc
.offset
);
228 // The algorithm assumes that locations are unique.
230 llvm::unique(locations
, [](const Location
&a
, const Location
&b
) {
231 return a
.offset
== b
.offset
;
233 size_t count
= end
- locations
.begin();
235 os
<< static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
237 assert(!locations
.empty());
238 uint64_t offset
= locations
[0].offset
;
239 encodeULEB128(offset
, os
);
241 RebaseState state
{1, target
->wordSize
};
243 for (size_t i
= 1; i
< count
; ++i
) {
244 offset
= locations
[i
].offset
;
246 uint64_t skip
= offset
- locations
[i
- 1].offset
;
247 assert(skip
!= 0 && "duplicate locations should have been weeded out");
249 if (skip
== state
.skipLength
) {
250 ++state
.sequenceLength
;
251 } else if (state
.sequenceLength
== 1) {
252 ++state
.sequenceLength
;
253 state
.skipLength
= skip
;
254 } else if (skip
< state
.skipLength
) {
255 // The address is lower than what the rebase pointer would be if the last
256 // location were part of a sequence. We start a new sequence from the
257 // previous location.
258 --state
.sequenceLength
;
259 flushRebase(state
, os
);
261 state
.sequenceLength
= 2;
262 state
.skipLength
= skip
;
264 // The address is at some positive offset from the rebase pointer. We
265 // start a new sequence which begins with the current location.
266 flushRebase(state
, os
);
267 emitIncrement(skip
- state
.skipLength
, os
);
268 state
.sequenceLength
= 1;
269 state
.skipLength
= target
->wordSize
;
272 flushRebase(state
, os
);
275 void RebaseSection::finalizeContents() {
276 if (locations
.empty())
279 raw_svector_ostream os
{contents
};
280 os
<< static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM
| REBASE_TYPE_POINTER
);
282 llvm::sort(locations
, [](const Location
&a
, const Location
&b
) {
283 return a
.isec
->getVA(a
.offset
) < b
.isec
->getVA(b
.offset
);
286 for (size_t i
= 0, count
= locations
.size(); i
< count
;) {
287 const OutputSegment
*seg
= locations
[i
].isec
->parent
->parent
;
289 while (j
< count
&& locations
[j
].isec
->parent
->parent
== seg
)
291 encodeRebases(seg
, {locations
.data() + i
, locations
.data() + j
}, os
);
294 os
<< static_cast<uint8_t>(REBASE_OPCODE_DONE
);
297 void RebaseSection::writeTo(uint8_t *buf
) const {
298 memcpy(buf
, contents
.data(), contents
.size());
301 NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname
,
303 : SyntheticSection(segname
, name
) {
304 align
= target
->wordSize
;
307 void macho::addNonLazyBindingEntries(const Symbol
*sym
,
308 const InputSection
*isec
, uint64_t offset
,
310 if (config
->emitChainedFixups
) {
311 if (needsBinding(sym
))
312 in
.chainedFixups
->addBinding(sym
, isec
, offset
, addend
);
313 else if (isa
<Defined
>(sym
))
314 in
.chainedFixups
->addRebase(isec
, offset
);
316 llvm_unreachable("cannot bind to an undefined symbol");
320 if (const auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
321 in
.binding
->addEntry(dysym
, isec
, offset
, addend
);
322 if (dysym
->isWeakDef())
323 in
.weakBinding
->addEntry(sym
, isec
, offset
, addend
);
324 } else if (const auto *defined
= dyn_cast
<Defined
>(sym
)) {
325 in
.rebase
->addEntry(isec
, offset
);
326 if (defined
->isExternalWeakDef())
327 in
.weakBinding
->addEntry(sym
, isec
, offset
, addend
);
328 else if (defined
->interposable
)
329 in
.binding
->addEntry(sym
, isec
, offset
, addend
);
331 // Undefined symbols are filtered out in scanRelocations(); we should never
333 llvm_unreachable("cannot bind to an undefined symbol");
337 void NonLazyPointerSectionBase::addEntry(Symbol
*sym
) {
338 if (entries
.insert(sym
)) {
339 assert(!sym
->isInGot());
340 sym
->gotIndex
= entries
.size() - 1;
342 addNonLazyBindingEntries(sym
, isec
, sym
->gotIndex
* target
->wordSize
);
346 void macho::writeChainedRebase(uint8_t *buf
, uint64_t targetVA
) {
347 assert(config
->emitChainedFixups
);
348 assert(target
->wordSize
== 8 && "Only 64-bit platforms are supported");
349 auto *rebase
= reinterpret_cast<dyld_chained_ptr_64_rebase
*>(buf
);
350 rebase
->target
= targetVA
& 0xf'ffff'ffff;
351 rebase
->high8
= (targetVA
>> 56);
352 rebase
->reserved
= 0;
356 // The fixup format places a 64 GiB limit on the output's size.
357 // Should we handle this gracefully?
358 uint64_t encodedVA
= rebase
->target
| ((uint64_t)rebase
->high8
<< 56);
359 if (encodedVA
!= targetVA
)
360 error("rebase target address 0x" + Twine::utohexstr(targetVA
) +
361 " does not fit into chained fixup. Re-link with -no_fixup_chains");
364 static void writeChainedBind(uint8_t *buf
, const Symbol
*sym
, int64_t addend
) {
365 assert(config
->emitChainedFixups
);
366 assert(target
->wordSize
== 8 && "Only 64-bit platforms are supported");
367 auto *bind
= reinterpret_cast<dyld_chained_ptr_64_bind
*>(buf
);
368 auto [ordinal
, inlineAddend
] = in
.chainedFixups
->getBinding(sym
, addend
);
369 bind
->ordinal
= ordinal
;
370 bind
->addend
= inlineAddend
;
376 void macho::writeChainedFixup(uint8_t *buf
, const Symbol
*sym
, int64_t addend
) {
377 if (needsBinding(sym
))
378 writeChainedBind(buf
, sym
, addend
);
380 writeChainedRebase(buf
, sym
->getVA() + addend
);
383 void NonLazyPointerSectionBase::writeTo(uint8_t *buf
) const {
384 if (config
->emitChainedFixups
) {
385 for (const auto &[i
, entry
] : llvm::enumerate(entries
))
386 writeChainedFixup(&buf
[i
* target
->wordSize
], entry
, 0);
388 for (const auto &[i
, entry
] : llvm::enumerate(entries
))
389 if (auto *defined
= dyn_cast
<Defined
>(entry
))
390 write64le(&buf
[i
* target
->wordSize
], defined
->getVA());
394 GotSection::GotSection()
395 : NonLazyPointerSectionBase(segment_names::data
, section_names::got
) {
396 flags
= S_NON_LAZY_SYMBOL_POINTERS
;
399 TlvPointerSection::TlvPointerSection()
400 : NonLazyPointerSectionBase(segment_names::data
,
401 section_names::threadPtrs
) {
402 flags
= S_THREAD_LOCAL_VARIABLE_POINTERS
;
405 BindingSection::BindingSection()
406 : LinkEditSection(segment_names::linkEdit
, section_names::binding
) {}
410 OutputSegment
*segment
= nullptr;
415 // Default value of 0xF0 is not a valid opcode and should make the program
416 // scream instead of accidentally writing "valid" values.
417 uint8_t opcode
= 0xF0;
419 uint64_t consecutiveCount
= 0;
423 // Encode a sequence of opcodes that tell dyld to write the address of symbol +
424 // addend at osec->addr + outSecOff.
426 // The bind opcode "interpreter" remembers the values of each binding field, so
427 // we only need to encode the differences between bindings. Hence the use of
429 static void encodeBinding(const OutputSection
*osec
, uint64_t outSecOff
,
430 int64_t addend
, Binding
&lastBinding
,
431 std::vector
<BindIR
> &opcodes
) {
432 OutputSegment
*seg
= osec
->parent
;
433 uint64_t offset
= osec
->getSegmentOffset() + outSecOff
;
434 if (lastBinding
.segment
!= seg
) {
436 {static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
439 lastBinding
.segment
= seg
;
440 lastBinding
.offset
= offset
;
441 } else if (lastBinding
.offset
!= offset
) {
442 opcodes
.push_back({BIND_OPCODE_ADD_ADDR_ULEB
, offset
- lastBinding
.offset
});
443 lastBinding
.offset
= offset
;
446 if (lastBinding
.addend
!= addend
) {
448 {BIND_OPCODE_SET_ADDEND_SLEB
, static_cast<uint64_t>(addend
)});
449 lastBinding
.addend
= addend
;
452 opcodes
.push_back({BIND_OPCODE_DO_BIND
, 0});
453 // DO_BIND causes dyld to both perform the binding and increment the offset
454 lastBinding
.offset
+= target
->wordSize
;
457 static void optimizeOpcodes(std::vector
<BindIR
> &opcodes
) {
458 // Pass 1: Combine bind/add pairs
461 for (i
= 1; i
< opcodes
.size(); ++i
, ++pWrite
) {
462 if ((opcodes
[i
].opcode
== BIND_OPCODE_ADD_ADDR_ULEB
) &&
463 (opcodes
[i
- 1].opcode
== BIND_OPCODE_DO_BIND
)) {
464 opcodes
[pWrite
].opcode
= BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
;
465 opcodes
[pWrite
].data
= opcodes
[i
].data
;
468 opcodes
[pWrite
] = opcodes
[i
- 1];
471 if (i
== opcodes
.size())
472 opcodes
[pWrite
] = opcodes
[i
- 1];
473 opcodes
.resize(pWrite
+ 1);
475 // Pass 2: Compress two or more bind_add opcodes
477 for (i
= 1; i
< opcodes
.size(); ++i
, ++pWrite
) {
478 if ((opcodes
[i
].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
479 (opcodes
[i
- 1].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
480 (opcodes
[i
].data
== opcodes
[i
- 1].data
)) {
481 opcodes
[pWrite
].opcode
= BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
;
482 opcodes
[pWrite
].consecutiveCount
= 2;
483 opcodes
[pWrite
].data
= opcodes
[i
].data
;
485 while (i
< opcodes
.size() &&
486 (opcodes
[i
].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
487 (opcodes
[i
].data
== opcodes
[i
- 1].data
)) {
488 opcodes
[pWrite
].consecutiveCount
++;
492 opcodes
[pWrite
] = opcodes
[i
- 1];
495 if (i
== opcodes
.size())
496 opcodes
[pWrite
] = opcodes
[i
- 1];
497 opcodes
.resize(pWrite
+ 1);
499 // Pass 3: Use immediate encodings
500 // Every binding is the size of one pointer. If the next binding is a
501 // multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the
502 // opcode can be scaled by wordSize into a single byte and dyld will
503 // expand it to the correct address.
504 for (auto &p
: opcodes
) {
505 // It's unclear why the check needs to be less than BIND_IMMEDIATE_MASK,
506 // but ld64 currently does this. This could be a potential bug, but
507 // for now, perform the same behavior to prevent mysterious bugs.
508 if ((p
.opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
509 ((p
.data
/ target
->wordSize
) < BIND_IMMEDIATE_MASK
) &&
510 ((p
.data
% target
->wordSize
) == 0)) {
511 p
.opcode
= BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
;
512 p
.data
/= target
->wordSize
;
517 static void flushOpcodes(const BindIR
&op
, raw_svector_ostream
&os
) {
518 uint8_t opcode
= op
.opcode
& BIND_OPCODE_MASK
;
520 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
:
521 case BIND_OPCODE_ADD_ADDR_ULEB
:
522 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
:
524 encodeULEB128(op
.data
, os
);
526 case BIND_OPCODE_SET_ADDEND_SLEB
:
528 encodeSLEB128(static_cast<int64_t>(op
.data
), os
);
530 case BIND_OPCODE_DO_BIND
:
533 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
:
535 encodeULEB128(op
.consecutiveCount
, os
);
536 encodeULEB128(op
.data
, os
);
538 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
:
539 os
<< static_cast<uint8_t>(op
.opcode
| op
.data
);
542 llvm_unreachable("cannot bind to an unrecognized symbol");
546 // Non-weak bindings need to have their dylib ordinal encoded as well.
547 static int16_t ordinalForDylibSymbol(const DylibSymbol
&dysym
) {
548 if (config
->namespaceKind
== NamespaceKind::flat
|| dysym
.isDynamicLookup())
549 return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP
);
550 assert(dysym
.getFile()->isReferenced());
551 return dysym
.getFile()->ordinal
;
554 static int16_t ordinalForSymbol(const Symbol
&sym
) {
555 if (const auto *dysym
= dyn_cast
<DylibSymbol
>(&sym
))
556 return ordinalForDylibSymbol(*dysym
);
557 assert(cast
<Defined
>(&sym
)->interposable
);
558 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP
;
561 static void encodeDylibOrdinal(int16_t ordinal
, raw_svector_ostream
&os
) {
563 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM
|
564 (ordinal
& BIND_IMMEDIATE_MASK
));
565 } else if (ordinal
<= BIND_IMMEDIATE_MASK
) {
566 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM
| ordinal
);
568 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB
);
569 encodeULEB128(ordinal
, os
);
573 static void encodeWeakOverride(const Defined
*defined
,
574 raw_svector_ostream
&os
) {
575 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
|
576 BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION
)
577 << defined
->getName() << '\0';
580 // Organize the bindings so we can encode them with fewer opcodes.
582 // First, all bindings for a given symbol should be grouped together.
583 // BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it
584 // has an associated symbol string), so we only want to emit it once per symbol.
586 // Within each group, we sort the bindings by address. Since bindings are
587 // delta-encoded, sorting them allows for a more compact result. Note that
588 // sorting by address alone ensures that bindings for the same segment / section
589 // are located together, minimizing the number of times we have to emit
590 // BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB.
592 // Finally, we sort the symbols by the address of their first binding, again
593 // to facilitate the delta-encoding process.
595 std::vector
<std::pair
<const Sym
*, std::vector
<BindingEntry
>>>
596 sortBindings(const BindingsMap
<const Sym
*> &bindingsMap
) {
597 std::vector
<std::pair
<const Sym
*, std::vector
<BindingEntry
>>> bindingsVec(
598 bindingsMap
.begin(), bindingsMap
.end());
599 for (auto &p
: bindingsVec
) {
600 std::vector
<BindingEntry
> &bindings
= p
.second
;
601 llvm::sort(bindings
, [](const BindingEntry
&a
, const BindingEntry
&b
) {
602 return a
.target
.getVA() < b
.target
.getVA();
605 llvm::sort(bindingsVec
, [](const auto &a
, const auto &b
) {
606 return a
.second
[0].target
.getVA() < b
.second
[0].target
.getVA();
611 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
612 // interprets to update a record with the following fields:
613 // * segment index (of the segment to write the symbol addresses to, typically
614 // the __DATA_CONST segment which contains the GOT)
615 // * offset within the segment, indicating the next location to write a binding
617 // * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
620 // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
621 // a symbol in the GOT, and increments the segment offset to point to the next
622 // entry. It does *not* clear the record state after doing the bind, so
623 // subsequent opcodes only need to encode the differences between bindings.
624 void BindingSection::finalizeContents() {
625 raw_svector_ostream os
{contents
};
627 int16_t lastOrdinal
= 0;
629 for (auto &p
: sortBindings(bindingsMap
)) {
630 const Symbol
*sym
= p
.first
;
631 std::vector
<BindingEntry
> &bindings
= p
.second
;
632 uint8_t flags
= BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
;
633 if (sym
->isWeakRef())
634 flags
|= BIND_SYMBOL_FLAGS_WEAK_IMPORT
;
635 os
<< flags
<< sym
->getName() << '\0'
636 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM
| BIND_TYPE_POINTER
);
637 int16_t ordinal
= ordinalForSymbol(*sym
);
638 if (ordinal
!= lastOrdinal
) {
639 encodeDylibOrdinal(ordinal
, os
);
640 lastOrdinal
= ordinal
;
642 std::vector
<BindIR
> opcodes
;
643 for (const BindingEntry
&b
: bindings
)
644 encodeBinding(b
.target
.isec
->parent
,
645 b
.target
.isec
->getOffset(b
.target
.offset
), b
.addend
,
646 lastBinding
, opcodes
);
647 if (config
->optimize
> 1)
648 optimizeOpcodes(opcodes
);
649 for (const auto &op
: opcodes
)
650 flushOpcodes(op
, os
);
652 if (!bindingsMap
.empty())
653 os
<< static_cast<uint8_t>(BIND_OPCODE_DONE
);
656 void BindingSection::writeTo(uint8_t *buf
) const {
657 memcpy(buf
, contents
.data(), contents
.size());
660 WeakBindingSection::WeakBindingSection()
661 : LinkEditSection(segment_names::linkEdit
, section_names::weakBinding
) {}
663 void WeakBindingSection::finalizeContents() {
664 raw_svector_ostream os
{contents
};
667 for (const Defined
*defined
: definitions
)
668 encodeWeakOverride(defined
, os
);
670 for (auto &p
: sortBindings(bindingsMap
)) {
671 const Symbol
*sym
= p
.first
;
672 std::vector
<BindingEntry
> &bindings
= p
.second
;
673 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
)
674 << sym
->getName() << '\0'
675 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM
| BIND_TYPE_POINTER
);
676 std::vector
<BindIR
> opcodes
;
677 for (const BindingEntry
&b
: bindings
)
678 encodeBinding(b
.target
.isec
->parent
,
679 b
.target
.isec
->getOffset(b
.target
.offset
), b
.addend
,
680 lastBinding
, opcodes
);
681 if (config
->optimize
> 1)
682 optimizeOpcodes(opcodes
);
683 for (const auto &op
: opcodes
)
684 flushOpcodes(op
, os
);
686 if (!bindingsMap
.empty() || !definitions
.empty())
687 os
<< static_cast<uint8_t>(BIND_OPCODE_DONE
);
690 void WeakBindingSection::writeTo(uint8_t *buf
) const {
691 memcpy(buf
, contents
.data(), contents
.size());
694 StubsSection::StubsSection()
695 : SyntheticSection(segment_names::text
, section_names::stubs
) {
696 flags
= S_SYMBOL_STUBS
| S_ATTR_SOME_INSTRUCTIONS
| S_ATTR_PURE_INSTRUCTIONS
;
697 // The stubs section comprises machine instructions, which are aligned to
698 // 4 bytes on the archs we care about.
700 reserved2
= target
->stubSize
;
703 uint64_t StubsSection::getSize() const {
704 return entries
.size() * target
->stubSize
;
707 void StubsSection::writeTo(uint8_t *buf
) const {
709 for (const Symbol
*sym
: entries
) {
711 config
->emitChainedFixups
? sym
->getGotVA() : sym
->getLazyPtrVA();
712 target
->writeStub(buf
+ off
, *sym
, pointerVA
);
713 off
+= target
->stubSize
;
717 void StubsSection::finalize() { isFinal
= true; }
719 static void addBindingsForStub(Symbol
*sym
) {
720 assert(!config
->emitChainedFixups
);
721 if (auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
722 if (sym
->isWeakDef()) {
723 in
.binding
->addEntry(dysym
, in
.lazyPointers
->isec
,
724 sym
->stubsIndex
* target
->wordSize
);
725 in
.weakBinding
->addEntry(sym
, in
.lazyPointers
->isec
,
726 sym
->stubsIndex
* target
->wordSize
);
728 in
.lazyBinding
->addEntry(dysym
);
730 } else if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
731 if (defined
->isExternalWeakDef()) {
732 in
.rebase
->addEntry(in
.lazyPointers
->isec
,
733 sym
->stubsIndex
* target
->wordSize
);
734 in
.weakBinding
->addEntry(sym
, in
.lazyPointers
->isec
,
735 sym
->stubsIndex
* target
->wordSize
);
736 } else if (defined
->interposable
) {
737 in
.lazyBinding
->addEntry(sym
);
739 llvm_unreachable("invalid stub target");
742 llvm_unreachable("invalid stub target symbol type");
746 void StubsSection::addEntry(Symbol
*sym
) {
747 bool inserted
= entries
.insert(sym
);
749 sym
->stubsIndex
= entries
.size() - 1;
751 if (config
->emitChainedFixups
)
752 in
.got
->addEntry(sym
);
754 addBindingsForStub(sym
);
758 StubHelperSection::StubHelperSection()
759 : SyntheticSection(segment_names::text
, section_names::stubHelper
) {
760 flags
= S_ATTR_SOME_INSTRUCTIONS
| S_ATTR_PURE_INSTRUCTIONS
;
761 align
= 4; // This section comprises machine instructions
764 uint64_t StubHelperSection::getSize() const {
765 return target
->stubHelperHeaderSize
+
766 in
.lazyBinding
->getEntries().size() * target
->stubHelperEntrySize
;
769 bool StubHelperSection::isNeeded() const { return in
.lazyBinding
->isNeeded(); }
771 void StubHelperSection::writeTo(uint8_t *buf
) const {
772 target
->writeStubHelperHeader(buf
);
773 size_t off
= target
->stubHelperHeaderSize
;
774 for (const Symbol
*sym
: in
.lazyBinding
->getEntries()) {
775 target
->writeStubHelperEntry(buf
+ off
, *sym
, addr
+ off
);
776 off
+= target
->stubHelperEntrySize
;
780 void StubHelperSection::setUp() {
781 Symbol
*binder
= symtab
->addUndefined("dyld_stub_binder", /*file=*/nullptr,
782 /*isWeakRef=*/false);
783 if (auto *undefined
= dyn_cast
<Undefined
>(binder
))
784 treatUndefinedSymbol(*undefined
,
785 "lazy binding (normally in libSystem.dylib)");
787 // treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check.
788 stubBinder
= dyn_cast_or_null
<DylibSymbol
>(binder
);
789 if (stubBinder
== nullptr)
792 in
.got
->addEntry(stubBinder
);
794 in
.imageLoaderCache
->parent
=
795 ConcatOutputSection::getOrCreateForInput(in
.imageLoaderCache
);
796 inputSections
.push_back(in
.imageLoaderCache
);
797 // Since this isn't in the symbol table or in any input file, the noDeadStrip
798 // argument doesn't matter.
800 make
<Defined
>("__dyld_private", nullptr, in
.imageLoaderCache
, 0, 0,
802 /*isExternal=*/false, /*isPrivateExtern=*/false,
803 /*includeInSymtab=*/true,
804 /*isReferencedDynamically=*/false,
805 /*noDeadStrip=*/false);
806 dyldPrivate
->used
= true;
809 ObjCStubsSection::ObjCStubsSection()
810 : SyntheticSection(segment_names::text
, section_names::objcStubs
) {
811 flags
= S_ATTR_SOME_INSTRUCTIONS
| S_ATTR_PURE_INSTRUCTIONS
;
812 align
= target
->objcStubsAlignment
;
815 void ObjCStubsSection::addEntry(Symbol
*sym
) {
816 assert(sym
->getName().starts_with(symbolPrefix
) && "not an objc stub");
817 StringRef methname
= sym
->getName().drop_front(symbolPrefix
.size());
819 in
.objcMethnameSection
->getStringOffset(methname
).outSecOff
);
820 Defined
*newSym
= replaceSymbol
<Defined
>(
821 sym
, sym
->getName(), nullptr, isec
,
822 /*value=*/symbols
.size() * target
->objcStubsFastSize
,
823 /*size=*/target
->objcStubsFastSize
,
824 /*isWeakDef=*/false, /*isExternal=*/true, /*isPrivateExtern=*/true,
825 /*includeInSymtab=*/true, /*isReferencedDynamically=*/false,
826 /*noDeadStrip=*/false);
827 symbols
.push_back(newSym
);
830 void ObjCStubsSection::setUp() {
831 Symbol
*objcMsgSend
= symtab
->addUndefined("_objc_msgSend", /*file=*/nullptr,
832 /*isWeakRef=*/false);
833 objcMsgSend
->used
= true;
834 in
.got
->addEntry(objcMsgSend
);
835 assert(objcMsgSend
->isInGot());
836 objcMsgSendGotIndex
= objcMsgSend
->gotIndex
;
838 size_t size
= offsets
.size() * target
->wordSize
;
839 uint8_t *selrefsData
= bAlloc().Allocate
<uint8_t>(size
);
840 for (size_t i
= 0, n
= offsets
.size(); i
< n
; ++i
)
841 write64le(&selrefsData
[i
* target
->wordSize
], offsets
[i
]);
844 makeSyntheticInputSection(segment_names::data
, section_names::objcSelrefs
,
845 S_LITERAL_POINTERS
| S_ATTR_NO_DEAD_STRIP
,
846 ArrayRef
<uint8_t>{selrefsData
, size
},
847 /*align=*/target
->wordSize
);
848 in
.objcSelrefs
->live
= true;
850 for (size_t i
= 0, n
= offsets
.size(); i
< n
; ++i
) {
851 in
.objcSelrefs
->relocs
.push_back(
852 {/*type=*/target
->unsignedRelocType
,
853 /*pcrel=*/false, /*length=*/3,
854 /*offset=*/static_cast<uint32_t>(i
* target
->wordSize
),
855 /*addend=*/offsets
[i
] * in
.objcMethnameSection
->align
,
856 /*referent=*/in
.objcMethnameSection
->isec
});
859 in
.objcSelrefs
->parent
=
860 ConcatOutputSection::getOrCreateForInput(in
.objcSelrefs
);
861 inputSections
.push_back(in
.objcSelrefs
);
862 in
.objcSelrefs
->isFinal
= true;
865 uint64_t ObjCStubsSection::getSize() const {
866 return target
->objcStubsFastSize
* symbols
.size();
869 void ObjCStubsSection::writeTo(uint8_t *buf
) const {
870 assert(in
.objcSelrefs
->live
);
871 assert(in
.objcSelrefs
->isFinal
);
873 uint64_t stubOffset
= 0;
874 for (size_t i
= 0, n
= symbols
.size(); i
< n
; ++i
) {
875 Defined
*sym
= symbols
[i
];
876 target
->writeObjCMsgSendStub(buf
+ stubOffset
, sym
, in
.objcStubs
->addr
,
877 stubOffset
, in
.objcSelrefs
->getVA(), i
,
878 in
.got
->addr
, objcMsgSendGotIndex
);
879 stubOffset
+= target
->objcStubsFastSize
;
883 LazyPointerSection::LazyPointerSection()
884 : SyntheticSection(segment_names::data
, section_names::lazySymbolPtr
) {
885 align
= target
->wordSize
;
886 flags
= S_LAZY_SYMBOL_POINTERS
;
889 uint64_t LazyPointerSection::getSize() const {
890 return in
.stubs
->getEntries().size() * target
->wordSize
;
893 bool LazyPointerSection::isNeeded() const {
894 return !in
.stubs
->getEntries().empty();
897 void LazyPointerSection::writeTo(uint8_t *buf
) const {
899 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
900 if (const auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
901 if (dysym
->hasStubsHelper()) {
902 uint64_t stubHelperOffset
=
903 target
->stubHelperHeaderSize
+
904 dysym
->stubsHelperIndex
* target
->stubHelperEntrySize
;
905 write64le(buf
+ off
, in
.stubHelper
->addr
+ stubHelperOffset
);
908 write64le(buf
+ off
, sym
->getVA());
910 off
+= target
->wordSize
;
914 LazyBindingSection::LazyBindingSection()
915 : LinkEditSection(segment_names::linkEdit
, section_names::lazyBinding
) {}
917 void LazyBindingSection::finalizeContents() {
918 // TODO: Just precompute output size here instead of writing to a temporary
920 for (Symbol
*sym
: entries
)
921 sym
->lazyBindOffset
= encode(*sym
);
924 void LazyBindingSection::writeTo(uint8_t *buf
) const {
925 memcpy(buf
, contents
.data(), contents
.size());
928 void LazyBindingSection::addEntry(Symbol
*sym
) {
929 assert(!config
->emitChainedFixups
&& "Chained fixups always bind eagerly");
930 if (entries
.insert(sym
)) {
931 sym
->stubsHelperIndex
= entries
.size() - 1;
932 in
.rebase
->addEntry(in
.lazyPointers
->isec
,
933 sym
->stubsIndex
* target
->wordSize
);
937 // Unlike the non-lazy binding section, the bind opcodes in this section aren't
938 // interpreted all at once. Rather, dyld will start interpreting opcodes at a
939 // given offset, typically only binding a single symbol before it finds a
940 // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
941 // we cannot encode just the differences between symbols; we have to emit the
942 // complete bind information for each symbol.
// Appends the bind opcodes for `sym` to the stream `os` and returns the value
// contents.size() had on entry. The visible pieces, in order: a
// BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB byte, a ULEB128 `offset`, the dylib
// ordinal for the symbol, a flags byte followed by the NUL-terminated symbol
// name, then BIND_OPCODE_DO_BIND and BIND_OPCODE_DONE.
// NOTE(review): the embedded numbering skips 947-948, 952, 954 and 956, so the
// right-hand operand of the first `|`, the declaration of `offset`, and the
// condition guarding the BIND_SYMBOL_FLAGS_WEAK_IMPORT flag are not visible
// here -- confirm against the complete file. The expression ending at the
// `;` after `wordSize` looks like the initializer of `offset` (the symbol's
// slot address minus the segment address) -- TODO confirm.
943 uint32_t LazyBindingSection::encode(const Symbol
&sym
) {
944 uint32_t opstreamOffset
= contents
.size();
945 OutputSegment
*dataSeg
= in
.lazyPointers
->parent
;
946 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
949 in
.lazyPointers
->addr
- dataSeg
->addr
+ sym
.stubsIndex
* target
->wordSize
;
950 encodeULEB128(offset
, os
);
951 encodeDylibOrdinal(ordinalForSymbol(sym
), os
);
953 uint8_t flags
= BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
;
955 flags
|= BIND_SYMBOL_FLAGS_WEAK_IMPORT
;
957 os
<< flags
<< sym
.getName() << '\0'
958 << static_cast<uint8_t>(BIND_OPCODE_DO_BIND
)
959 << static_cast<uint8_t>(BIND_OPCODE_DONE
);
960 return opstreamOffset
;
963 ExportSection::ExportSection()
964 : LinkEditSection(segment_names::linkEdit
, section_names::export_
) {}
966 void ExportSection::finalizeContents() {
967 trieBuilder
.setImageBase(in
.header
->addr
);
968 for (const Symbol
*sym
: symtab
->getSymbols()) {
969 if (const auto *defined
= dyn_cast
<Defined
>(sym
)) {
970 if (defined
->privateExtern
|| !defined
->isLive())
972 trieBuilder
.addSymbol(*defined
);
973 hasWeakSymbol
= hasWeakSymbol
|| sym
->isWeakDef();
974 } else if (auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
975 if (dysym
->shouldReexport
)
976 trieBuilder
.addSymbol(*dysym
);
979 size
= trieBuilder
.build();
982 void ExportSection::writeTo(uint8_t *buf
) const { trieBuilder
.writeTo(buf
); }
984 DataInCodeSection::DataInCodeSection()
985 : LinkEditSection(segment_names::linkEdit
, section_names::dataInCode
) {}
// Gathers the data-in-code entries of all ObjFile inputs and returns them
// sorted by offset. For every code subsection that is kept in the output, a
// lower_bound locates the first candidate entry at or after the subsection's
// start (the per-file entries are asserted to be sorted by offset). Each entry
// that ends at or before the subsection's end is then re-emitted with an
// offset derived from isec->getVA(<offset within subsection>); the walk stops
// at the first entry that does not fit.
// NOTE(review): callers in this file invoke this as
// collectDataInCodeEntries<LP64>() / <ILP32>(), but no template header is
// visible in front of the definition, and the embedded numbering skips
// several lines (e.g. 992, 995-997, 1001, 1009, 1011, 1014, 1017, 1023,
// 1025-1028, 1033) -- confirm the hidden statements against the complete file.
988 static std::vector
<MachO::data_in_code_entry
> collectDataInCodeEntries() {
989 std::vector
<MachO::data_in_code_entry
> dataInCodeEntries
;
990 for (const InputFile
*inputFile
: inputFiles
) {
991 if (!isa
<ObjFile
>(inputFile
))
993 const ObjFile
*objFile
= cast
<ObjFile
>(inputFile
);
994 ArrayRef
<MachO::data_in_code_entry
> entries
= objFile
->getDataInCode();
998 assert(is_sorted(entries
, [](const data_in_code_entry
&lhs
,
999 const data_in_code_entry
&rhs
) {
1000 return lhs
.offset
< rhs
.offset
;
1002 // For each code subsection find 'data in code' entries residing in it.
1003 // Compute the new offset values as
1004 // <offset within subsection> + <subsection address> - <__TEXT address>.
1005 for (const Section
*section
: objFile
->sections
) {
1006 for (const Subsection
&subsec
: section
->subsections
) {
1007 const InputSection
*isec
= subsec
.isec
;
1008 if (!isCodeSection(isec
))
1010 if (cast
<ConcatInputSection
>(isec
)->shouldOmitFromOutput())
1012 const uint64_t beginAddr
= section
->addr
+ subsec
.offset
;
1013 auto it
= llvm::lower_bound(
1015 [](const MachO::data_in_code_entry
&entry
, uint64_t addr
) {
1016 return entry
.offset
< addr
;
1018 const uint64_t endAddr
= beginAddr
+ isec
->getSize();
1019 for (const auto end
= entries
.end();
1020 it
!= end
&& it
->offset
+ it
->length
<= endAddr
; ++it
)
1021 dataInCodeEntries
.push_back(
1022 {static_cast<uint32_t>(isec
->getVA(it
->offset
- beginAddr
) -
1024 it
->length
, it
->kind
});
1029 // ld64 emits the table in sorted order too.
1030 llvm::sort(dataInCodeEntries
,
1031 [](const data_in_code_entry
&lhs
, const data_in_code_entry
&rhs
) {
1032 return lhs
.offset
< rhs
.offset
;
1034 return dataInCodeEntries
;
1037 void DataInCodeSection::finalizeContents() {
1038 entries
= target
->wordSize
== 8 ? collectDataInCodeEntries
<LP64
>()
1039 : collectDataInCodeEntries
<ILP32
>();
1042 void DataInCodeSection::writeTo(uint8_t *buf
) const {
1043 if (!entries
.empty())
1044 memcpy(buf
, entries
.data(), getRawSize());
1047 FunctionStartsSection::FunctionStartsSection()
1048 : LinkEditSection(segment_names::linkEdit
, section_names::functionStarts
) {}
// Builds the section contents: collects the virtual addresses of Defined
// symbols from all ObjFile inputs and writes address deltas, starting from
// the header address, as ULEB128 values into `contents`.
// NOTE(review): the embedded numbering skips 1058-1059, 1061-1065, 1069-1070
// and 1072-1075, so the rest of the symbol filter, any ordering of `addrs`,
// the update of `addr` between iterations, and whatever terminates the stream
// are not visible here -- confirm against the complete file.
1050 void FunctionStartsSection::finalizeContents() {
1051 raw_svector_ostream os
{contents
};
1052 std::vector
<uint64_t> addrs
;
1053 for (const InputFile
*file
: inputFiles
) {
1054 if (auto *objFile
= dyn_cast
<ObjFile
>(file
)) {
1055 for (const Symbol
*sym
: objFile
->symbols
) {
1056 if (const auto *defined
= dyn_cast_or_null
<Defined
>(sym
)) {
1057 if (!defined
->isec
|| !isCodeSection(defined
->isec
) ||
1060 addrs
.push_back(defined
->getVA());
1066 uint64_t addr
= in
.header
->addr
;
1067 for (uint64_t nextAddr
: addrs
) {
1068 uint64_t delta
= nextAddr
- addr
;
1071 encodeULEB128(delta
, os
);
1077 void FunctionStartsSection::writeTo(uint8_t *buf
) const {
1078 memcpy(buf
, contents
.data(), contents
.size());
1081 SymtabSection::SymtabSection(StringTableSection
&stringTableSection
)
1082 : LinkEditSection(segment_names::linkEdit
, section_names::symbolTable
),
1083 stringTableSection(stringTableSection
) {}
1085 void SymtabSection::emitBeginSourceStab(StringRef sourceFile
) {
1086 StabsEntry
stab(N_SO
);
1087 stab
.strx
= stringTableSection
.addString(saver().save(sourceFile
));
1088 stabs
.emplace_back(std::move(stab
));
// Appends an N_SO stab with no string attached; emitStabs() calls this to
// close the run of stabs belonging to one file.
// NOTE(review): the embedded numbering skips 1093, so a statement between the
// construction of `stab` and its insertion is not visible here -- confirm
// against the complete file.
1091 void SymtabSection::emitEndSourceStab() {
1092 StabsEntry
stab(N_SO
);
1094 stabs
.emplace_back(std::move(stab
));
// Appends an N_OSO stab describing `file`. The stab's `sect` field carries the
// target CPU subtype and `value` carries the file's modification time. The
// path starts from the archive name when one is set, is made absolute, gets
// "(<member name>)" appended for archive members, and has config->osoPrefix
// stripped from its front before being added to the string table.
// NOTE(review): the embedded numbering skips 1101, 1103 and 1113, so the
// alternative used when there is no archive name, the condition guarding
// fatal(), and one statement before `stab.value` is set are not visible here
// -- confirm against the complete file.
1097 void SymtabSection::emitObjectFileStab(ObjFile
*file
) {
1098 StabsEntry
stab(N_OSO
);
1099 stab
.sect
= target
->cpuSubtype
;
1100 SmallString
<261> path(!file
->archiveName
.empty() ? file
->archiveName
1102 std::error_code ec
= sys::fs::make_absolute(path
);
1104 fatal("failed to get absolute path for " + path
);
1106 if (!file
->archiveName
.empty())
1107 path
.append({"(", file
->getName(), ")"});
1109 StringRef adjustedPath
= saver().save(path
.str());
1110 adjustedPath
.consume_front(config
->osoPrefix
);
1112 stab
.strx
= stringTableSection
.addString(adjustedPath
);
1114 stab
.value
= file
->modTime
;
1115 stabs
.emplace_back(std::move(stab
));
1118 void SymtabSection::emitEndFunStab(Defined
*defined
) {
1119 StabsEntry
stab(N_FUN
);
1120 stab
.value
= defined
->size
;
1121 stabs
.emplace_back(std::move(stab
));
// Emits the STABS debug entries, in this order:
//  1. One N_AST stab per path in config->astPaths.
//  2. For every Defined symbol in localSymbols/externalSymbols that has an
//     object file with a compile unit: the symbols are stably sorted by the
//     id of their input file, and each run of symbols from the same file is
//     introduced by a begin-source stab and an object-file stab. Symbols in
//     code sections get an N_FUN stab followed by an end-function stab; other
//     symbols get N_GSYM (external) or N_STSYM.
// NOTE(review): the embedded numbering skips several lines (e.g. 1126, 1147,
// 1152, 1156, 1179, 1185, 1194 and 1200), so the early exit taken when
// config->omitDebugInfo is set, the statements that skip absolute /
// code-folded / compile-unit-less symbols, the update of `lastFile`, the
// declaration of `symStab`, and any condition on the final
// emitEndSourceStab() call are not visible here -- confirm against the
// complete file.
1124 void SymtabSection::emitStabs() {
1125 if (config
->omitDebugInfo
)
1128 for (const std::string
&s
: config
->astPaths
) {
1129 StabsEntry
astStab(N_AST
);
1130 astStab
.strx
= stringTableSection
.addString(s
);
1131 stabs
.emplace_back(std::move(astStab
));
1134 // Cache the file ID for each symbol in an std::pair for faster sorting.
1135 using SortingPair
= std::pair
<Defined
*, int>;
1136 std::vector
<SortingPair
> symbolsNeedingStabs
;
1137 for (const SymtabEntry
&entry
:
1138 concat
<SymtabEntry
>(localSymbols
, externalSymbols
)) {
1139 Symbol
*sym
= entry
.sym
;
1140 assert(sym
->isLive() &&
1141 "dead symbols should not be in localSymbols, externalSymbols");
1142 if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
1143 // Excluded symbols should have been filtered out in finalizeContents().
1144 assert(defined
->includeInSymtab
);
1146 if (defined
->isAbsolute())
1149 // Constant-folded symbols go in the executable's symbol table, but don't
1150 // get a stabs entry.
1151 if (defined
->wasIdenticalCodeFolded
)
1154 ObjFile
*file
= defined
->getObjectFile();
1155 if (!file
|| !file
->compileUnit
)
1158 symbolsNeedingStabs
.emplace_back(defined
, defined
->isec
->getFile()->id
);
1162 llvm::stable_sort(symbolsNeedingStabs
,
1163 [&](const SortingPair
&a
, const SortingPair
&b
) {
1164 return a
.second
< b
.second
;
1167 // Emit STABS symbols so that dsymutil and/or the debugger can map address
1168 // regions in the final binary to the source and object files from which they
// originated.
1170 InputFile
*lastFile
= nullptr;
1171 for (SortingPair
&pair
: symbolsNeedingStabs
) {
1172 Defined
*defined
= pair
.first
;
1173 InputSection
*isec
= defined
->isec
;
1174 ObjFile
*file
= cast
<ObjFile
>(isec
->getFile());
1176 if (lastFile
== nullptr || lastFile
!= file
) {
1177 if (lastFile
!= nullptr)
1178 emitEndSourceStab();
1181 emitBeginSourceStab(file
->sourceFile());
1182 emitObjectFileStab(file
);
1186 symStab
.sect
= defined
->isec
->parent
->index
;
1187 symStab
.strx
= stringTableSection
.addString(defined
->getName());
1188 symStab
.value
= defined
->getVA();
1190 if (isCodeSection(isec
)) {
1191 symStab
.type
= N_FUN
;
1192 stabs
.emplace_back(std::move(symStab
));
1193 emitEndFunStab(defined
);
1195 symStab
.type
= defined
->isExternal() ? N_GSYM
: N_STSYM
;
1196 stabs
.emplace_back(std::move(symStab
));
1201 emitEndSourceStab();
// Populates localSymbols, externalSymbols and undefinedSymbols and assigns
// every collected symbol its symtabIndex.
//  * `addSymbol` adds the symbol's name to the string table and records the
//    (symbol, string index) pair in the given list.
//  * `localSymbolsHandler` is chosen from config->localSymbolsPresence: keep
//    all locals, keep none, or keep only those that match / do not match
//    config->localSymbolPatterns.
//  * Non-external, live Defined symbols of the object files that are marked
//    for inclusion go through the local handler, as does the linker-created
//    dyldPrivate symbol when present.
//  * Symbols from the global symbol table: private-extern definitions go
//    through the local handler, Defined symbols are recorded as externals
//    and referenced dylib symbols as undefined.
//  * Indices start at stabs.size() and run over locals, externals and
//    undefined symbols in that order.
// NOTE(review): the embedded numbering skips several lines (e.g. 1208, 1214,
// 1217, 1222-1223, 1228-1230, 1244, 1258-1259, 1262, 1266 and 1274), so the
// `break`s of the switch, the skip statements, any liveness filter at the top
// of the last symbol loop, the branch separating the private-extern and
// external cases, and the statement that runs right before `symtabIndex` is
// initialized are not visible here -- confirm against the complete file.
1204 void SymtabSection::finalizeContents() {
1205 auto addSymbol
= [&](std::vector
<SymtabEntry
> &symbols
, Symbol
*sym
) {
1206 uint32_t strx
= stringTableSection
.addString(sym
->getName());
1207 symbols
.push_back({sym
, strx
});
1210 std::function
<void(Symbol
*)> localSymbolsHandler
;
1211 switch (config
->localSymbolsPresence
) {
1212 case SymtabPresence::All
:
1213 localSymbolsHandler
= [&](Symbol
*sym
) { addSymbol(localSymbols
, sym
); };
1215 case SymtabPresence::None
:
1216 localSymbolsHandler
= [&](Symbol
*) { /* Do nothing*/ };
1218 case SymtabPresence::SelectivelyIncluded
:
1219 localSymbolsHandler
= [&](Symbol
*sym
) {
1220 if (config
->localSymbolPatterns
.match(sym
->getName()))
1221 addSymbol(localSymbols
, sym
);
1224 case SymtabPresence::SelectivelyExcluded
:
1225 localSymbolsHandler
= [&](Symbol
*sym
) {
1226 if (!config
->localSymbolPatterns
.match(sym
->getName()))
1227 addSymbol(localSymbols
, sym
);
1232 // Local symbols aren't in the SymbolTable, so we walk the list of object
1233 // files to gather them.
1234 // But if `-x` is set, then we don't need to. localSymbolsHandler() will do
1235 // the right thing regardless, but this check is a perf optimization because
1236 // iterating through all the input files and their symbols is expensive.
1237 if (config
->localSymbolsPresence
!= SymtabPresence::None
) {
1238 for (const InputFile
*file
: inputFiles
) {
1239 if (auto *objFile
= dyn_cast
<ObjFile
>(file
)) {
1240 for (Symbol
*sym
: objFile
->symbols
) {
1241 if (auto *defined
= dyn_cast_or_null
<Defined
>(sym
)) {
1242 if (defined
->isExternal() || !defined
->isLive() ||
1243 !defined
->includeInSymtab
)
1245 localSymbolsHandler(sym
);
1252 // __dyld_private is a local symbol too. It's linker-created and doesn't
1253 // exist in any object file.
1254 if (in
.stubHelper
&& in
.stubHelper
->dyldPrivate
)
1255 localSymbolsHandler(in
.stubHelper
->dyldPrivate
);
1257 for (Symbol
*sym
: symtab
->getSymbols()) {
1260 if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
1261 if (!defined
->includeInSymtab
)
1263 assert(defined
->isExternal());
1264 if (defined
->privateExtern
)
1265 localSymbolsHandler(defined
);
1267 addSymbol(externalSymbols
, defined
);
1268 } else if (auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
1269 if (dysym
->isReferenced())
1270 addSymbol(undefinedSymbols
, sym
);
1275 uint32_t symtabIndex
= stabs
.size();
1276 for (const SymtabEntry
&entry
:
1277 concat
<SymtabEntry
>(localSymbols
, externalSymbols
, undefinedSymbols
)) {
1278 entry
.sym
->symtabIndex
= symtabIndex
++;
1282 uint32_t SymtabSection::getNumSymbols() const {
1283 return stabs
.size() + localSymbols
.size() + externalSymbols
.size() +
1284 undefinedSymbols
.size();
1287 // This serves to hide (type-erase) the template parameter from SymtabSection.
1288 template <class LP
> class SymtabSectionImpl final
: public SymtabSection
{
1290 SymtabSectionImpl(StringTableSection
&stringTableSection
)
1291 : SymtabSection(stringTableSection
) {}
1292 uint64_t getRawSize() const override
;
1293 void writeTo(uint8_t *buf
) const override
;
1296 template <class LP
> uint64_t SymtabSectionImpl
<LP
>::getRawSize() const {
1297 return getNumSymbols() * sizeof(typename
LP::nlist
);
// Serializes the symbol table as LP::nlist records: first the stabs entries
// (copied field by field), then the local, external and undefined symbols.
//  * Defined symbols: n_type combines `scope` with N_ABS for absolute symbols
//    (no section, n_value = the symbol's value) or with N_SECT otherwise
//    (n_sect = index of the parent output section, n_value = the VA).
//    N_WEAK_DEF is OR-ed into n_desc for external weak definitions.
//  * Dylib symbols: the library ordinal is packed into n_desc via
//    SET_LIBRARY_ORDINAL, with the flat-lookup and main-executable ordinals
//    mapped to DYNAMIC_LOOKUP_ORDINAL and EXECUTABLE_ORDINAL; n_type is N_EXT
//    and the weak-def / weak-ref bits are added to n_desc.
// n_desc is read and OR-ed rather than assigned, so this presumably relies on
// the output buffer starting out zeroed -- TODO confirm.
// NOTE(review): the embedded numbering skips several lines (e.g. 1310, 1318,
// 1322, 1325, 1328, 1342, 1351 and 1361), so the advancing of `nList`, the
// declaration of and assignments to `scope`, and the left-hand side that
// receives the REFERENCED_DYNAMICALLY expression are not visible here --
// confirm against the complete file.
1300 template <class LP
> void SymtabSectionImpl
<LP
>::writeTo(uint8_t *buf
) const {
1301 auto *nList
= reinterpret_cast<typename
LP::nlist
*>(buf
);
1302 // Emit the stabs entries before the "real" symbols. We cannot emit them
1303 // after as that would render Symbol::symtabIndex inaccurate.
1304 for (const StabsEntry
&entry
: stabs
) {
1305 nList
->n_strx
= entry
.strx
;
1306 nList
->n_type
= entry
.type
;
1307 nList
->n_sect
= entry
.sect
;
1308 nList
->n_desc
= entry
.desc
;
1309 nList
->n_value
= entry
.value
;
1313 for (const SymtabEntry
&entry
: concat
<const SymtabEntry
>(
1314 localSymbols
, externalSymbols
, undefinedSymbols
)) {
1315 nList
->n_strx
= entry
.strx
;
1316 // TODO populate n_desc with more flags
1317 if (auto *defined
= dyn_cast
<Defined
>(entry
.sym
)) {
1319 if (defined
->privateExtern
) {
1320 // Private external -- dylib scoped symbol.
1321 // Promote to non-external at link time.
1323 } else if (defined
->isExternal()) {
1324 // Normal global symbol.
1327 // TU-local symbol from localSymbols.
1331 if (defined
->isAbsolute()) {
1332 nList
->n_type
= scope
| N_ABS
;
1333 nList
->n_sect
= NO_SECT
;
1334 nList
->n_value
= defined
->value
;
1336 nList
->n_type
= scope
| N_SECT
;
1337 nList
->n_sect
= defined
->isec
->parent
->index
;
1338 // For the N_SECT symbol type, n_value is the address of the symbol
1339 nList
->n_value
= defined
->getVA();
1341 nList
->n_desc
|= defined
->isExternalWeakDef() ? N_WEAK_DEF
: 0;
1343 defined
->referencedDynamically
? REFERENCED_DYNAMICALLY
: 0;
1344 } else if (auto *dysym
= dyn_cast
<DylibSymbol
>(entry
.sym
)) {
1345 uint16_t n_desc
= nList
->n_desc
;
1346 int16_t ordinal
= ordinalForDylibSymbol(*dysym
);
1347 if (ordinal
== BIND_SPECIAL_DYLIB_FLAT_LOOKUP
)
1348 SET_LIBRARY_ORDINAL(n_desc
, DYNAMIC_LOOKUP_ORDINAL
);
1349 else if (ordinal
== BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE
)
1350 SET_LIBRARY_ORDINAL(n_desc
, EXECUTABLE_ORDINAL
);
1352 assert(ordinal
> 0);
1353 SET_LIBRARY_ORDINAL(n_desc
, static_cast<uint8_t>(ordinal
));
1356 nList
->n_type
= N_EXT
;
1357 n_desc
|= dysym
->isWeakDef() ? N_WEAK_DEF
: 0;
1358 n_desc
|= dysym
->isWeakRef() ? N_WEAK_REF
: 0;
1359 nList
->n_desc
= n_desc
;
// Factory that allocates a SymtabSectionImpl<LP> through make<>() and hands
// it the given string table.
// NOTE(review): the body uses a type `LP`, but the template header and the
// return type are not visible in front of this line (the embedded numbering
// skips 1365-1366) -- confirm against the complete file.
1367 macho::makeSymtabSection(StringTableSection
&stringTableSection
) {
1368 return make
<SymtabSectionImpl
<LP
>>(stringTableSection
);
1371 IndirectSymtabSection::IndirectSymtabSection()
1372 : LinkEditSection(segment_names::linkEdit
,
1373 section_names::indirectSymbolTable
) {}
1375 uint32_t IndirectSymtabSection::getNumSymbols() const {
1376 uint32_t size
= in
.got
->getEntries().size() +
1377 in
.tlvPointers
->getEntries().size() +
1378 in
.stubs
->getEntries().size();
1379 if (!config
->emitChainedFixups
)
1380 size
+= in
.stubs
->getEntries().size();
1384 bool IndirectSymtabSection::isNeeded() const {
1385 return in
.got
->isNeeded() || in
.tlvPointers
->isNeeded() ||
1386 in
.stubs
->isNeeded();
1389 void IndirectSymtabSection::finalizeContents() {
1391 in
.got
->reserved1
= off
;
1392 off
+= in
.got
->getEntries().size();
1393 in
.tlvPointers
->reserved1
= off
;
1394 off
+= in
.tlvPointers
->getEntries().size();
1395 in
.stubs
->reserved1
= off
;
1396 if (in
.lazyPointers
) {
1397 off
+= in
.stubs
->getEntries().size();
1398 in
.lazyPointers
->reserved1
= off
;
1402 static uint32_t indirectValue(const Symbol
*sym
) {
1403 if (sym
->symtabIndex
== UINT32_MAX
)
1404 return INDIRECT_SYMBOL_LOCAL
;
1405 if (auto *defined
= dyn_cast
<Defined
>(sym
))
1406 if (defined
->privateExtern
)
1407 return INDIRECT_SYMBOL_LOCAL
;
1408 return sym
->symtabIndex
;
1411 void IndirectSymtabSection::writeTo(uint8_t *buf
) const {
1413 for (const Symbol
*sym
: in
.got
->getEntries()) {
1414 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1417 for (const Symbol
*sym
: in
.tlvPointers
->getEntries()) {
1418 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1421 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
1422 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1426 if (in
.lazyPointers
) {
1427 // There is a 1:1 correspondence between stubs and LazyPointerSection
1428 // entries. But giving __stubs and __la_symbol_ptr the same reserved1
1429 // (the offset into the indirect symbol table) so that they both refer
1430 // to the same range of offsets confuses `strip`, so write the stubs
1431 // symbol table offsets a second time.
1432 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
1433 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1439 StringTableSection::StringTableSection()
1440 : LinkEditSection(segment_names::linkEdit
, section_names::stringTable
) {}
1442 uint32_t StringTableSection::addString(StringRef str
) {
1443 uint32_t strx
= size
;
1444 strings
.push_back(str
); // TODO: consider deduplicating strings
1445 size
+= str
.size() + 1; // account for null terminator
1449 void StringTableSection::writeTo(uint8_t *buf
) const {
1451 for (StringRef str
: strings
) {
1452 memcpy(buf
+ off
, str
.data(), str
.size());
1453 off
+= str
.size() + 1; // account for null terminator
1457 static_assert((CodeSignatureSection::blobHeadersSize
% 8) == 0);
1458 static_assert((CodeSignatureSection::fixedHeadersSize
% 8) == 0);
1460 CodeSignatureSection::CodeSignatureSection()
1461 : LinkEditSection(segment_names::linkEdit
, section_names::codeSignature
) {
1462 align
= 16; // required by libstuff
1464 // XXX: This mimics LD64, where it uses the install-name as codesign
1465 // identifier, if available.
1466 if (!config
->installName
.empty())
1467 fileName
= config
->installName
;
1469 // FIXME: Consider using finalOutput instead of outputFile.
1470 fileName
= config
->outputFile
;
1472 size_t slashIndex
= fileName
.rfind("/");
1473 if (slashIndex
!= std::string::npos
)
1474 fileName
= fileName
.drop_front(slashIndex
+ 1);
1476 // NOTE: Any changes to these calculations should be repeated
1477 // in llvm-objcopy's MachOLayoutBuilder::layoutTail.
1478 allHeadersSize
= alignTo
<16>(fixedHeadersSize
+ fileName
.size() + 1);
1479 fileNamePad
= allHeadersSize
- fixedHeadersSize
- fileName
.size();
1482 uint32_t CodeSignatureSection::getBlockCount() const {
1483 return (fileOff
+ blockSize
- 1) / blockSize
;
1486 uint64_t CodeSignatureSection::getRawSize() const {
1487 return allHeadersSize
+ getBlockCount() * hashSize
;
1490 void CodeSignatureSection::writeHashes(uint8_t *buf
) const {
1491 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1492 // MachOWriter::writeSignatureData.
1493 uint8_t *hashes
= buf
+ fileOff
+ allHeadersSize
;
1494 parallelFor(0, getBlockCount(), [&](size_t i
) {
1495 sha256(buf
+ i
* blockSize
,
1496 std::min(static_cast<size_t>(fileOff
- i
* blockSize
), blockSize
),
1497 hashes
+ i
* hashSize
);
1499 #if defined(__APPLE__)
1500 // This is macOS-specific work-around and makes no sense for any
1501 // other host OS. See https://openradar.appspot.com/FB8914231
1503 // The macOS kernel maintains a signature-verification cache to
1504 // quickly validate applications at time of execve(2). The trouble
1505 // is that for the kernel creates the cache entry at the time of the
1506 // mmap(2) call, before we have a chance to write either the code to
1507 // sign or the signature header+hashes. The fix is to invalidate
1508 // all cached data associated with the output file, thus discarding
1509 // the bogus prematurely-cached signature.
1510 msync(buf
, fileOff
+ getSize(), MS_INVALIDATE
);
1514 void CodeSignatureSection::writeTo(uint8_t *buf
) const {
1515 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1516 // MachOWriter::writeSignatureData.
1517 uint32_t signatureSize
= static_cast<uint32_t>(getSize());
1518 auto *superBlob
= reinterpret_cast<CS_SuperBlob
*>(buf
);
1519 write32be(&superBlob
->magic
, CSMAGIC_EMBEDDED_SIGNATURE
);
1520 write32be(&superBlob
->length
, signatureSize
);
1521 write32be(&superBlob
->count
, 1);
1522 auto *blobIndex
= reinterpret_cast<CS_BlobIndex
*>(&superBlob
[1]);
1523 write32be(&blobIndex
->type
, CSSLOT_CODEDIRECTORY
);
1524 write32be(&blobIndex
->offset
, blobHeadersSize
);
1525 auto *codeDirectory
=
1526 reinterpret_cast<CS_CodeDirectory
*>(buf
+ blobHeadersSize
);
1527 write32be(&codeDirectory
->magic
, CSMAGIC_CODEDIRECTORY
);
1528 write32be(&codeDirectory
->length
, signatureSize
- blobHeadersSize
);
1529 write32be(&codeDirectory
->version
, CS_SUPPORTSEXECSEG
);
1530 write32be(&codeDirectory
->flags
, CS_ADHOC
| CS_LINKER_SIGNED
);
1531 write32be(&codeDirectory
->hashOffset
,
1532 sizeof(CS_CodeDirectory
) + fileName
.size() + fileNamePad
);
1533 write32be(&codeDirectory
->identOffset
, sizeof(CS_CodeDirectory
));
1534 codeDirectory
->nSpecialSlots
= 0;
1535 write32be(&codeDirectory
->nCodeSlots
, getBlockCount());
1536 write32be(&codeDirectory
->codeLimit
, fileOff
);
1537 codeDirectory
->hashSize
= static_cast<uint8_t>(hashSize
);
1538 codeDirectory
->hashType
= kSecCodeSignatureHashSHA256
;
1539 codeDirectory
->platform
= 0;
1540 codeDirectory
->pageSize
= blockSizeShift
;
1541 codeDirectory
->spare2
= 0;
1542 codeDirectory
->scatterOffset
= 0;
1543 codeDirectory
->teamOffset
= 0;
1544 codeDirectory
->spare3
= 0;
1545 codeDirectory
->codeLimit64
= 0;
1546 OutputSegment
*textSeg
= getOrCreateOutputSegment(segment_names::text
);
1547 write64be(&codeDirectory
->execSegBase
, textSeg
->fileOff
);
1548 write64be(&codeDirectory
->execSegLimit
, textSeg
->fileSize
);
1549 write64be(&codeDirectory
->execSegFlags
,
1550 config
->outputType
== MH_EXECUTE
? CS_EXECSEG_MAIN_BINARY
: 0);
1551 auto *id
= reinterpret_cast<char *>(&codeDirectory
[1]);
1552 memcpy(id
, fileName
.begin(), fileName
.size());
1553 memset(id
+ fileName
.size(), 0, fileNamePad
);
1556 CStringSection::CStringSection(const char *name
)
1557 : SyntheticSection(segment_names::text
, name
) {
1558 flags
= S_CSTRING_LITERALS
;
1561 void CStringSection::addInput(CStringInputSection
*isec
) {
1562 isec
->parent
= this;
1563 inputs
.push_back(isec
);
1564 if (isec
->align
> align
)
1565 align
= isec
->align
;
// Copies the characters of each string piece to the piece's assigned output
// offset. Only string.size() bytes are copied, so the terminator byte is not
// written by this function.
// NOTE(review): the embedded numbering skips 1571-1572, so whatever runs at
// the top of the inner loop is not visible here -- confirm against the
// complete file.
1568 void CStringSection::writeTo(uint8_t *buf
) const {
1569 for (const CStringInputSection
*isec
: inputs
) {
1570 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1573 StringRef string
= isec
->getStringRef(i
);
1574 memcpy(buf
+ piece
.outSecOff
, string
.data(), string
.size());
// Lays the string pieces out one after another. Each piece is aligned to
// 2^(number of trailing zero bits of (section alignment | input offset)),
// gets that aligned offset as its outSecOff, and advances the running offset
// by its length plus one byte for the null terminator.
// NOTE(review): the embedded numbering skips 1583-1584, 1586 and 1594-1597,
// so whatever runs at the top of the inner loop and whatever consumes the
// final `offset` are not visible here -- confirm against the complete file.
1579 void CStringSection::finalizeContents() {
1580 uint64_t offset
= 0;
1581 for (CStringInputSection
*isec
: inputs
) {
1582 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1585 // See comment above DeduplicatedCStringSection for how alignment is
1587 uint32_t pieceAlign
= 1
1588 << llvm::countr_zero(isec
->align
| piece
.inSecOff
);
1589 offset
= alignToPowerOf2(offset
, pieceAlign
);
1590 piece
.outSecOff
= offset
;
1591 isec
->isFinal
= true;
1592 StringRef string
= isec
->getStringRef(i
);
1593 offset
+= string
.size() + 1; // account for null terminator
1599 // Mergeable cstring literals are found under the __TEXT,__cstring section. In
1600 // contrast to ELF, which puts strings that need different alignments into
1601 // different sections, clang's Mach-O backend puts them all in one section.
1602 // Strings that need to be aligned have the .p2align directive emitted before
1603 // them, which simply translates into zero padding in the object file. In other
1604 // words, we have to infer the desired alignment of these cstrings from their
// addresses.
1607 // We differ slightly from ld64 in how we've chosen to align these cstrings.
1608 // Both LLD and ld64 preserve the number of trailing zeros in each cstring's
1609 // address in the input object files. When deduplicating identical cstrings,
1610 // both linkers pick the cstring whose address has more trailing zeros, and
1611 // preserve the alignment of that address in the final binary. However, ld64
1612 // goes a step further and also preserves the offset of the cstring from the
1613 // last section-aligned address. I.e. if a cstring is at offset 18 in the
1614 // input, with a section alignment of 16, then both LLD and ld64 will ensure the
1615 // final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also
1616 // ensure that the final address is of the form 16 * k + 2 for some k.
1618 // Note that ld64's heuristic means that a dedup'ed cstring's final address is
1619 // dependent on the order of the input object files. E.g. if in addition to the
1620 // cstring at offset 18 above, we have a duplicate one in another file with a
1621 // `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick
1622 // the cstring from the object file earlier on the command line (since both have
1623 // the same number of trailing zeros in their address). So the final cstring may
1624 // either be at some address `16 * k + 2` or at some address `2 * k`.
1626 // I've opted not to follow this behavior primarily for implementation
1627 // simplicity, and secondarily to save a few more bytes. It's not clear to me
1628 // that preserving the section alignment + offset is ever necessary, and there
1629 // are many cases that are clearly redundant. In particular, if an x86_64 object
1630 // file contains some strings that are accessed via SIMD instructions, then the
1631 // .cstring section in the object file will be 16-byte-aligned (since SIMD
1632 // requires its operand addresses to be 16-byte aligned). However, there will
1633 // typically also be other cstrings in the same file that aren't used via SIMD
1634 // and don't need this alignment. They will be emitted at some arbitrary address
1635 // `A`, but ld64 will treat them as being 16-byte aligned with an offset of `16
// Two passes over all string pieces:
//  1. Record each distinct string in stringOffsetMap together with the
//     largest trailing-zero count (of section alignment | input offset) seen
//     for any of its occurrences.
//  2. Give every distinct string an output offset the first time it is met
//     (its outSecOff is still UINT64_MAX), aligned to
//     2^trailingZeros, and copy that offset into each piece.
// NOTE(review): the embedded numbering skips 1641-1642, 1657-1658 and 1666,
// so whatever runs at the top of both inner loops and the left-hand side that
// receives `outSecOff + s.size() + 1` are not visible here -- confirm against
// the complete file.
1637 void DeduplicatedCStringSection::finalizeContents() {
1638 // Find the largest alignment required for each string.
1639 for (const CStringInputSection
*isec
: inputs
) {
1640 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1643 auto s
= isec
->getCachedHashStringRef(i
);
1644 assert(isec
->align
!= 0);
1645 uint8_t trailingZeros
= llvm::countr_zero(isec
->align
| piece
.inSecOff
);
1646 auto it
= stringOffsetMap
.insert(
1647 std::make_pair(s
, StringOffset(trailingZeros
)));
1648 if (!it
.second
&& it
.first
->second
.trailingZeros
< trailingZeros
)
1649 it
.first
->second
.trailingZeros
= trailingZeros
;
1653 // Assign an offset for each string and save it to the corresponding
1654 // StringPieces for easy access.
1655 for (CStringInputSection
*isec
: inputs
) {
1656 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1659 auto s
= isec
->getCachedHashStringRef(i
);
1660 auto it
= stringOffsetMap
.find(s
);
1661 assert(it
!= stringOffsetMap
.end());
1662 StringOffset
&offsetInfo
= it
->second
;
1663 if (offsetInfo
.outSecOff
== UINT64_MAX
) {
1664 offsetInfo
.outSecOff
=
1665 alignToPowerOf2(size
, 1ULL << offsetInfo
.trailingZeros
);
1667 offsetInfo
.outSecOff
+ s
.size() + 1; // account for null terminator
1669 piece
.outSecOff
= offsetInfo
.outSecOff
;
1671 isec
->isFinal
= true;
// Copies every distinct string recorded in stringOffsetMap to its assigned
// output offset. Only data.size() bytes are copied, so the terminator byte is
// not written by this function.
// NOTE(review): the embedded numbering skips 1679, so a statement directly in
// front of the memcpy is not visible here -- confirm against the complete
// file.
1675 void DeduplicatedCStringSection::writeTo(uint8_t *buf
) const {
1676 for (const auto &p
: stringOffsetMap
) {
1677 StringRef data
= p
.first
.val();
1678 uint64_t off
= p
.second
.outSecOff
;
1680 memcpy(buf
+ off
, data
.data(), data
.size());
1684 DeduplicatedCStringSection::StringOffset
1685 DeduplicatedCStringSection::getStringOffset(StringRef str
) const {
1686 // StringPiece uses 31 bits to store the hashes, so we replicate that
1687 uint32_t hash
= xxh3_64bits(str
) & 0x7fffffff;
1688 auto offset
= stringOffsetMap
.find(CachedHashStringRef(str
, hash
));
1689 assert(offset
!= stringOffsetMap
.end() &&
1690 "Looked-up strings should always exist in section");
1691 return offset
->second
;
1694 // This section is actually emitted as __TEXT,__const by ld64, but clang may
1695 // emit input sections of that name, and LLD doesn't currently support mixing
1696 // synthetic and concat-type OutputSections. To work around this, I've given
1697 // our merged-literals section a different name.
// Creates the merged word-literal section in the text segment under the
// dedicated `literals` section name.
// NOTE(review): the embedded numbering skips 1700-1701, so the constructor
// body is not visible here -- confirm against the complete file.
1698 WordLiteralSection::WordLiteralSection()
1699 : SyntheticSection(segment_names::text
, section_names::literals
) {
1703 void WordLiteralSection::addInput(WordLiteralInputSection
*isec
) {
1704 isec
->parent
= this;
1705 inputs
.push_back(isec
);
1708 void WordLiteralSection::finalizeContents() {
1709 for (WordLiteralInputSection
*isec
: inputs
) {
1710 // We do all processing of the InputSection here, so it will be effectively
1712 isec
->isFinal
= true;
1713 const uint8_t *buf
= isec
->data
.data();
1714 switch (sectionType(isec
->getFlags())) {
1715 case S_4BYTE_LITERALS
: {
1716 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 4) {
1717 if (!isec
->isLive(off
))
1719 uint32_t value
= *reinterpret_cast<const uint32_t *>(buf
+ off
);
1720 literal4Map
.emplace(value
, literal4Map
.size());
1724 case S_8BYTE_LITERALS
: {
1725 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 8) {
1726 if (!isec
->isLive(off
))
1728 uint64_t value
= *reinterpret_cast<const uint64_t *>(buf
+ off
);
1729 literal8Map
.emplace(value
, literal8Map
.size());
1733 case S_16BYTE_LITERALS
: {
1734 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 16) {
1735 if (!isec
->isLive(off
))
1737 UInt128 value
= *reinterpret_cast<const UInt128
*>(buf
+ off
);
1738 literal16Map
.emplace(value
, literal16Map
.size());
1743 llvm_unreachable("invalid literal section type");
1748 void WordLiteralSection::writeTo(uint8_t *buf
) const {
1749 // Note that we don't attempt to do any endianness conversion in addInput(),
1750 // so we don't do it here either -- just write out the original value,
1752 for (const auto &p
: literal16Map
)
1753 memcpy(buf
+ p
.second
* 16, &p
.first
, 16);
1754 buf
+= literal16Map
.size() * 16;
1756 for (const auto &p
: literal8Map
)
1757 memcpy(buf
+ p
.second
* 8, &p
.first
, 8);
1758 buf
+= literal8Map
.size() * 8;
1760 for (const auto &p
: literal4Map
)
1761 memcpy(buf
+ p
.second
* 4, &p
.first
, 4);
1764 ObjCImageInfoSection::ObjCImageInfoSection()
1765 : SyntheticSection(segment_names::data
, section_names::objCImageInfo
) {}
1767 ObjCImageInfoSection::ImageInfo
1768 ObjCImageInfoSection::parseImageInfo(const InputFile
*file
) {
1770 ArrayRef
<uint8_t> data
= file
->objCImageInfo
;
1771 // The image info struct has the following layout:
1773 // uint32_t version;
1776 if (data
.size() < 8) {
1777 warn(toString(file
) + ": invalid __objc_imageinfo size");
1781 auto *buf
= reinterpret_cast<const uint32_t *>(data
.data());
1782 if (read32le(buf
) != 0) {
1783 warn(toString(file
) + ": invalid __objc_imageinfo version");
1787 uint32_t flags
= read32le(buf
+ 1);
1788 info
.swiftVersion
= (flags
>> 8) & 0xff;
1789 info
.hasCategoryClassProperties
= flags
& 0x40;
// Renders a Swift version byte as text for diagnostics. The only return
// visible here produces "0x" followed by the value in hexadecimal.
// NOTE(review): the embedded numbering skips 1794-1805 between the signature
// and this return, so most of the body is not visible here -- confirm against
// the complete file.
1793 static std::string
swiftVersionString(uint8_t version
) {
1806 return ("0x" + Twine::utohexstr(version
)).str();
1810 // Validate each object file's __objc_imageinfo and use them to generate the
1811 // image info for the output binary. Only two pieces of info are relevant:
1812 // 1. The Swift version (should be identical across inputs)
1813 // 2. `bool hasCategoryClassProperties` (true only if true for all inputs)
// Merges the image info of all input files into `info`:
// hasCategoryClassProperties starts out true and is AND-ed with each input's
// flag, and a Swift version that differs from the one already recorded is
// reported as an error naming both files.
// NOTE(review): `firstFile` is declared without an initializer and no
// assignment to it is visible, yet it is passed to toString() in the error
// message. The embedded numbering skips 1825-1826, 1831 and 1833-1837, so the
// skip statement for Swift-less inputs, the branch structure around the
// `info.swiftVersion` assignment and any assignment to `firstFile` are not
// visible here -- confirm against the complete file.
1814 void ObjCImageInfoSection::finalizeContents() {
1815 assert(files
.size() != 0); // should have already been checked via isNeeded()
1817 info
.hasCategoryClassProperties
= true;
1818 const InputFile
*firstFile
;
1819 for (const InputFile
*file
: files
) {
1820 ImageInfo inputInfo
= parseImageInfo(file
);
1821 info
.hasCategoryClassProperties
&= inputInfo
.hasCategoryClassProperties
;
1823 // swiftVersion 0 means no Swift is present, so no version checking required
1824 if (inputInfo
.swiftVersion
== 0)
1827 if (info
.swiftVersion
!= 0 && info
.swiftVersion
!= inputInfo
.swiftVersion
) {
1828 error("Swift version mismatch: " + toString(firstFile
) + " has version " +
1829 swiftVersionString(info
.swiftVersion
) + " but " + toString(file
) +
1830 " has version " + swiftVersionString(inputInfo
.swiftVersion
));
1832 info
.swiftVersion
= inputInfo
.swiftVersion
;
1838 void ObjCImageInfoSection::writeTo(uint8_t *buf
) const {
1839 uint32_t flags
= info
.hasCategoryClassProperties
? 0x40 : 0x0;
1840 flags
|= info
.swiftVersion
<< 8;
1841 write32le(buf
+ 4, flags
);
1844 InitOffsetsSection::InitOffsetsSection()
1845 : SyntheticSection(segment_names::text
, section_names::initOffsets
) {
1846 flags
= S_INIT_FUNC_OFFSETS
;
1847 align
= 4; // This section contains 32-bit integers.
1850 uint64_t InitOffsetsSection::getSize() const {
1852 for (const ConcatInputSection
*isec
: sections
)
1853 count
+= isec
->relocs
.size();
1854 return count
* sizeof(uint32_t);
1857 void InitOffsetsSection::writeTo(uint8_t *buf
) const {
1858 // FIXME: Add function specified by -init when that argument is implemented.
1859 for (ConcatInputSection
*isec
: sections
) {
1860 for (const Reloc
&rel
: isec
->relocs
) {
1861 const Symbol
*referent
= rel
.referent
.dyn_cast
<Symbol
*>();
1862 assert(referent
&& "section relocation should have been rejected");
1863 uint64_t offset
= referent
->getVA() - in
.header
->addr
;
1864 // FIXME: Can we handle this gracefully?
1865 if (offset
> UINT32_MAX
)
1866 fatal(isec
->getLocation(rel
.offset
) + ": offset to initializer " +
1867 referent
->getName() + " (" + utohexstr(offset
) +
1868 ") does not fit in 32 bits");
1870 // Entries need to be added in the order they appear in the section, but
1871 // relocations aren't guaranteed to be sorted.
1872 size_t index
= rel
.offset
>> target
->p2WordSize
;
1873 write32le(&buf
[index
* sizeof(uint32_t)], offset
);
1875 buf
+= isec
->relocs
.size() * sizeof(uint32_t);
1879 // The inputs are __mod_init_func sections, which contain pointers to
1880 // initializer functions, therefore all relocations should be of the UNSIGNED
1881 // type. InitOffsetsSection stores offsets, so if the initializer's address is
1882 // not known at link time, stub-indirection has to be used.
1883 void InitOffsetsSection::setUp() {
1884 for (const ConcatInputSection
*isec
: sections
) {
1885 for (const Reloc
&rel
: isec
->relocs
) {
1886 RelocAttrs attrs
= target
->getRelocAttrs(rel
.type
);
1887 if (!attrs
.hasAttr(RelocAttrBits::UNSIGNED
))
1888 error(isec
->getLocation(rel
.offset
) +
1889 ": unsupported relocation type: " + attrs
.name
);
1890 if (rel
.addend
!= 0)
1891 error(isec
->getLocation(rel
.offset
) +
1892 ": relocation addend is not representable in __init_offsets");
1893 if (rel
.referent
.is
<InputSection
*>())
1894 error(isec
->getLocation(rel
.offset
) +
1895 ": unexpected section relocation");
1897 Symbol
*sym
= rel
.referent
.dyn_cast
<Symbol
*>();
1898 if (auto *undefined
= dyn_cast
<Undefined
>(sym
))
1899 treatUndefinedSymbol(*undefined
, isec
, rel
.offset
);
1900 if (needsBinding(sym
))
1901 in
.stubs
->addEntry(sym
);
1906 void macho::createSyntheticSymbols() {
1907 auto addHeaderSymbol
= [](const char *name
) {
1908 symtab
->addSynthetic(name
, in
.header
->isec
, /*value=*/0,
1909 /*isPrivateExtern=*/true, /*includeInSymtab=*/false,
1910 /*referencedDynamically=*/false);
1913 switch (config
->outputType
) {
1914 // FIXME: Assign the right address value for these symbols
1915 // (rather than 0). But we need to do that after assignAddresses().
1917 // If linking PIE, __mh_execute_header is a defined symbol in
1919 // Otherwise, it's an absolute symbol.
1921 symtab
->addSynthetic("__mh_execute_header", in
.header
->isec
, /*value=*/0,
1922 /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
1923 /*referencedDynamically=*/true);
1925 symtab
->addSynthetic("__mh_execute_header", /*isec=*/nullptr, /*value=*/0,
1926 /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
1927 /*referencedDynamically=*/true);
1930 // The following symbols are N_SECT symbols, even though the header is not
1931 // part of any section and that they are private to the bundle/dylib/object
1932 // they are part of.
1934 addHeaderSymbol("__mh_bundle_header");
1937 addHeaderSymbol("__mh_dylib_header");
1940 addHeaderSymbol("__mh_dylinker_header");
1943 addHeaderSymbol("__mh_object_header");
1946 llvm_unreachable("unexpected outputType");
1950 // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
1951 // which does e.g. cleanup of static global variables. The ABI document
1952 // says that the pointer can point to any address in one of the dylib's
1953 // segments, but in practice ld64 seems to set it to point to the header,
1954 // so that's what's implemented here.
1955 addHeaderSymbol("___dso_handle");
1958 ChainedFixupsSection::ChainedFixupsSection()
1959 : LinkEditSection(segment_names::linkEdit
, section_names::chainFixups
) {}
1961 bool ChainedFixupsSection::isNeeded() const {
1962 assert(config
->emitChainedFixups
);
1963 // dyld always expects LC_DYLD_CHAINED_FIXUPS to point to a valid
1964 // dyld_chained_fixups_header, so we create this section even if there aren't
1969 static bool needsWeakBind(const Symbol
&sym
) {
1970 if (auto *dysym
= dyn_cast
<DylibSymbol
>(&sym
))
1971 return dysym
->isWeakDef();
1972 if (auto *defined
= dyn_cast
<Defined
>(&sym
))
1973 return defined
->isExternalWeakDef();
1977 void ChainedFixupsSection::addBinding(const Symbol
*sym
,
1978 const InputSection
*isec
, uint64_t offset
,
1980 locations
.emplace_back(isec
, offset
);
1981 int64_t outlineAddend
= (addend
< 0 || addend
> 0xFF) ? addend
: 0;
1982 auto [it
, inserted
] = bindings
.insert(
1983 {{sym
, outlineAddend
}, static_cast<uint32_t>(bindings
.size())});
1986 symtabSize
+= sym
->getName().size() + 1;
1987 hasWeakBind
= hasWeakBind
|| needsWeakBind(*sym
);
1988 if (!isInt
<23>(outlineAddend
))
1989 needsLargeAddend
= true;
1990 else if (outlineAddend
!= 0)
1995 std::pair
<uint32_t, uint8_t>
1996 ChainedFixupsSection::getBinding(const Symbol
*sym
, int64_t addend
) const {
1997 int64_t outlineAddend
= (addend
< 0 || addend
> 0xFF) ? addend
: 0;
1998 auto it
= bindings
.find({sym
, outlineAddend
});
1999 assert(it
!= bindings
.end() && "binding not found in the imports table");
2000 if (outlineAddend
== 0)
2001 return {it
->second
, addend
};
2002 return {it
->second
, 0};
2005 static size_t writeImport(uint8_t *buf
, int format
, uint32_t libOrdinal
,
2006 bool weakRef
, uint32_t nameOffset
, int64_t addend
) {
2008 case DYLD_CHAINED_IMPORT
: {
2009 auto *import
= reinterpret_cast<dyld_chained_import
*>(buf
);
2010 import
->lib_ordinal
= libOrdinal
;
2011 import
->weak_import
= weakRef
;
2012 import
->name_offset
= nameOffset
;
2013 return sizeof(dyld_chained_import
);
2015 case DYLD_CHAINED_IMPORT_ADDEND
: {
2016 auto *import
= reinterpret_cast<dyld_chained_import_addend
*>(buf
);
2017 import
->lib_ordinal
= libOrdinal
;
2018 import
->weak_import
= weakRef
;
2019 import
->name_offset
= nameOffset
;
2020 import
->addend
= addend
;
2021 return sizeof(dyld_chained_import_addend
);
2023 case DYLD_CHAINED_IMPORT_ADDEND64
: {
2024 auto *import
= reinterpret_cast<dyld_chained_import_addend64
*>(buf
);
2025 import
->lib_ordinal
= libOrdinal
;
2026 import
->weak_import
= weakRef
;
2027 import
->name_offset
= nameOffset
;
2028 import
->addend
= addend
;
2029 return sizeof(dyld_chained_import_addend64
);
2032 llvm_unreachable("Unknown import format");
2036 size_t ChainedFixupsSection::SegmentInfo::getSize() const {
2037 assert(pageStarts
.size() > 0 && "SegmentInfo for segment with no fixups?");
2038 return alignTo
<8>(sizeof(dyld_chained_starts_in_segment
) +
2039 pageStarts
.back().first
* sizeof(uint16_t));
2042 size_t ChainedFixupsSection::SegmentInfo::writeTo(uint8_t *buf
) const {
2043 auto *segInfo
= reinterpret_cast<dyld_chained_starts_in_segment
*>(buf
);
2044 segInfo
->size
= getSize();
2045 segInfo
->page_size
= target
->getPageSize();
2046 // FIXME: Use DYLD_CHAINED_PTR_64_OFFSET on newer OS versions.
2047 segInfo
->pointer_format
= DYLD_CHAINED_PTR_64
;
2048 segInfo
->segment_offset
= oseg
->addr
- in
.header
->addr
;
2049 segInfo
->max_valid_pointer
= 0; // not used on 64-bit
2050 segInfo
->page_count
= pageStarts
.back().first
+ 1;
2052 uint16_t *starts
= segInfo
->page_start
;
2053 for (size_t i
= 0; i
< segInfo
->page_count
; ++i
)
2054 starts
[i
] = DYLD_CHAINED_PTR_START_NONE
;
2056 for (auto [pageIdx
, startAddr
] : pageStarts
)
2057 starts
[pageIdx
] = startAddr
;
2058 return segInfo
->size
;
2061 static size_t importEntrySize(int format
) {
2063 case DYLD_CHAINED_IMPORT
:
2064 return sizeof(dyld_chained_import
);
2065 case DYLD_CHAINED_IMPORT_ADDEND
:
2066 return sizeof(dyld_chained_import_addend
);
2067 case DYLD_CHAINED_IMPORT_ADDEND64
:
2068 return sizeof(dyld_chained_import_addend64
);
2070 llvm_unreachable("Unknown import format");
2074 // This is step 3 of the algorithm described in the class comment of
2075 // ChainedFixupsSection.
2077 // LC_DYLD_CHAINED_FIXUPS data consists of (in this order):
2078 // * A dyld_chained_fixups_header
2079 // * A dyld_chained_starts_in_image
2080 // * One dyld_chained_starts_in_segment per segment
2081 // * List of all imports (dyld_chained_import, dyld_chained_import_addend, or
2082 // dyld_chained_import_addend64)
2083 // * Names of imported symbols
2084 void ChainedFixupsSection::writeTo(uint8_t *buf
) const {
2085 auto *header
= reinterpret_cast<dyld_chained_fixups_header
*>(buf
);
2086 header
->fixups_version
= 0;
2087 header
->imports_count
= bindings
.size();
2088 header
->imports_format
= importFormat
;
2089 header
->symbols_format
= 0;
2091 buf
+= alignTo
<8>(sizeof(*header
));
2093 auto curOffset
= [&buf
, &header
]() -> uint32_t {
2094 return buf
- reinterpret_cast<uint8_t *>(header
);
2097 header
->starts_offset
= curOffset();
2099 auto *imageInfo
= reinterpret_cast<dyld_chained_starts_in_image
*>(buf
);
2100 imageInfo
->seg_count
= outputSegments
.size();
2101 uint32_t *segStarts
= imageInfo
->seg_info_offset
;
2103 // dyld_chained_starts_in_image ends in a flexible array member containing an
2104 // uint32_t for each segment. Leave room for it, and fill it via segStarts.
2105 buf
+= alignTo
<8>(offsetof(dyld_chained_starts_in_image
, seg_info_offset
) +
2106 outputSegments
.size() * sizeof(uint32_t));
2108 // Initialize all offsets to 0, which indicates that the segment does not have
2109 // fixups. Those that do have them will be filled in below.
2110 for (size_t i
= 0; i
< outputSegments
.size(); ++i
)
2113 for (const SegmentInfo
&seg
: fixupSegments
) {
2114 segStarts
[seg
.oseg
->index
] = curOffset() - header
->starts_offset
;
2115 buf
+= seg
.writeTo(buf
);
2118 // Write imports table.
2119 header
->imports_offset
= curOffset();
2120 uint64_t nameOffset
= 0;
2121 for (auto [import
, idx
] : bindings
) {
2122 const Symbol
&sym
= *import
.first
;
2123 int16_t libOrdinal
= needsWeakBind(sym
)
2124 ? (int64_t)BIND_SPECIAL_DYLIB_WEAK_LOOKUP
2125 : ordinalForSymbol(sym
);
2126 buf
+= writeImport(buf
, importFormat
, libOrdinal
, sym
.isWeakRef(),
2127 nameOffset
, import
.second
);
2128 nameOffset
+= sym
.getName().size() + 1;
2131 // Write imported symbol names.
2132 header
->symbols_offset
= curOffset();
2133 for (auto [import
, idx
] : bindings
) {
2134 StringRef name
= import
.first
->getName();
2135 memcpy(buf
, name
.data(), name
.size());
2136 buf
+= name
.size() + 1; // account for null terminator
2139 assert(curOffset() == getRawSize());
2142 // This is step 2 of the algorithm described in the class comment of
2143 // ChainedFixupsSection.
2144 void ChainedFixupsSection::finalizeContents() {
2145 assert(target
->wordSize
== 8 && "Only 64-bit platforms are supported");
2146 assert(config
->emitChainedFixups
);
2148 if (!isUInt
<32>(symtabSize
))
2149 error("cannot encode chained fixups: imported symbols table size " +
2150 Twine(symtabSize
) + " exceeds 4 GiB");
2152 if (needsLargeAddend
|| !isUInt
<23>(symtabSize
))
2153 importFormat
= DYLD_CHAINED_IMPORT_ADDEND64
;
2154 else if (needsAddend
)
2155 importFormat
= DYLD_CHAINED_IMPORT_ADDEND
;
2157 importFormat
= DYLD_CHAINED_IMPORT
;
2159 for (Location
&loc
: locations
)
2161 loc
.isec
->parent
->getSegmentOffset() + loc
.isec
->getOffset(loc
.offset
);
2163 llvm::sort(locations
, [](const Location
&a
, const Location
&b
) {
2164 const OutputSegment
*segA
= a
.isec
->parent
->parent
;
2165 const OutputSegment
*segB
= b
.isec
->parent
->parent
;
2167 return a
.offset
< b
.offset
;
2168 return segA
->addr
< segB
->addr
;
2171 auto sameSegment
= [](const Location
&a
, const Location
&b
) {
2172 return a
.isec
->parent
->parent
== b
.isec
->parent
->parent
;
2175 const uint64_t pageSize
= target
->getPageSize();
2176 for (size_t i
= 0, count
= locations
.size(); i
< count
;) {
2177 const Location
&firstLoc
= locations
[i
];
2178 fixupSegments
.emplace_back(firstLoc
.isec
->parent
->parent
);
2179 while (i
< count
&& sameSegment(locations
[i
], firstLoc
)) {
2180 uint32_t pageIdx
= locations
[i
].offset
/ pageSize
;
2181 fixupSegments
.back().pageStarts
.emplace_back(
2182 pageIdx
, locations
[i
].offset
% pageSize
);
2184 while (i
< count
&& sameSegment(locations
[i
], firstLoc
) &&
2185 locations
[i
].offset
/ pageSize
== pageIdx
)
2190 // Compute expected encoded size.
2191 size
= alignTo
<8>(sizeof(dyld_chained_fixups_header
));
2192 size
+= alignTo
<8>(offsetof(dyld_chained_starts_in_image
, seg_info_offset
) +
2193 outputSegments
.size() * sizeof(uint32_t));
2194 for (const SegmentInfo
&seg
: fixupSegments
)
2195 size
+= seg
.getSize();
2196 size
+= importEntrySize(importFormat
) * bindings
.size();
2200 template SymtabSection
*macho::makeSymtabSection
<LP64
>(StringTableSection
&);
2201 template SymtabSection
*macho::makeSymtabSection
<ILP32
>(StringTableSection
&);