1 //===- SyntheticSections.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "SyntheticSections.h"
10 #include "ConcatOutputSection.h"
12 #include "ExportTrie.h"
13 #include "InputFiles.h"
14 #include "MachOStructs.h"
16 #include "OutputSegment.h"
17 #include "SymbolTable.h"
20 #include "lld/Common/CommonLinkerContext.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/EndianStream.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/LEB128.h"
26 #include "llvm/Support/Parallel.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/xxhash.h"
30 #if defined(__APPLE__)
33 #define COMMON_DIGEST_FOR_OPENSSL
34 #include <CommonCrypto/CommonDigest.h>
36 #include "llvm/Support/SHA256.h"
40 using namespace llvm::MachO
;
41 using namespace llvm::support
;
42 using namespace llvm::support::endian
;
44 using namespace lld::macho
;
46 // Reads `len` bytes at data and writes the 32-byte SHA256 checksum to `output`.
// Two implementations appear below: under `defined(__APPLE__)` the digest is
// produced by CommonCrypto's CC_SHA256; the other statements hash the bytes
// with llvm's SHA256::hash and memcpy the resulting std::array into `output`.
// The static_assert ties the digest length to CodeSignatureSection::hashSize.
47 static void sha256(const uint8_t *data
, size_t len
, uint8_t *output
) {
48 #if defined(__APPLE__)
49 // FIXME: Make LLVM's SHA256 faster and use it unconditionally. See PR56121
50 // for some notes on this.
51 CC_SHA256(data
, len
, output
);
53 ArrayRef
<uint8_t> block(data
, len
);
54 std::array
<uint8_t, 32> hash
= SHA256::hash(block
);
55 static_assert(hash
.size() == CodeSignatureSection::hashSize
);
56 memcpy(output
, hash
.data(), hash
.size());
// Definition of the macho::syntheticSections vector. The SyntheticSection
// constructor appends every newly constructed section to it.
61 std::vector
<SyntheticSection
*> macho::syntheticSections
;
// Constructs a synthetic output section of kind SyntheticKind.
//  - The (segname, name) pair is passed through maybeRenameSection() and the
//    result is stored in this->segname / this->name.
//  - A synthetic input section is created from the constructor arguments
//    `segname` / `name` and stored in `isec`.
//  - The new object registers itself in syntheticSections.
63 SyntheticSection::SyntheticSection(const char *segname
, const char *name
)
64 : OutputSection(SyntheticKind
, name
) {
65 std::tie(this->segname
, this->name
) = maybeRenameSection({segname
, name
});
66 isec
= makeSyntheticInputSection(segname
, name
);
68 syntheticSections
.push_back(this);
71 // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
72 // from the beginning of the file (i.e. the header).
// The header section is therefore constructed as
// segment_names::text / section_names::header.
73 MachHeaderSection::MachHeaderSection()
74 : SyntheticSection(segment_names::text
, section_names::header
) {
75 // XXX: This is a hack. (See D97007)
76 // Setting the index to 1 to pretend that this section is the text
// Appends `lc` to loadCommands and adds lc->getSize() to the running
// sizeOfCmds total (the value later written to mach_header::sizeofcmds).
82 void MachHeaderSection::addLoadCommand(LoadCommand
*lc
) {
83 loadCommands
.push_back(lc
);
84 sizeOfCmds
+= lc
->getSize();
// Computes the size of the header section as
// target->headerSize + sizeOfCmds + config->headerPad. When
// config->emitEncryptionInfo is set, that value is rounded up to the target
// page size with alignToPowerOf2().
87 uint64_t MachHeaderSection::getSize() const {
88 uint64_t size
= target
->headerSize
+ sizeOfCmds
+ config
->headerPad
;
89 // If we are emitting an encryptable binary, our load commands must have a
90 // separate (non-encrypted) page to themselves.
91 if (config
->emitEncryptionInfo
)
92 size
= alignToPowerOf2(size
, target
->getPageSize());
// Derives the CPU subtype for the output header. Starts from
// target->cpuSubtype and ORs in CPU_SUBTYPE_LIB64 when all of the following
// hold: the output is MH_EXECUTE, config->staticLink is false, the target
// subtype is CPU_SUBTYPE_X86_64_ALL, the platform is PLATFORM_MACOS, and the
// minimum deployment version is at least 10.5.
96 static uint32_t cpuSubtype() {
97 uint32_t subtype
= target
->cpuSubtype
;
99 if (config
->outputType
== MH_EXECUTE
&& !config
->staticLink
&&
100 target
->cpuSubtype
== CPU_SUBTYPE_X86_64_ALL
&&
101 config
->platform() == PLATFORM_MACOS
&&
102 config
->platformInfo
.target
.MinDeployment
>= VersionTuple(10, 5))
103 subtype
|= CPU_SUBTYPE_LIB64
;
// Reports whether the output has weak bindings. The answer comes from
// in.chainedFixups when config->emitChainedFixups is set, and from
// in.weakBinding->hasEntry() otherwise.
108 static bool hasWeakBinding() {
109 return config
->emitChainedFixups
? in
.chainedFixups
->hasWeakBinding()
110 : in
.weakBinding
->hasEntry();
// Reports whether the output has a non-weak definition, asking
// in.chainedFixups when config->emitChainedFixups is set and in.weakBinding
// otherwise.
113 static bool hasNonWeakDefinition() {
114 return config
->emitChainedFixups
? in
.chainedFixups
->hasNonWeakDefinition()
115 : in
.weakBinding
->hasNonWeakDefinition();
// Writes the Mach-O header into `buf`, which is reinterpreted as a
// mach_header. The fixed fields come from `target`, `config`, cpuSubtype()
// and the accumulated load-command bookkeeping (loadCommands.size(),
// sizeOfCmds). `flags` starts as MH_DYLDLINK and further bits are ORed in
// according to the conditions below.
118 void MachHeaderSection::writeTo(uint8_t *buf
) const {
119 auto *hdr
= reinterpret_cast<mach_header
*>(buf
);
120 hdr
->magic
= target
->magic
;
121 hdr
->cputype
= target
->cpuType
;
122 hdr
->cpusubtype
= cpuSubtype();
123 hdr
->filetype
= config
->outputType
;
124 hdr
->ncmds
= loadCommands
.size();
125 hdr
->sizeofcmds
= sizeOfCmds
;
126 hdr
->flags
= MH_DYLDLINK
;
// Two-level namespace outputs also get MH_NOUNDEFS.
128 if (config
->namespaceKind
== NamespaceKind::twolevel
)
129 hdr
->flags
|= MH_NOUNDEFS
| MH_TWOLEVEL
;
131 if (config
->outputType
== MH_DYLIB
&& !config
->hasReexports
)
132 hdr
->flags
|= MH_NO_REEXPORTED_DYLIBS
;
134 if (config
->markDeadStrippableDylib
)
135 hdr
->flags
|= MH_DEAD_STRIPPABLE_DYLIB
;
137 if (config
->outputType
== MH_EXECUTE
&& config
->isPic
)
138 hdr
->flags
|= MH_PIE
;
140 if (config
->outputType
== MH_DYLIB
&& config
->applicationExtension
)
141 hdr
->flags
|= MH_APP_EXTENSION_SAFE
;
// An exported weak symbol sets both of the weak-related flags below.
143 if (in
.exports
->hasWeakSymbol
|| hasNonWeakDefinition())
144 hdr
->flags
|= MH_WEAK_DEFINES
;
146 if (in
.exports
->hasWeakSymbol
|| hasWeakBinding())
147 hdr
->flags
|= MH_BINDS_TO_WEAK
;
// Scan all output sections for one whose flags satisfy
// isThreadLocalVariables().
149 for (const OutputSegment
*seg
: outputSegments
) {
150 for (const OutputSection
*osec
: seg
->getSections()) {
151 if (isThreadLocalVariables(osec
->flags
)) {
152 hdr
->flags
|= MH_HAS_TLV_DESCRIPTORS
;
// `p` points at the first byte after the fixed-size header; the loop below
// visits the load commands in the order they were added.
158 uint8_t *p
= reinterpret_cast<uint8_t *>(hdr
) + target
->headerSize
;
159 for (const LoadCommand
*lc
: loadCommands
) {
// Creates the section as segment_names::pageZero / section_names::pageZero;
// the constructor body is empty.
165 PageZeroSection::PageZeroSection()
166 : SyntheticSection(segment_names::pageZero
, section_names::pageZero
) {}
// Creates the rebase section as a LinkEditSection named
// segment_names::linkEdit / section_names::rebase; the body is empty.
168 RebaseSection::RebaseSection()
169 : LinkEditSection(segment_names::linkEdit
, section_names::rebase
) {}
// Field of the rebase encoder state (used as `state.sequenceLength` on a
// RebaseState in flushRebase() and encodeRebases()): the repeat count that
// flushRebase() passes to the DO_REBASE_*_TIMES opcodes.
173 uint64_t sequenceLength
;
// Writes to `os` an opcode that adds `incr` to the rebase address.
// When `incr` is a multiple of target->wordSize and the scaled value
// (incr >> target->p2WordSize) does not exceed REBASE_IMMEDIATE_MASK, the
// one-byte REBASE_OPCODE_ADD_ADDR_IMM_SCALED form carries the scaled value as
// its immediate. The other visible form is REBASE_OPCODE_ADD_ADDR_ULEB
// followed by `incr` as a ULEB128.
178 static void emitIncrement(uint64_t incr
, raw_svector_ostream
&os
) {
181 if ((incr
>> target
->p2WordSize
) <= REBASE_IMMEDIATE_MASK
&&
182 (incr
% target
->wordSize
) == 0) {
183 os
<< static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED
|
184 (incr
>> target
->p2WordSize
));
186 os
<< static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB
);
187 encodeULEB128(incr
, os
);
// Emits the rebase opcode(s) for one accumulated run described by `state`
// (state.sequenceLength locations, state.skipLength bytes apart). Requires a
// non-empty run (asserted). Encodings used:
//  - skipLength == wordSize: DO_REBASE_IMM_TIMES when the count fits in
//    REBASE_IMMEDIATE_MASK; the other visible form is DO_REBASE_ULEB_TIMES
//    followed by ULEB128(count).
//  - a single location with another stride: DO_REBASE_ADD_ADDR_ULEB with
//    ULEB128(skipLength - wordSize).
//  - the remaining visible form: DO_REBASE_ULEB_TIMES_SKIPPING_ULEB with
//    ULEB128(count) and ULEB128(skipLength - wordSize).
191 static void flushRebase(const RebaseState
&state
, raw_svector_ostream
&os
) {
192 assert(state
.sequenceLength
> 0);
194 if (state
.skipLength
== target
->wordSize
) {
195 if (state
.sequenceLength
<= REBASE_IMMEDIATE_MASK
) {
196 os
<< static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES
|
197 state
.sequenceLength
);
199 os
<< static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES
);
200 encodeULEB128(state
.sequenceLength
, os
);
202 } else if (state
.sequenceLength
== 1) {
203 os
<< static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
);
204 encodeULEB128(state
.skipLength
- target
->wordSize
, os
);
206 os
<< static_cast<uint8_t>(
207 REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
);
208 encodeULEB128(state
.sequenceLength
, os
);
209 encodeULEB128(state
.skipLength
- target
->wordSize
, os
);
213 // Rebases are communicated to dyld using a bytecode, whose opcodes cause the
214 // memory location at a specific address to be rebased and/or the address to be
217 // Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic
218 // one, encoding a series of evenly spaced addresses. This algorithm works by
219 // splitting up the sorted list of addresses into such chunks. If the locations
220 // are consecutive or the sequence consists of a single location, flushRebase
221 // will use a smaller, more specialized encoding.
//
// Parameters:
//   seg       - segment that all of `locations` belong to.
//   locations - rebase locations; modified in place (offsets are rewritten to
//               segment offsets and duplicates are removed with llvm::unique).
//               Must not be empty (asserted). The caller sorts the list by VA
//               before slicing it per segment.
//   os        - stream receiving the encoded opcodes.
// The loop keeps a RebaseState (sequenceLength, skipLength) describing the
// run being built and calls flushRebase() whenever a location does not fit it.
222 static void encodeRebases(const OutputSegment
*seg
,
223 MutableArrayRef
<Location
> locations
,
224 raw_svector_ostream
&os
) {
225 // dyld operates on segments. Translate section offsets into segment offsets.
226 for (Location
&loc
: locations
)
228 loc
.isec
->parent
->getSegmentOffset() + loc
.isec
->getOffset(loc
.offset
);
229 // The algorithm assumes that locations are unique.
231 llvm::unique(locations
, [](const Location
&a
, const Location
&b
) {
232 return a
.offset
== b
.offset
;
234 size_t count
= end
- locations
.begin();
236 os
<< static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
238 assert(!locations
.empty());
239 uint64_t offset
= locations
[0].offset
;
240 encodeULEB128(offset
, os
);
// Initial state: a run of one location with the default stride of one word.
242 RebaseState state
{1, target
->wordSize
};
244 for (size_t i
= 1; i
< count
; ++i
) {
245 offset
= locations
[i
].offset
;
247 uint64_t skip
= offset
- locations
[i
- 1].offset
;
248 assert(skip
!= 0 && "duplicate locations should have been weeded out");
250 if (skip
== state
.skipLength
) {
251 ++state
.sequenceLength
;
252 } else if (state
.sequenceLength
== 1) {
253 ++state
.sequenceLength
;
254 state
.skipLength
= skip
;
255 } else if (skip
< state
.skipLength
) {
256 // The address is lower than what the rebase pointer would be if the last
257 // location would be part of a sequence. We start a new sequence from the
258 // previous location.
259 --state
.sequenceLength
;
260 flushRebase(state
, os
);
262 state
.sequenceLength
= 2;
263 state
.skipLength
= skip
;
265 // The address is at some positive offset from the rebase pointer. We
266 // start a new sequence which begins with the current location.
267 flushRebase(state
, os
);
268 emitIncrement(skip
- state
.skipLength
, os
);
269 state
.sequenceLength
= 1;
270 state
.skipLength
= target
->wordSize
;
// Emit whatever run is still pending after the last location.
273 flushRebase(state
, os
);
// Builds the rebase opcode stream in `contents`.
// The stream starts with REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER.
// `locations` is sorted by virtual address, then split into maximal runs
// whose input sections share the same output segment
// (isec->parent->parent); each run is handed to encodeRebases(). The stream
// is terminated with REBASE_OPCODE_DONE.
276 void RebaseSection::finalizeContents() {
277 if (locations
.empty())
280 raw_svector_ostream os
{contents
};
281 os
<< static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM
| REBASE_TYPE_POINTER
);
283 llvm::sort(locations
, [](const Location
&a
, const Location
&b
) {
284 return a
.isec
->getVA(a
.offset
) < b
.isec
->getVA(b
.offset
);
// `i` is not advanced in the for-header; see the run scan below.
287 for (size_t i
= 0, count
= locations
.size(); i
< count
;) {
288 const OutputSegment
*seg
= locations
[i
].isec
->parent
->parent
;
290 while (j
< count
&& locations
[j
].isec
->parent
->parent
== seg
)
292 encodeRebases(seg
, {locations
.data() + i
, locations
.data() + j
}, os
);
295 os
<< static_cast<uint8_t>(REBASE_OPCODE_DONE
);
// Copies the opcode bytes accumulated in `contents` into `buf`.
298 void RebaseSection::writeTo(uint8_t *buf
) const {
299 memcpy(buf
, contents
.data(), contents
.size());
// Base constructor for sections of non-lazy pointers: forwards the segment
// and section names to SyntheticSection and sets the section alignment to
// the target word size.
302 NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname
,
304 : SyntheticSection(segname
, name
) {
305 align
= target
->wordSize
;
// Records the dynamic-linker fixups needed for a pointer to `sym` stored at
// `offset` within `isec`.
// With config->emitChainedFixups: a binding is added when needsBinding(sym),
// a rebase when `sym` is a Defined; any other symbol kind is unreachable.
// For the non-chained path visible below:
//  - DylibSymbol: a regular binding, plus a weak binding if it is a weak def.
//  - Defined: a rebase, plus a weak binding for external weak defs or a
//    regular binding for interposable symbols.
//  - anything else is unreachable.
308 void macho::addNonLazyBindingEntries(const Symbol
*sym
,
309 const InputSection
*isec
, uint64_t offset
,
311 if (config
->emitChainedFixups
) {
312 if (needsBinding(sym
))
313 in
.chainedFixups
->addBinding(sym
, isec
, offset
, addend
);
314 else if (isa
<Defined
>(sym
))
315 in
.chainedFixups
->addRebase(isec
, offset
);
317 llvm_unreachable("cannot bind to an undefined symbol");
321 if (const auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
322 in
.binding
->addEntry(dysym
, isec
, offset
, addend
);
323 if (dysym
->isWeakDef())
324 in
.weakBinding
->addEntry(sym
, isec
, offset
, addend
);
325 } else if (const auto *defined
= dyn_cast
<Defined
>(sym
)) {
326 in
.rebase
->addEntry(isec
, offset
);
327 if (defined
->isExternalWeakDef())
328 in
.weakBinding
->addEntry(sym
, isec
, offset
, addend
);
329 else if (defined
->interposable
)
330 in
.binding
->addEntry(sym
, isec
, offset
, addend
);
332 // Undefined symbols are filtered out in scanRelocations(); we should never
334 llvm_unreachable("cannot bind to an undefined symbol");
// Adds `sym` to this pointer section if it is not already present.
// On first insertion the symbol's gotIndex is set to its position in
// `entries`, and the fixups for its slot (byte offset
// gotIndex * target->wordSize within this section's `isec`) are registered
// through addNonLazyBindingEntries().
338 void NonLazyPointerSectionBase::addEntry(Symbol
*sym
) {
339 if (entries
.insert(sym
)) {
340 assert(!sym
->isInGot());
341 sym
->gotIndex
= entries
.size() - 1;
343 addNonLazyBindingEntries(sym
, isec
, sym
->gotIndex
* target
->wordSize
);
// Writes a chained-fixup rebase entry for `targetVA` at `buf`, which is
// reinterpreted as a dyld_chained_ptr_64_rebase. Only valid when chained
// fixups are enabled on a target with 8-byte words (both asserted).
// `target` receives targetVA masked with 0xf'ffff'ffff and `high8` receives
// targetVA >> 56. The value is then reassembled from those two fields, and
// error() is reported if it no longer equals targetVA.
347 void macho::writeChainedRebase(uint8_t *buf
, uint64_t targetVA
) {
348 assert(config
->emitChainedFixups
);
349 assert(target
->wordSize
== 8 && "Only 64-bit platforms are supported");
350 auto *rebase
= reinterpret_cast<dyld_chained_ptr_64_rebase
*>(buf
);
351 rebase
->target
= targetVA
& 0xf'ffff'ffff;
352 rebase
->high8
= (targetVA
>> 56);
353 rebase
->reserved
= 0;
357 // The fixup format places a 64 GiB limit on the output's size.
358 // Should we handle this gracefully?
359 uint64_t encodedVA
= rebase
->target
| ((uint64_t)rebase
->high8
<< 56);
360 if (encodedVA
!= targetVA
)
361 error("rebase target address 0x" + Twine::utohexstr(targetVA
) +
362 " does not fit into chained fixup. Re-link with -no_fixup_chains");
// Writes a chained-fixup bind entry for `sym` at `buf`, which is
// reinterpreted as a dyld_chained_ptr_64_bind. Only valid when chained fixups
// are enabled on a target with 8-byte words (both asserted). The ordinal and
// the inline addend are looked up with in.chainedFixups->getBinding(sym,
// addend) and stored in the corresponding fields.
365 static void writeChainedBind(uint8_t *buf
, const Symbol
*sym
, int64_t addend
) {
366 assert(config
->emitChainedFixups
);
367 assert(target
->wordSize
== 8 && "Only 64-bit platforms are supported");
368 auto *bind
= reinterpret_cast<dyld_chained_ptr_64_bind
*>(buf
);
369 auto [ordinal
, inlineAddend
] = in
.chainedFixups
->getBinding(sym
, addend
);
370 bind
->ordinal
= ordinal
;
371 bind
->addend
= inlineAddend
;
// Writes the chained fixup for a pointer to `sym` + `addend` at `buf`:
// a bind entry when needsBinding(sym); the other visible call writes a rebase
// entry targeting sym->getVA() + addend.
377 void macho::writeChainedFixup(uint8_t *buf
, const Symbol
*sym
, int64_t addend
) {
378 if (needsBinding(sym
))
379 writeChainedBind(buf
, sym
, addend
);
381 writeChainedRebase(buf
, sym
->getVA() + addend
);
// Writes the pointer slots into `buf`; slot i lives at byte offset
// i * target->wordSize.
// With config->emitChainedFixups every entry gets a chained fixup with
// addend 0. The second loop writes the VA of each entry that is a Defined
// symbol; entries of other kinds are not written by it.
// NOTE(review): write64le stores 8 bytes per slot while the slot stride is
// target->wordSize — confirm this path only runs with wordSize == 8.
384 void NonLazyPointerSectionBase::writeTo(uint8_t *buf
) const {
385 if (config
->emitChainedFixups
) {
386 for (const auto &[i
, entry
] : llvm::enumerate(entries
))
387 writeChainedFixup(&buf
[i
* target
->wordSize
], entry
, 0);
389 for (const auto &[i
, entry
] : llvm::enumerate(entries
))
390 if (auto *defined
= dyn_cast
<Defined
>(entry
))
391 write64le(&buf
[i
* target
->wordSize
], defined
->getVA());
// Creates the GOT as segment_names::data / section_names::got and marks it
// with the S_NON_LAZY_SYMBOL_POINTERS section type.
395 GotSection::GotSection()
396 : NonLazyPointerSectionBase(segment_names::data
, section_names::got
) {
397 flags
= S_NON_LAZY_SYMBOL_POINTERS
;
// Creates the thread-local pointer section as
// segment_names::data / section_names::threadPtrs and marks it with the
// S_THREAD_LOCAL_VARIABLE_POINTERS section type.
400 TlvPointerSection::TlvPointerSection()
401 : NonLazyPointerSectionBase(segment_names::data
,
402 section_names::threadPtrs
) {
403 flags
= S_THREAD_LOCAL_VARIABLE_POINTERS
;
// Creates the binding section as a LinkEditSection named
// segment_names::linkEdit / section_names::binding; the body is empty.
406 BindingSection::BindingSection()
407 : LinkEditSection(segment_names::linkEdit
, section_names::binding
) {}
// Member declarations with default initializers used by the bind encoder.
// `segment` is the field compared and updated through `lastBinding` in
// encodeBinding(); it starts as nullptr.
411 OutputSegment
*segment
= nullptr;
// `opcode` and `consecutiveCount` are the fields accessed on BindIR elements
// in optimizeOpcodes() and flushOpcodes().
416 // Default value of 0xF0 is not valid opcode and should make the program
417 // scream instead of accidentally writing "valid" values.
418 uint8_t opcode
= 0xF0;
420 uint64_t consecutiveCount
= 0;
424 // Encode a sequence of opcodes that tell dyld to write the address of symbol +
425 // addend at osec->addr + outSecOff.
427 // The bind opcode "interpreter" remembers the values of each binding field, so
428 // we only need to encode the differences between bindings. Hence the use of
//
// Parameters:
//   osec        - output section containing the location to bind.
//   outSecOff   - offset of the location within `osec`.
//   addend      - addend for this binding.
//   lastBinding - state of the previous binding; updated in place.
//   opcodes     - receives the generated BindIR records.
// A segment change produces a SET_SEGMENT_AND_OFFSET_ULEB record; an offset
// change within the same segment produces ADD_ADDR_ULEB with the delta; an
// addend change produces SET_ADDEND_SLEB. A DO_BIND record is always appended.
430 static void encodeBinding(const OutputSection
*osec
, uint64_t outSecOff
,
431 int64_t addend
, Binding
&lastBinding
,
432 std::vector
<BindIR
> &opcodes
) {
433 OutputSegment
*seg
= osec
->parent
;
434 uint64_t offset
= osec
->getSegmentOffset() + outSecOff
;
435 if (lastBinding
.segment
!= seg
) {
437 {static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
440 lastBinding
.segment
= seg
;
441 lastBinding
.offset
= offset
;
442 } else if (lastBinding
.offset
!= offset
) {
443 opcodes
.push_back({BIND_OPCODE_ADD_ADDR_ULEB
, offset
- lastBinding
.offset
});
444 lastBinding
.offset
= offset
;
447 if (lastBinding
.addend
!= addend
) {
449 {BIND_OPCODE_SET_ADDEND_SLEB
, static_cast<uint64_t>(addend
)});
450 lastBinding
.addend
= addend
;
453 opcodes
.push_back({BIND_OPCODE_DO_BIND
, 0});
454 // DO_BIND causes dyld to both perform the binding and increment the offset
455 lastBinding
.offset
+= target
->wordSize
;
// Rewrites `opcodes` in place into a more compact sequence, in three passes.
// Passes 1 and 2 compact the vector using a write index (`pWrite`) that
// trails the read index (`i`), then shrink it with resize(pWrite + 1).
//   Pass 1: a DO_BIND followed by ADD_ADDR_ULEB becomes one
//           DO_BIND_ADD_ADDR_ULEB carrying the add's operand.
//   Pass 2: runs of DO_BIND_ADD_ADDR_ULEB with equal operands become one
//           DO_BIND_ULEB_TIMES_SKIPPING_ULEB with a repeat count.
//   Pass 3: DO_BIND_ADD_ADDR_ULEB whose operand is a small multiple of the
//           word size becomes DO_BIND_ADD_ADDR_IMM_SCALED.
// NOTE(review): resize(pWrite + 1) yields at least one element, so this
// appears to assume `opcodes` is non-empty on entry — confirm with callers.
458 static void optimizeOpcodes(std::vector
<BindIR
> &opcodes
) {
459 // Pass 1: Combine bind/add pairs
462 for (i
= 1; i
< opcodes
.size(); ++i
, ++pWrite
) {
463 if ((opcodes
[i
].opcode
== BIND_OPCODE_ADD_ADDR_ULEB
) &&
464 (opcodes
[i
- 1].opcode
== BIND_OPCODE_DO_BIND
)) {
465 opcodes
[pWrite
].opcode
= BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
;
466 opcodes
[pWrite
].data
= opcodes
[i
].data
;
469 opcodes
[pWrite
] = opcodes
[i
- 1];
// Carry over the final element when the loop ended exactly at the end.
472 if (i
== opcodes
.size())
473 opcodes
[pWrite
] = opcodes
[i
- 1];
474 opcodes
.resize(pWrite
+ 1);
476 // Pass 2: Compress two or more bind_add opcodes
478 for (i
= 1; i
< opcodes
.size(); ++i
, ++pWrite
) {
479 if ((opcodes
[i
].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
480 (opcodes
[i
- 1].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
481 (opcodes
[i
].data
== opcodes
[i
- 1].data
)) {
482 opcodes
[pWrite
].opcode
= BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
;
483 opcodes
[pWrite
].consecutiveCount
= 2;
484 opcodes
[pWrite
].data
= opcodes
[i
].data
;
486 while (i
< opcodes
.size() &&
487 (opcodes
[i
].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
488 (opcodes
[i
].data
== opcodes
[i
- 1].data
)) {
489 opcodes
[pWrite
].consecutiveCount
++;
493 opcodes
[pWrite
] = opcodes
[i
- 1];
496 if (i
== opcodes
.size())
497 opcodes
[pWrite
] = opcodes
[i
- 1];
498 opcodes
.resize(pWrite
+ 1);
500 // Pass 3: Use immediate encodings
501 // Every binding is the size of one pointer. If the next binding is a
502 // multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the
503 // opcode can be scaled by wordSize into a single byte and dyld will
504 // expand it to the correct address.
505 for (auto &p
: opcodes
) {
506 // It's unclear why the check needs to be less than BIND_IMMEDIATE_MASK,
507 // but ld64 currently does this. This could be a potential bug, but
508 // for now, perform the same behavior to prevent mysterious bugs.
509 if ((p
.opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
510 ((p
.data
/ target
->wordSize
) < BIND_IMMEDIATE_MASK
) &&
511 ((p
.data
% target
->wordSize
) == 0)) {
512 p
.opcode
= BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
;
513 p
.data
/= target
->wordSize
;
// Serializes one BindIR record to `os`. The record kind is taken from
// op.opcode masked with BIND_OPCODE_MASK. Operands visible per case:
//  - SET_SEGMENT_AND_OFFSET_ULEB / ADD_ADDR_ULEB / DO_BIND_ADD_ADDR_ULEB:
//    op.data as ULEB128.
//  - SET_ADDEND_SLEB: op.data reinterpreted as int64_t, as SLEB128.
//  - DO_BIND_ULEB_TIMES_SKIPPING_ULEB: ULEB128(consecutiveCount), then
//    ULEB128(data).
//  - DO_BIND_ADD_ADDR_IMM_SCALED: a single byte, op.opcode | op.data.
// NOTE(review): the llvm_unreachable text talks about an unrecognized
// symbol, although the labels here are bind opcodes — confirm the wording.
518 static void flushOpcodes(const BindIR
&op
, raw_svector_ostream
&os
) {
519 uint8_t opcode
= op
.opcode
& BIND_OPCODE_MASK
;
521 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
:
522 case BIND_OPCODE_ADD_ADDR_ULEB
:
523 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
:
525 encodeULEB128(op
.data
, os
);
527 case BIND_OPCODE_SET_ADDEND_SLEB
:
529 encodeSLEB128(static_cast<int64_t>(op
.data
), os
);
531 case BIND_OPCODE_DO_BIND
:
534 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
:
536 encodeULEB128(op
.consecutiveCount
, os
);
537 encodeULEB128(op
.data
, os
);
539 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
:
540 os
<< static_cast<uint8_t>(op
.opcode
| op
.data
);
543 llvm_unreachable("cannot bind to an unrecognized symbol");
// Classifies `sym` for weak binding: a DylibSymbol qualifies when it is a
// weak definition, a Defined symbol when it is an external weak definition.
547 static bool needsWeakBind(const Symbol
&sym
) {
548 if (auto *dysym
= dyn_cast
<DylibSymbol
>(&sym
))
549 return dysym
->isWeakDef();
550 if (auto *defined
= dyn_cast
<Defined
>(&sym
))
551 return defined
->isExternalWeakDef();
555 // Non-weak bindings need to have their dylib ordinal encoded as well.
// Returns BIND_SPECIAL_DYLIB_FLAT_LOOKUP for flat-namespace links and for
// dynamic-lookup symbols; otherwise returns the ordinal of the symbol's
// file, which must be referenced (asserted).
556 static int16_t ordinalForDylibSymbol(const DylibSymbol
&dysym
) {
557 if (config
->namespaceKind
== NamespaceKind::flat
|| dysym
.isDynamicLookup())
558 return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP
);
559 assert(dysym
.getFile()->isReferenced());
560 return dysym
.getFile()->ordinal
;
// Returns the library ordinal to encode for a binding to `sym`:
//  - BIND_SPECIAL_DYLIB_WEAK_LOOKUP for weak-bound symbols when chained
//    fixups are enabled;
//  - the result of ordinalForDylibSymbol() for DylibSymbols;
//  - BIND_SPECIAL_DYLIB_FLAT_LOOKUP otherwise, in which case `sym` must be an
//    interposable Defined (asserted).
563 static int16_t ordinalForSymbol(const Symbol
&sym
) {
564 if (config
->emitChainedFixups
&& needsWeakBind(sym
))
565 return BIND_SPECIAL_DYLIB_WEAK_LOOKUP
;
566 if (const auto *dysym
= dyn_cast
<DylibSymbol
>(&sym
))
567 return ordinalForDylibSymbol(*dysym
);
568 assert(cast
<Defined
>(&sym
)->interposable
);
569 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP
;
// Writes the opcode that selects dylib `ordinal` to `os`. Three encodings
// are visible:
//  - SET_DYLIB_SPECIAL_IMM with the ordinal masked by BIND_IMMEDIATE_MASK;
//  - SET_DYLIB_ORDINAL_IMM (single byte) for ordinals not exceeding
//    BIND_IMMEDIATE_MASK;
//  - SET_DYLIB_ORDINAL_ULEB followed by ULEB128(ordinal).
572 static void encodeDylibOrdinal(int16_t ordinal
, raw_svector_ostream
&os
) {
574 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM
|
575 (ordinal
& BIND_IMMEDIATE_MASK
));
576 } else if (ordinal
<= BIND_IMMEDIATE_MASK
) {
577 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM
| ordinal
);
579 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB
);
580 encodeULEB128(ordinal
, os
);
// Writes a weak-override record for `defined` to `os`: the
// SET_SYMBOL_TRAILING_FLAGS_IMM opcode with the
// BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION flag, followed by the symbol name and
// a terminating NUL byte.
584 static void encodeWeakOverride(const Defined
*defined
,
585 raw_svector_ostream
&os
) {
586 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
|
587 BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION
)
588 << defined
->getName() << '\0';
591 // Organize the bindings so we can encode them with fewer opcodes.
593 // First, all bindings for a given symbol should be grouped together.
594 // BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it
595 // has an associated symbol string), so we only want to emit it once per symbol.
597 // Within each group, we sort the bindings by address. Since bindings are
598 // delta-encoded, sorting them allows for a more compact result. Note that
599 // sorting by address alone ensures that bindings for the same segment / section
600 // are located together, minimizing the number of times we have to emit
601 // BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB.
603 // Finally, we sort the symbols by the address of their first binding, again
604 // to facilitate the delta-encoding process.
// Copies `bindingsMap` into a vector of (symbol, bindings) pairs, sorts each
// symbol's bindings by the VA of their target location, and then sorts the
// pairs by the VA of each symbol's first binding.
// NOTE(review): the outer comparator reads `second[0]`, so every symbol is
// assumed to have at least one binding — confirm.
606 std::vector
<std::pair
<const Sym
*, std::vector
<BindingEntry
>>>
607 sortBindings(const BindingsMap
<const Sym
*> &bindingsMap
) {
608 std::vector
<std::pair
<const Sym
*, std::vector
<BindingEntry
>>> bindingsVec(
609 bindingsMap
.begin(), bindingsMap
.end());
610 for (auto &p
: bindingsVec
) {
611 std::vector
<BindingEntry
> &bindings
= p
.second
;
612 llvm::sort(bindings
, [](const BindingEntry
&a
, const BindingEntry
&b
) {
613 return a
.target
.getVA() < b
.target
.getVA();
616 llvm::sort(bindingsVec
, [](const auto &a
, const auto &b
) {
617 return a
.second
[0].target
.getVA() < b
.second
[0].target
.getVA();
622 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
623 // interprets to update a record with the following fields:
624 // * segment index (of the segment to write the symbol addresses to, typically
625 // the __DATA_CONST segment which contains the GOT)
626 // * offset within the segment, indicating the next location to write a binding
628 // * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
631 // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
632 // a symbol in the GOT, and increments the segment offset to point to the next
633 // entry. It does *not* clear the record state after doing the bind, so
634 // subsequent opcodes only need to encode the differences between bindings.
//
// Per symbol (in sortBindings() order) this writes: the
// SET_SYMBOL_TRAILING_FLAGS_IMM opcode (with BIND_SYMBOL_FLAGS_WEAK_IMPORT
// for weak references), the NUL-terminated name, SET_TYPE_IMM with
// BIND_TYPE_POINTER, a dylib-ordinal opcode when the ordinal differs from the
// previous symbol's, and then the per-location opcodes from encodeBinding().
// Those are passed through optimizeOpcodes() when config->optimize > 1.
// BIND_OPCODE_DONE is appended only if there was at least one binding.
635 void BindingSection::finalizeContents() {
636 raw_svector_ostream os
{contents
};
638 int16_t lastOrdinal
= 0;
640 for (auto &p
: sortBindings(bindingsMap
)) {
641 const Symbol
*sym
= p
.first
;
642 std::vector
<BindingEntry
> &bindings
= p
.second
;
643 uint8_t flags
= BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
;
644 if (sym
->isWeakRef())
645 flags
|= BIND_SYMBOL_FLAGS_WEAK_IMPORT
;
646 os
<< flags
<< sym
->getName() << '\0'
647 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM
| BIND_TYPE_POINTER
);
648 int16_t ordinal
= ordinalForSymbol(*sym
);
649 if (ordinal
!= lastOrdinal
) {
650 encodeDylibOrdinal(ordinal
, os
);
651 lastOrdinal
= ordinal
;
653 std::vector
<BindIR
> opcodes
;
654 for (const BindingEntry
&b
: bindings
)
655 encodeBinding(b
.target
.isec
->parent
,
656 b
.target
.isec
->getOffset(b
.target
.offset
), b
.addend
,
657 lastBinding
, opcodes
);
658 if (config
->optimize
> 1)
659 optimizeOpcodes(opcodes
);
660 for (const auto &op
: opcodes
)
661 flushOpcodes(op
, os
);
663 if (!bindingsMap
.empty())
664 os
<< static_cast<uint8_t>(BIND_OPCODE_DONE
);
// Copies the opcode bytes accumulated in `contents` into `buf`.
667 void BindingSection::writeTo(uint8_t *buf
) const {
668 memcpy(buf
, contents
.data(), contents
.size());
// Creates the weak-binding section as a LinkEditSection named
// segment_names::linkEdit / section_names::weakBinding; the body is empty.
671 WeakBindingSection::WeakBindingSection()
672 : LinkEditSection(segment_names::linkEdit
, section_names::weakBinding
) {}
// Builds the weak-binding opcode stream in `contents`.
// First, one weak-override record per entry of `definitions`. Then, for each
// symbol in sortBindings() order: SET_SYMBOL_TRAILING_FLAGS_IMM (no flag
// bits), the NUL-terminated name, SET_TYPE_IMM with BIND_TYPE_POINTER, and
// the per-location opcodes from encodeBinding(), optimized when
// config->optimize > 1. No dylib ordinal is written in this function.
// BIND_OPCODE_DONE is appended if anything was emitted.
674 void WeakBindingSection::finalizeContents() {
675 raw_svector_ostream os
{contents
};
678 for (const Defined
*defined
: definitions
)
679 encodeWeakOverride(defined
, os
);
681 for (auto &p
: sortBindings(bindingsMap
)) {
682 const Symbol
*sym
= p
.first
;
683 std::vector
<BindingEntry
> &bindings
= p
.second
;
684 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
)
685 << sym
->getName() << '\0'
686 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM
| BIND_TYPE_POINTER
);
687 std::vector
<BindIR
> opcodes
;
688 for (const BindingEntry
&b
: bindings
)
689 encodeBinding(b
.target
.isec
->parent
,
690 b
.target
.isec
->getOffset(b
.target
.offset
), b
.addend
,
691 lastBinding
, opcodes
);
692 if (config
->optimize
> 1)
693 optimizeOpcodes(opcodes
);
694 for (const auto &op
: opcodes
)
695 flushOpcodes(op
, os
);
697 if (!bindingsMap
.empty() || !definitions
.empty())
698 os
<< static_cast<uint8_t>(BIND_OPCODE_DONE
);
// Copies the opcode bytes accumulated in `contents` into `buf`.
701 void WeakBindingSection::writeTo(uint8_t *buf
) const {
702 memcpy(buf
, contents
.data(), contents
.size());
// Creates the stubs section as segment_names::text / section_names::stubs,
// flagged as a symbol-stub section containing only instructions, and records
// the per-stub size (target->stubSize) in reserved2.
705 StubsSection::StubsSection()
706 : SyntheticSection(segment_names::text
, section_names::stubs
) {
707 flags
= S_SYMBOL_STUBS
| S_ATTR_SOME_INSTRUCTIONS
| S_ATTR_PURE_INSTRUCTIONS
;
708 // The stubs section comprises machine instructions, which are aligned to
709 // 4 bytes on the archs we care about.
711 reserved2
= target
->stubSize
;
// Section size: one stub of target->stubSize bytes per entry.
714 uint64_t StubsSection::getSize() const {
715 return entries
.size() * target
->stubSize
;
// Writes one stub per entry into `buf`, advancing by target->stubSize each
// time. Each stub is given the VA of the pointer it loads: the symbol's GOT
// slot when chained fixups are enabled, its lazy-pointer slot otherwise.
718 void StubsSection::writeTo(uint8_t *buf
) const {
720 for (const Symbol
*sym
: entries
) {
722 config
->emitChainedFixups
? sym
->getGotVA() : sym
->getLazyPtrVA();
723 target
->writeStub(buf
+ off
, *sym
, pointerVA
);
724 off
+= target
->stubSize
;
// Marks the section as final by setting isFinal.
728 void StubsSection::finalize() { isFinal
= true; }
// Registers the fixups for the lazy-pointer slot of a stub for `sym`. Must
// not be used with chained fixups (asserted). The slot lives in
// in.lazyPointers->isec at byte offset sym->stubsIndex * target->wordSize.
//  - DylibSymbol, weak definition: a regular binding plus a weak binding on
//    the slot. The other visible call for DylibSymbols adds a lazy binding.
//  - Defined, external weak definition: a rebase plus a weak binding on the
//    slot.
//  - Defined, interposable: a lazy binding.
//  - Everything else is unreachable.
730 static void addBindingsForStub(Symbol
*sym
) {
731 assert(!config
->emitChainedFixups
);
732 if (auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
733 if (sym
->isWeakDef()) {
734 in
.binding
->addEntry(dysym
, in
.lazyPointers
->isec
,
735 sym
->stubsIndex
* target
->wordSize
);
736 in
.weakBinding
->addEntry(sym
, in
.lazyPointers
->isec
,
737 sym
->stubsIndex
* target
->wordSize
);
739 in
.lazyBinding
->addEntry(dysym
);
741 } else if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
742 if (defined
->isExternalWeakDef()) {
743 in
.rebase
->addEntry(in
.lazyPointers
->isec
,
744 sym
->stubsIndex
* target
->wordSize
);
745 in
.weakBinding
->addEntry(sym
, in
.lazyPointers
->isec
,
746 sym
->stubsIndex
* target
->wordSize
);
747 } else if (defined
->interposable
) {
748 in
.lazyBinding
->addEntry(sym
);
750 llvm_unreachable("invalid stub target");
753 llvm_unreachable("invalid stub target symbol type");
// Adds `sym` to the stub entries and records its position as
// sym->stubsIndex. With chained fixups the symbol is also given a GOT entry;
// the other visible call registers lazy-pointer fixups through
// addBindingsForStub().
757 void StubsSection::addEntry(Symbol
*sym
) {
758 bool inserted
= entries
.insert(sym
);
760 sym
->stubsIndex
= entries
.size() - 1;
762 if (config
->emitChainedFixups
)
763 in
.got
->addEntry(sym
);
765 addBindingsForStub(sym
);
// Creates the stub-helper section as
// segment_names::text / section_names::stubHelper, flagged as containing only
// instructions and aligned to 4 bytes.
769 StubHelperSection::StubHelperSection()
770 : SyntheticSection(segment_names::text
, section_names::stubHelper
) {
771 flags
= S_ATTR_SOME_INSTRUCTIONS
| S_ATTR_PURE_INSTRUCTIONS
;
772 align
= 4; // This section comprises machine instructions
// Section size: the fixed header (target->stubHelperHeaderSize) plus one
// entry of target->stubHelperEntrySize bytes per lazy-binding entry.
775 uint64_t StubHelperSection::getSize() const {
776 return target
->stubHelperHeaderSize
+
777 in
.lazyBinding
->getEntries().size() * target
->stubHelperEntrySize
;
// The stub helper is needed exactly when the lazy-binding section is.
780 bool StubHelperSection::isNeeded() const { return in
.lazyBinding
->isNeeded(); }
// Writes the stub-helper header at the start of `buf`, followed by one
// helper entry per lazy-binding entry. Each entry is written at `buf + off`
// and is told its own address (`addr + off`); `off` advances by
// target->stubHelperEntrySize.
782 void StubHelperSection::writeTo(uint8_t *buf
) const {
783 target
->writeStubHelperHeader(buf
);
784 size_t off
= target
->stubHelperHeaderSize
;
785 for (const Symbol
*sym
: in
.lazyBinding
->getEntries()) {
786 target
->writeStubHelperEntry(buf
+ off
, *sym
, addr
+ off
);
787 off
+= target
->stubHelperEntrySize
;
// Prepares the symbols and sections the stub helper depends on.
//  1. Adds an undefined reference to "dyld_stub_binder"; if it is still
//     Undefined, it is passed to treatUndefinedSymbol().
//  2. Stores the symbol in stubBinder if it is a DylibSymbol and gives it a
//     GOT entry.
//  3. Assigns in.imageLoaderCache to its output section and registers it as
//     an input section.
//  4. Creates a Defined symbol "__dyld_private" at offset 0 of
//     in.imageLoaderCache and marks dyldPrivate as used.
791 void StubHelperSection::setUp() {
792 Symbol
*binder
= symtab
->addUndefined("dyld_stub_binder", /*file=*/nullptr,
793 /*isWeakRef=*/false);
794 if (auto *undefined
= dyn_cast
<Undefined
>(binder
))
795 treatUndefinedSymbol(*undefined
,
796 "lazy binding (normally in libSystem.dylib)");
798 // treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check.
799 stubBinder
= dyn_cast_or_null
<DylibSymbol
>(binder
);
800 if (stubBinder
== nullptr)
803 in
.got
->addEntry(stubBinder
);
805 in
.imageLoaderCache
->parent
=
806 ConcatOutputSection::getOrCreateForInput(in
.imageLoaderCache
);
807 addInputSection(in
.imageLoaderCache
);
808 // Since this isn't in the symbol table or in any input file, the noDeadStrip
809 // argument doesn't matter.
811 make
<Defined
>("__dyld_private", nullptr, in
.imageLoaderCache
, 0, 0,
813 /*isExternal=*/false, /*isPrivateExtern=*/false,
814 /*includeInSymtab=*/true,
815 /*isReferencedDynamically=*/false,
816 /*noDeadStrip=*/false);
817 dyldPrivate
->used
= true;
// Definition of the ObjCSelRefsHelper map from a method name (hashed
// StringRef) to the selref input section that refers to it. Filled by
// initialize() and makeSelRef(), queried by getSelRef(), emptied by
// cleanup().
820 llvm::DenseMap
<llvm::CachedHashStringRef
, ConcatInputSection
*>
821 ObjCSelRefsHelper::methnameToSelref
;
// Seeds methnameToSelref from the selref sections already present in the
// input. Every live input section named section_names::objcSelrefs is
// expected to carry exactly one relocation (asserted). When that relocation
// refers to a Defined symbol, the method name is read from the symbol's
// C-string section at d->value and mapped to the selref section.
// NOTE(review): `Reloc` is a by-value copy of isec->relocs[0].
822 void ObjCSelRefsHelper::initialize() {
823 // Do not fold selrefs without ICF.
824 if (config
->icfLevel
== ICFLevel::none
)
827 // Search methnames already referenced in __objc_selrefs
828 // Map the name to the corresponding selref entry
829 // which we will reuse when creating objc stubs.
830 for (ConcatInputSection
*isec
: inputSections
) {
831 if (isec
->shouldOmitFromOutput())
833 if (isec
->getName() != section_names::objcSelrefs
)
835 // We expect a single relocation per selref entry to __objc_methname that
836 // might be aggregated.
837 assert(isec
->relocs
.size() == 1);
838 auto Reloc
= isec
->relocs
[0];
839 if (const auto *sym
= Reloc
.referent
.dyn_cast
<Symbol
*>()) {
840 if (const auto *d
= dyn_cast
<Defined
>(sym
)) {
841 auto *cisec
= cast
<CStringInputSection
>(d
->isec());
842 auto methname
= cisec
->getStringRefAtOffset(d
->value
);
843 methnameToSelref
[CachedHashStringRef(methname
)] = isec
;
// Drops all cached method-name -> selref mappings.
849 void ObjCSelRefsHelper::cleanup() { methnameToSelref
.clear(); }
// Creates a new synthetic selref input section for `methname` and records it
// in methnameToSelref.
// The section data is one word holding the method name's offset within
// in.objcMethnameSection, and a single unsigned relocation against that
// section (with the same offset as addend) is attached. The section is
// created in segment_names::data / section_names::objcSelrefs, assigned to
// its output section, registered as an input section and marked final.
// NOTE(review): the buffer is `wordSize` bytes but write64le stores 8 bytes —
// confirm this is only reached with wordSize == 8.
851 ConcatInputSection
*ObjCSelRefsHelper::makeSelRef(StringRef methname
) {
852 auto methnameOffset
=
853 in
.objcMethnameSection
->getStringOffset(methname
).outSecOff
;
855 size_t wordSize
= target
->wordSize
;
856 uint8_t *selrefData
= bAlloc().Allocate
<uint8_t>(wordSize
);
857 write64le(selrefData
, methnameOffset
);
858 ConcatInputSection
*objcSelref
=
859 makeSyntheticInputSection(segment_names::data
, section_names::objcSelrefs
,
860 S_LITERAL_POINTERS
| S_ATTR_NO_DEAD_STRIP
,
861 ArrayRef
<uint8_t>{selrefData
, wordSize
},
863 assert(objcSelref
->live
);
864 objcSelref
->relocs
.push_back({/*type=*/target
->unsignedRelocType
,
865 /*pcrel=*/false, /*length=*/3,
867 /*addend=*/static_cast<int64_t>(methnameOffset
),
868 /*referent=*/in
.objcMethnameSection
->isec
});
869 objcSelref
->parent
= ConcatOutputSection::getOrCreateForInput(objcSelref
);
870 addInputSection(objcSelref
);
871 objcSelref
->isFinal
= true;
872 methnameToSelref
[CachedHashStringRef(methname
)] = objcSelref
;
// Looks up the selref section recorded for `methname` in methnameToSelref.
// Callers treat a null result as "no selref yet" (see
// ObjCStubsSection::addEntry).
876 ConcatInputSection
*ObjCSelRefsHelper::getSelRef(StringRef methname
) {
877 auto it
= methnameToSelref
.find(CachedHashStringRef(methname
));
878 if (it
== methnameToSelref
.end())
// Creates the ObjC stubs section as
// segment_names::text / section_names::objcStubs, flagged as containing only
// instructions. The alignment depends on the stub flavor selected by
// config->objcStubsMode (fast vs. small).
883 ObjCStubsSection::ObjCStubsSection()
884 : SyntheticSection(segment_names::text
, section_names::objcStubs
) {
885 flags
= S_ATTR_SOME_INSTRUCTIONS
| S_ATTR_PURE_INSTRUCTIONS
;
886 align
= config
->objcStubsMode
== ObjCStubsMode::fast
887 ? target
->objcStubsFastAlignment
888 : target
->objcStubsSmallAlignment
;
// A symbol is an ObjC stub symbol when its name starts with symbolPrefix.
891 bool ObjCStubsSection::isObjCStubSymbol(Symbol
*sym
) {
892 return sym
->getName().starts_with(symbolPrefix
);
// Extracts the method name from an ObjC stub symbol by dropping the leading
// symbolPrefix from its name. `sym` must be an ObjC stub symbol (asserted).
895 StringRef
ObjCStubsSection::getMethname(Symbol
*sym
) {
896 assert(isObjCStubSymbol(sym
) && "not an objc stub");
897 auto name
= sym
->getName();
898 StringRef methname
= name
.drop_front(symbolPrefix
.size());
// Adds an ObjC stub for `sym`. Ensures a selref exists for the stub's method
// name, then replaces `sym` with a Defined symbol located in this section's
// `isec` at offset symbols.size() * stubSize (the next free stub slot) and
// appends it to `symbols`. The replacement is external, private-extern and
// included in the symbol table.
902 void ObjCStubsSection::addEntry(Symbol
*sym
) {
903 StringRef methname
= getMethname(sym
);
904 // We create a selref entry for each unique methname.
905 if (!ObjCSelRefsHelper::getSelRef(methname
))
906 ObjCSelRefsHelper::makeSelRef(methname
);
908 auto stubSize
= config
->objcStubsMode
== ObjCStubsMode::fast
909 ? target
->objcStubsFastSize
910 : target
->objcStubsSmallSize
;
911 Defined
*newSym
= replaceSymbol
<Defined
>(
912 sym
, sym
->getName(), nullptr, isec
,
913 /*value=*/symbols
.size() * stubSize
,
915 /*isWeakDef=*/false, /*isExternal=*/true, /*isPrivateExtern=*/true,
916 /*includeInSymtab=*/true, /*isReferencedDynamically=*/false,
917 /*noDeadStrip=*/false);
918 symbols
.push_back(newSym
);
// Resolves the "_objc_msgSend" symbol that the stubs call. The symbol is
// added as an undefined reference, passed to treatUndefinedSymbol() if it is
// still Undefined, and marked as used. In fast mode it gets a GOT entry; in
// small mode it gets a regular stub unless it is a Defined symbol.
921 void ObjCStubsSection::setUp() {
922 objcMsgSend
= symtab
->addUndefined("_objc_msgSend", /*file=*/nullptr,
923 /*isWeakRef=*/false);
924 if (auto *undefined
= dyn_cast
<Undefined
>(objcMsgSend
))
925 treatUndefinedSymbol(*undefined
,
926 "lazy binding (normally in libobjc.dylib)");
927 objcMsgSend
->used
= true;
928 if (config
->objcStubsMode
== ObjCStubsMode::fast
) {
929 in
.got
->addEntry(objcMsgSend
);
930 assert(objcMsgSend
->isInGot());
932 assert(config
->objcStubsMode
== ObjCStubsMode::small
);
933 // In line with ld64's behavior, when objc_msgSend is a direct symbol,
934 // we directly reference it.
935 // In other cases, typically when binding in libobjc.dylib,
936 // we generate a stub to invoke objc_msgSend.
937 if (!isa
<Defined
>(objcMsgSend
))
938 in
.stubs
->addEntry(objcMsgSend
);
942 uint64_t ObjCStubsSection::getSize() const {
943 auto stubSize
= config
->objcStubsMode
== ObjCStubsMode::fast
944 ? target
->objcStubsFastSize
945 : target
->objcStubsSmallSize
;
946 return stubSize
* symbols
.size();
949 void ObjCStubsSection::writeTo(uint8_t *buf
) const {
950 uint64_t stubOffset
= 0;
951 for (size_t i
= 0, n
= symbols
.size(); i
< n
; ++i
) {
952 Defined
*sym
= symbols
[i
];
954 auto methname
= getMethname(sym
);
955 InputSection
*selRef
= ObjCSelRefsHelper::getSelRef(methname
);
956 assert(selRef
!= nullptr && "no selref for methname");
957 auto selrefAddr
= selRef
->getVA(0);
958 target
->writeObjCMsgSendStub(buf
+ stubOffset
, sym
, in
.objcStubs
->addr
,
959 stubOffset
, selrefAddr
, objcMsgSend
);
963 LazyPointerSection::LazyPointerSection()
964 : SyntheticSection(segment_names::data
, section_names::lazySymbolPtr
) {
965 align
= target
->wordSize
;
966 flags
= S_LAZY_SYMBOL_POINTERS
;
969 uint64_t LazyPointerSection::getSize() const {
970 return in
.stubs
->getEntries().size() * target
->wordSize
;
973 bool LazyPointerSection::isNeeded() const {
974 return !in
.stubs
->getEntries().empty();
977 void LazyPointerSection::writeTo(uint8_t *buf
) const {
979 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
980 if (const auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
981 if (dysym
->hasStubsHelper()) {
982 uint64_t stubHelperOffset
=
983 target
->stubHelperHeaderSize
+
984 dysym
->stubsHelperIndex
* target
->stubHelperEntrySize
;
985 write64le(buf
+ off
, in
.stubHelper
->addr
+ stubHelperOffset
);
988 write64le(buf
+ off
, sym
->getVA());
990 off
+= target
->wordSize
;
994 LazyBindingSection::LazyBindingSection()
995 : LinkEditSection(segment_names::linkEdit
, section_names::lazyBinding
) {}
997 void LazyBindingSection::finalizeContents() {
998 // TODO: Just precompute output size here instead of writing to a temporary
1000 for (Symbol
*sym
: entries
)
1001 sym
->lazyBindOffset
= encode(*sym
);
1004 void LazyBindingSection::writeTo(uint8_t *buf
) const {
1005 memcpy(buf
, contents
.data(), contents
.size());
1008 void LazyBindingSection::addEntry(Symbol
*sym
) {
1009 assert(!config
->emitChainedFixups
&& "Chained fixups always bind eagerly");
1010 if (entries
.insert(sym
)) {
1011 sym
->stubsHelperIndex
= entries
.size() - 1;
1012 in
.rebase
->addEntry(in
.lazyPointers
->isec
,
1013 sym
->stubsIndex
* target
->wordSize
);
1017 // Unlike the non-lazy binding section, the bind opcodes in this section aren't
1018 // interpreted all at once. Rather, dyld will start interpreting opcodes at a
1019 // given offset, typically only binding a single symbol before it finds a
1020 // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
1021 // we cannot encode just the differences between symbols; we have to emit the
1022 // complete bind information for each symbol.
1023 uint32_t LazyBindingSection::encode(const Symbol
&sym
) {
1024 uint32_t opstreamOffset
= contents
.size();
1025 OutputSegment
*dataSeg
= in
.lazyPointers
->parent
;
1026 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
1029 in
.lazyPointers
->addr
- dataSeg
->addr
+ sym
.stubsIndex
* target
->wordSize
;
1030 encodeULEB128(offset
, os
);
1031 encodeDylibOrdinal(ordinalForSymbol(sym
), os
);
1033 uint8_t flags
= BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
;
1034 if (sym
.isWeakRef())
1035 flags
|= BIND_SYMBOL_FLAGS_WEAK_IMPORT
;
1037 os
<< flags
<< sym
.getName() << '\0'
1038 << static_cast<uint8_t>(BIND_OPCODE_DO_BIND
)
1039 << static_cast<uint8_t>(BIND_OPCODE_DONE
);
1040 return opstreamOffset
;
1043 ExportSection::ExportSection()
1044 : LinkEditSection(segment_names::linkEdit
, section_names::export_
) {}
1046 void ExportSection::finalizeContents() {
1047 trieBuilder
.setImageBase(in
.header
->addr
);
1048 for (const Symbol
*sym
: symtab
->getSymbols()) {
1049 if (const auto *defined
= dyn_cast
<Defined
>(sym
)) {
1050 if (defined
->privateExtern
|| !defined
->isLive())
1052 trieBuilder
.addSymbol(*defined
);
1053 hasWeakSymbol
= hasWeakSymbol
|| sym
->isWeakDef();
1054 } else if (auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
1055 if (dysym
->shouldReexport
)
1056 trieBuilder
.addSymbol(*dysym
);
1059 size
= trieBuilder
.build();
1062 void ExportSection::writeTo(uint8_t *buf
) const { trieBuilder
.writeTo(buf
); }
1064 DataInCodeSection::DataInCodeSection()
1065 : LinkEditSection(segment_names::linkEdit
, section_names::dataInCode
) {}
1068 static std::vector
<MachO::data_in_code_entry
> collectDataInCodeEntries() {
1069 std::vector
<MachO::data_in_code_entry
> dataInCodeEntries
;
1070 for (const InputFile
*inputFile
: inputFiles
) {
1071 if (!isa
<ObjFile
>(inputFile
))
1073 const ObjFile
*objFile
= cast
<ObjFile
>(inputFile
);
1074 ArrayRef
<MachO::data_in_code_entry
> entries
= objFile
->getDataInCode();
1075 if (entries
.empty())
1078 std::vector
<MachO::data_in_code_entry
> sortedEntries
;
1079 sortedEntries
.assign(entries
.begin(), entries
.end());
1080 llvm::sort(sortedEntries
, [](const data_in_code_entry
&lhs
,
1081 const data_in_code_entry
&rhs
) {
1082 return lhs
.offset
< rhs
.offset
;
1085 // For each code subsection find 'data in code' entries residing in it.
1086 // Compute the new offset values as
1087 // <offset within subsection> + <subsection address> - <__TEXT address>.
1088 for (const Section
*section
: objFile
->sections
) {
1089 for (const Subsection
&subsec
: section
->subsections
) {
1090 const InputSection
*isec
= subsec
.isec
;
1091 if (!isCodeSection(isec
))
1093 if (cast
<ConcatInputSection
>(isec
)->shouldOmitFromOutput())
1095 const uint64_t beginAddr
= section
->addr
+ subsec
.offset
;
1096 auto it
= llvm::lower_bound(
1097 sortedEntries
, beginAddr
,
1098 [](const MachO::data_in_code_entry
&entry
, uint64_t addr
) {
1099 return entry
.offset
< addr
;
1101 const uint64_t endAddr
= beginAddr
+ isec
->getSize();
1102 for (const auto end
= sortedEntries
.end();
1103 it
!= end
&& it
->offset
+ it
->length
<= endAddr
; ++it
)
1104 dataInCodeEntries
.push_back(
1105 {static_cast<uint32_t>(isec
->getVA(it
->offset
- beginAddr
) -
1107 it
->length
, it
->kind
});
1112 // ld64 emits the table in sorted order too.
1113 llvm::sort(dataInCodeEntries
,
1114 [](const data_in_code_entry
&lhs
, const data_in_code_entry
&rhs
) {
1115 return lhs
.offset
< rhs
.offset
;
1117 return dataInCodeEntries
;
1120 void DataInCodeSection::finalizeContents() {
1121 entries
= target
->wordSize
== 8 ? collectDataInCodeEntries
<LP64
>()
1122 : collectDataInCodeEntries
<ILP32
>();
1125 void DataInCodeSection::writeTo(uint8_t *buf
) const {
1126 if (!entries
.empty())
1127 memcpy(buf
, entries
.data(), getRawSize());
1130 FunctionStartsSection::FunctionStartsSection()
1131 : LinkEditSection(segment_names::linkEdit
, section_names::functionStarts
) {}
1133 void FunctionStartsSection::finalizeContents() {
1134 raw_svector_ostream os
{contents
};
1135 std::vector
<uint64_t> addrs
;
1136 for (const InputFile
*file
: inputFiles
) {
1137 if (auto *objFile
= dyn_cast
<ObjFile
>(file
)) {
1138 for (const Symbol
*sym
: objFile
->symbols
) {
1139 if (const auto *defined
= dyn_cast_or_null
<Defined
>(sym
)) {
1140 if (!defined
->isec() || !isCodeSection(defined
->isec()) ||
1143 addrs
.push_back(defined
->getVA());
1149 uint64_t addr
= in
.header
->addr
;
1150 for (uint64_t nextAddr
: addrs
) {
1151 uint64_t delta
= nextAddr
- addr
;
1154 encodeULEB128(delta
, os
);
1160 void FunctionStartsSection::writeTo(uint8_t *buf
) const {
1161 memcpy(buf
, contents
.data(), contents
.size());
1164 SymtabSection::SymtabSection(StringTableSection
&stringTableSection
)
1165 : LinkEditSection(segment_names::linkEdit
, section_names::symbolTable
),
1166 stringTableSection(stringTableSection
) {}
1168 void SymtabSection::emitBeginSourceStab(StringRef sourceFile
) {
1169 StabsEntry
stab(N_SO
);
1170 stab
.strx
= stringTableSection
.addString(saver().save(sourceFile
));
1171 stabs
.emplace_back(std::move(stab
));
1174 void SymtabSection::emitEndSourceStab() {
1175 StabsEntry
stab(N_SO
);
1177 stabs
.emplace_back(std::move(stab
));
1180 void SymtabSection::emitObjectFileStab(ObjFile
*file
) {
1181 StabsEntry
stab(N_OSO
);
1182 stab
.sect
= target
->cpuSubtype
;
1183 SmallString
<261> path(!file
->archiveName
.empty() ? file
->archiveName
1185 std::error_code ec
= sys::fs::make_absolute(path
);
1187 fatal("failed to get absolute path for " + path
);
1189 if (!file
->archiveName
.empty())
1190 path
.append({"(", file
->getName(), ")"});
1192 StringRef adjustedPath
= saver().save(path
.str());
1193 adjustedPath
.consume_front(config
->osoPrefix
);
1195 stab
.strx
= stringTableSection
.addString(adjustedPath
);
1197 stab
.value
= file
->modTime
;
1198 stabs
.emplace_back(std::move(stab
));
1201 void SymtabSection::emitEndFunStab(Defined
*defined
) {
1202 StabsEntry
stab(N_FUN
);
1203 stab
.value
= defined
->size
;
1204 stabs
.emplace_back(std::move(stab
));
1207 void SymtabSection::emitStabs() {
1208 if (config
->omitDebugInfo
)
1211 for (const std::string
&s
: config
->astPaths
) {
1212 StabsEntry
astStab(N_AST
);
1213 astStab
.strx
= stringTableSection
.addString(s
);
1214 stabs
.emplace_back(std::move(astStab
));
1217 // Cache the file ID for each symbol in an std::pair for faster sorting.
1218 using SortingPair
= std::pair
<Defined
*, int>;
1219 std::vector
<SortingPair
> symbolsNeedingStabs
;
1220 for (const SymtabEntry
&entry
:
1221 concat
<SymtabEntry
>(localSymbols
, externalSymbols
)) {
1222 Symbol
*sym
= entry
.sym
;
1223 assert(sym
->isLive() &&
1224 "dead symbols should not be in localSymbols, externalSymbols");
1225 if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
1226 // Excluded symbols should have been filtered out in finalizeContents().
1227 assert(defined
->includeInSymtab
);
1229 if (defined
->isAbsolute())
1232 // Constant-folded symbols go in the executable's symbol table, but don't
1233 // get a stabs entry unless --keep-icf-stabs flag is specified
1234 if (!config
->keepICFStabs
&& defined
->wasIdenticalCodeFolded
)
1237 ObjFile
*file
= defined
->getObjectFile();
1238 if (!file
|| !file
->compileUnit
)
1241 // We use 'originalIsec' to get the file id of the symbol since 'isec()'
1242 // might point to the merged ICF symbol's file
1243 symbolsNeedingStabs
.emplace_back(defined
,
1244 defined
->originalIsec
->getFile()->id
);
1248 llvm::stable_sort(symbolsNeedingStabs
,
1249 [&](const SortingPair
&a
, const SortingPair
&b
) {
1250 return a
.second
< b
.second
;
1253 // Emit STABS symbols so that dsymutil and/or the debugger can map address
1254 // regions in the final binary to the source and object files from which they
1256 InputFile
*lastFile
= nullptr;
1257 for (SortingPair
&pair
: symbolsNeedingStabs
) {
1258 Defined
*defined
= pair
.first
;
1259 // We use 'originalIsec' of the symbol since we care about the actual origin
1260 // of the symbol, not the canonical location returned by `isec()`.
1261 InputSection
*isec
= defined
->originalIsec
;
1262 ObjFile
*file
= cast
<ObjFile
>(isec
->getFile());
1264 if (lastFile
== nullptr || lastFile
!= file
) {
1265 if (lastFile
!= nullptr)
1266 emitEndSourceStab();
1269 emitBeginSourceStab(file
->sourceFile());
1270 emitObjectFileStab(file
);
1274 symStab
.sect
= isec
->parent
->index
;
1275 symStab
.strx
= stringTableSection
.addString(defined
->getName());
1276 symStab
.value
= defined
->getVA();
1278 if (isCodeSection(isec
)) {
1279 symStab
.type
= N_FUN
;
1280 stabs
.emplace_back(std::move(symStab
));
1281 emitEndFunStab(defined
);
1283 symStab
.type
= defined
->isExternal() ? N_GSYM
: N_STSYM
;
1284 stabs
.emplace_back(std::move(symStab
));
1289 emitEndSourceStab();
1292 void SymtabSection::finalizeContents() {
1293 auto addSymbol
= [&](std::vector
<SymtabEntry
> &symbols
, Symbol
*sym
) {
1294 uint32_t strx
= stringTableSection
.addString(sym
->getName());
1295 symbols
.push_back({sym
, strx
});
1298 std::function
<void(Symbol
*)> localSymbolsHandler
;
1299 switch (config
->localSymbolsPresence
) {
1300 case SymtabPresence::All
:
1301 localSymbolsHandler
= [&](Symbol
*sym
) { addSymbol(localSymbols
, sym
); };
1303 case SymtabPresence::None
:
1304 localSymbolsHandler
= [&](Symbol
*) { /* Do nothing*/ };
1306 case SymtabPresence::SelectivelyIncluded
:
1307 localSymbolsHandler
= [&](Symbol
*sym
) {
1308 if (config
->localSymbolPatterns
.match(sym
->getName()))
1309 addSymbol(localSymbols
, sym
);
1312 case SymtabPresence::SelectivelyExcluded
:
1313 localSymbolsHandler
= [&](Symbol
*sym
) {
1314 if (!config
->localSymbolPatterns
.match(sym
->getName()))
1315 addSymbol(localSymbols
, sym
);
1320 // Local symbols aren't in the SymbolTable, so we walk the list of object
1321 // files to gather them.
1322 // But if `-x` is set, then we don't need to. localSymbolsHandler() will do
1323 // the right thing regardless, but this check is a perf optimization because
1324 // iterating through all the input files and their symbols is expensive.
1325 if (config
->localSymbolsPresence
!= SymtabPresence::None
) {
1326 for (const InputFile
*file
: inputFiles
) {
1327 if (auto *objFile
= dyn_cast
<ObjFile
>(file
)) {
1328 for (Symbol
*sym
: objFile
->symbols
) {
1329 if (auto *defined
= dyn_cast_or_null
<Defined
>(sym
)) {
1330 if (defined
->isExternal() || !defined
->isLive() ||
1331 !defined
->includeInSymtab
)
1333 localSymbolsHandler(sym
);
1340 // __dyld_private is a local symbol too. It's linker-created and doesn't
1341 // exist in any object file.
1342 if (in
.stubHelper
&& in
.stubHelper
->dyldPrivate
)
1343 localSymbolsHandler(in
.stubHelper
->dyldPrivate
);
1345 for (Symbol
*sym
: symtab
->getSymbols()) {
1348 if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
1349 if (!defined
->includeInSymtab
)
1351 assert(defined
->isExternal());
1352 if (defined
->privateExtern
)
1353 localSymbolsHandler(defined
);
1355 addSymbol(externalSymbols
, defined
);
1356 } else if (auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
1357 if (dysym
->isReferenced())
1358 addSymbol(undefinedSymbols
, sym
);
1363 uint32_t symtabIndex
= stabs
.size();
1364 for (const SymtabEntry
&entry
:
1365 concat
<SymtabEntry
>(localSymbols
, externalSymbols
, undefinedSymbols
)) {
1366 entry
.sym
->symtabIndex
= symtabIndex
++;
1370 uint32_t SymtabSection::getNumSymbols() const {
1371 return stabs
.size() + localSymbols
.size() + externalSymbols
.size() +
1372 undefinedSymbols
.size();
1375 // This serves to hide (type-erase) the template parameter from SymtabSection.
1376 template <class LP
> class SymtabSectionImpl final
: public SymtabSection
{
1378 SymtabSectionImpl(StringTableSection
&stringTableSection
)
1379 : SymtabSection(stringTableSection
) {}
1380 uint64_t getRawSize() const override
;
1381 void writeTo(uint8_t *buf
) const override
;
1384 template <class LP
> uint64_t SymtabSectionImpl
<LP
>::getRawSize() const {
1385 return getNumSymbols() * sizeof(typename
LP::nlist
);
1388 template <class LP
> void SymtabSectionImpl
<LP
>::writeTo(uint8_t *buf
) const {
1389 auto *nList
= reinterpret_cast<typename
LP::nlist
*>(buf
);
1390 // Emit the stabs entries before the "real" symbols. We cannot emit them
1391 // after as that would render Symbol::symtabIndex inaccurate.
1392 for (const StabsEntry
&entry
: stabs
) {
1393 nList
->n_strx
= entry
.strx
;
1394 nList
->n_type
= entry
.type
;
1395 nList
->n_sect
= entry
.sect
;
1396 nList
->n_desc
= entry
.desc
;
1397 nList
->n_value
= entry
.value
;
1401 for (const SymtabEntry
&entry
: concat
<const SymtabEntry
>(
1402 localSymbols
, externalSymbols
, undefinedSymbols
)) {
1403 nList
->n_strx
= entry
.strx
;
1404 // TODO populate n_desc with more flags
1405 if (auto *defined
= dyn_cast
<Defined
>(entry
.sym
)) {
1407 if (defined
->privateExtern
) {
1408 // Private external -- dylib scoped symbol.
1409 // Promote to non-external at link time.
1411 } else if (defined
->isExternal()) {
1412 // Normal global symbol.
1415 // TU-local symbol from localSymbols.
1419 if (defined
->isAbsolute()) {
1420 nList
->n_type
= scope
| N_ABS
;
1421 nList
->n_sect
= NO_SECT
;
1422 nList
->n_value
= defined
->value
;
1424 nList
->n_type
= scope
| N_SECT
;
1425 nList
->n_sect
= defined
->isec()->parent
->index
;
1426 // For the N_SECT symbol type, n_value is the address of the symbol
1427 nList
->n_value
= defined
->getVA();
1429 nList
->n_desc
|= defined
->isExternalWeakDef() ? N_WEAK_DEF
: 0;
1431 defined
->referencedDynamically
? REFERENCED_DYNAMICALLY
: 0;
1432 } else if (auto *dysym
= dyn_cast
<DylibSymbol
>(entry
.sym
)) {
1433 uint16_t n_desc
= nList
->n_desc
;
1434 int16_t ordinal
= ordinalForDylibSymbol(*dysym
);
1435 if (ordinal
== BIND_SPECIAL_DYLIB_FLAT_LOOKUP
)
1436 SET_LIBRARY_ORDINAL(n_desc
, DYNAMIC_LOOKUP_ORDINAL
);
1437 else if (ordinal
== BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE
)
1438 SET_LIBRARY_ORDINAL(n_desc
, EXECUTABLE_ORDINAL
);
1440 assert(ordinal
> 0);
1441 SET_LIBRARY_ORDINAL(n_desc
, static_cast<uint8_t>(ordinal
));
1444 nList
->n_type
= N_EXT
;
1445 n_desc
|= dysym
->isWeakDef() ? N_WEAK_DEF
: 0;
1446 n_desc
|= dysym
->isWeakRef() ? N_WEAK_REF
: 0;
1447 nList
->n_desc
= n_desc
;
1455 macho::makeSymtabSection(StringTableSection
&stringTableSection
) {
1456 return make
<SymtabSectionImpl
<LP
>>(stringTableSection
);
1459 IndirectSymtabSection::IndirectSymtabSection()
1460 : LinkEditSection(segment_names::linkEdit
,
1461 section_names::indirectSymbolTable
) {}
1463 uint32_t IndirectSymtabSection::getNumSymbols() const {
1464 uint32_t size
= in
.got
->getEntries().size() +
1465 in
.tlvPointers
->getEntries().size() +
1466 in
.stubs
->getEntries().size();
1467 if (!config
->emitChainedFixups
)
1468 size
+= in
.stubs
->getEntries().size();
1472 bool IndirectSymtabSection::isNeeded() const {
1473 return in
.got
->isNeeded() || in
.tlvPointers
->isNeeded() ||
1474 in
.stubs
->isNeeded();
1477 void IndirectSymtabSection::finalizeContents() {
1479 in
.got
->reserved1
= off
;
1480 off
+= in
.got
->getEntries().size();
1481 in
.tlvPointers
->reserved1
= off
;
1482 off
+= in
.tlvPointers
->getEntries().size();
1483 in
.stubs
->reserved1
= off
;
1484 if (in
.lazyPointers
) {
1485 off
+= in
.stubs
->getEntries().size();
1486 in
.lazyPointers
->reserved1
= off
;
1490 static uint32_t indirectValue(const Symbol
*sym
) {
1491 if (sym
->symtabIndex
== UINT32_MAX
|| !needsBinding(sym
))
1492 return INDIRECT_SYMBOL_LOCAL
;
1493 return sym
->symtabIndex
;
1496 void IndirectSymtabSection::writeTo(uint8_t *buf
) const {
1498 for (const Symbol
*sym
: in
.got
->getEntries()) {
1499 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1502 for (const Symbol
*sym
: in
.tlvPointers
->getEntries()) {
1503 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1506 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
1507 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1511 if (in
.lazyPointers
) {
1512 // There is a 1:1 correspondence between stubs and LazyPointerSection
1513 // entries. But giving __stubs and __la_symbol_ptr the same reserved1
1514 // (the offset into the indirect symbol table) so that they both refer
1515 // to the same range of offsets confuses `strip`, so write the stubs
1516 // symbol table offsets a second time.
1517 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
1518 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1524 StringTableSection::StringTableSection()
1525 : LinkEditSection(segment_names::linkEdit
, section_names::stringTable
) {}
1527 uint32_t StringTableSection::addString(StringRef str
) {
1528 uint32_t strx
= size
;
1529 strings
.push_back(str
); // TODO: consider deduplicating strings
1530 size
+= str
.size() + 1; // account for null terminator
1534 void StringTableSection::writeTo(uint8_t *buf
) const {
1536 for (StringRef str
: strings
) {
1537 memcpy(buf
+ off
, str
.data(), str
.size());
1538 off
+= str
.size() + 1; // account for null terminator
1542 static_assert((CodeSignatureSection::blobHeadersSize
% 8) == 0);
1543 static_assert((CodeSignatureSection::fixedHeadersSize
% 8) == 0);
1545 CodeSignatureSection::CodeSignatureSection()
1546 : LinkEditSection(segment_names::linkEdit
, section_names::codeSignature
) {
1547 align
= 16; // required by libstuff
1549 // XXX: This mimics LD64, where it uses the install-name as codesign
1550 // identifier, if available.
1551 if (!config
->installName
.empty())
1552 fileName
= config
->installName
;
1554 // FIXME: Consider using finalOutput instead of outputFile.
1555 fileName
= config
->outputFile
;
1557 size_t slashIndex
= fileName
.rfind("/");
1558 if (slashIndex
!= std::string::npos
)
1559 fileName
= fileName
.drop_front(slashIndex
+ 1);
1561 // NOTE: Any changes to these calculations should be repeated
1562 // in llvm-objcopy's MachOLayoutBuilder::layoutTail.
1563 allHeadersSize
= alignTo
<16>(fixedHeadersSize
+ fileName
.size() + 1);
1564 fileNamePad
= allHeadersSize
- fixedHeadersSize
- fileName
.size();
1567 uint32_t CodeSignatureSection::getBlockCount() const {
1568 return (fileOff
+ blockSize
- 1) / blockSize
;
1571 uint64_t CodeSignatureSection::getRawSize() const {
1572 return allHeadersSize
+ getBlockCount() * hashSize
;
1575 void CodeSignatureSection::writeHashes(uint8_t *buf
) const {
1576 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1577 // MachOWriter::writeSignatureData.
1578 uint8_t *hashes
= buf
+ fileOff
+ allHeadersSize
;
1579 parallelFor(0, getBlockCount(), [&](size_t i
) {
1580 sha256(buf
+ i
* blockSize
,
1581 std::min(static_cast<size_t>(fileOff
- i
* blockSize
), blockSize
),
1582 hashes
+ i
* hashSize
);
1584 #if defined(__APPLE__)
1585 // This is macOS-specific work-around and makes no sense for any
1586 // other host OS. See https://openradar.appspot.com/FB8914231
1588 // The macOS kernel maintains a signature-verification cache to
1589 // quickly validate applications at time of execve(2). The trouble
1590 // is that for the kernel creates the cache entry at the time of the
1591 // mmap(2) call, before we have a chance to write either the code to
1592 // sign or the signature header+hashes. The fix is to invalidate
1593 // all cached data associated with the output file, thus discarding
1594 // the bogus prematurely-cached signature.
1595 msync(buf
, fileOff
+ getSize(), MS_INVALIDATE
);
1599 void CodeSignatureSection::writeTo(uint8_t *buf
) const {
1600 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1601 // MachOWriter::writeSignatureData.
1602 uint32_t signatureSize
= static_cast<uint32_t>(getSize());
1603 auto *superBlob
= reinterpret_cast<CS_SuperBlob
*>(buf
);
1604 write32be(&superBlob
->magic
, CSMAGIC_EMBEDDED_SIGNATURE
);
1605 write32be(&superBlob
->length
, signatureSize
);
1606 write32be(&superBlob
->count
, 1);
1607 auto *blobIndex
= reinterpret_cast<CS_BlobIndex
*>(&superBlob
[1]);
1608 write32be(&blobIndex
->type
, CSSLOT_CODEDIRECTORY
);
1609 write32be(&blobIndex
->offset
, blobHeadersSize
);
1610 auto *codeDirectory
=
1611 reinterpret_cast<CS_CodeDirectory
*>(buf
+ blobHeadersSize
);
1612 write32be(&codeDirectory
->magic
, CSMAGIC_CODEDIRECTORY
);
1613 write32be(&codeDirectory
->length
, signatureSize
- blobHeadersSize
);
1614 write32be(&codeDirectory
->version
, CS_SUPPORTSEXECSEG
);
1615 write32be(&codeDirectory
->flags
, CS_ADHOC
| CS_LINKER_SIGNED
);
1616 write32be(&codeDirectory
->hashOffset
,
1617 sizeof(CS_CodeDirectory
) + fileName
.size() + fileNamePad
);
1618 write32be(&codeDirectory
->identOffset
, sizeof(CS_CodeDirectory
));
1619 codeDirectory
->nSpecialSlots
= 0;
1620 write32be(&codeDirectory
->nCodeSlots
, getBlockCount());
1621 write32be(&codeDirectory
->codeLimit
, fileOff
);
1622 codeDirectory
->hashSize
= static_cast<uint8_t>(hashSize
);
1623 codeDirectory
->hashType
= kSecCodeSignatureHashSHA256
;
1624 codeDirectory
->platform
= 0;
1625 codeDirectory
->pageSize
= blockSizeShift
;
1626 codeDirectory
->spare2
= 0;
1627 codeDirectory
->scatterOffset
= 0;
1628 codeDirectory
->teamOffset
= 0;
1629 codeDirectory
->spare3
= 0;
1630 codeDirectory
->codeLimit64
= 0;
1631 OutputSegment
*textSeg
= getOrCreateOutputSegment(segment_names::text
);
1632 write64be(&codeDirectory
->execSegBase
, textSeg
->fileOff
);
1633 write64be(&codeDirectory
->execSegLimit
, textSeg
->fileSize
);
1634 write64be(&codeDirectory
->execSegFlags
,
1635 config
->outputType
== MH_EXECUTE
? CS_EXECSEG_MAIN_BINARY
: 0);
1636 auto *id
= reinterpret_cast<char *>(&codeDirectory
[1]);
1637 memcpy(id
, fileName
.begin(), fileName
.size());
1638 memset(id
+ fileName
.size(), 0, fileNamePad
);
1641 CStringSection::CStringSection(const char *name
)
1642 : SyntheticSection(segment_names::text
, name
) {
1643 flags
= S_CSTRING_LITERALS
;
1646 void CStringSection::addInput(CStringInputSection
*isec
) {
1647 isec
->parent
= this;
1648 inputs
.push_back(isec
);
1649 if (isec
->align
> align
)
1650 align
= isec
->align
;
1653 void CStringSection::writeTo(uint8_t *buf
) const {
1654 for (const CStringInputSection
*isec
: inputs
) {
1655 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1658 StringRef string
= isec
->getStringRef(i
);
1659 memcpy(buf
+ piece
.outSecOff
, string
.data(), string
.size());
1664 void CStringSection::finalizeContents() {
1665 uint64_t offset
= 0;
1666 for (CStringInputSection
*isec
: inputs
) {
1667 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1670 // See comment above DeduplicatedCStringSection for how alignment is
1672 uint32_t pieceAlign
= 1
1673 << llvm::countr_zero(isec
->align
| piece
.inSecOff
);
1674 offset
= alignToPowerOf2(offset
, pieceAlign
);
1675 piece
.outSecOff
= offset
;
1676 isec
->isFinal
= true;
1677 StringRef string
= isec
->getStringRef(i
);
1678 offset
+= string
.size() + 1; // account for null terminator
// Mergeable cstring literals are found under the __TEXT,__cstring section. In
// contrast to ELF, which puts strings that need different alignments into
// different sections, clang's Mach-O backend puts them all in one section.
// Strings that need to be aligned have the .p2align directive emitted before
// them, which simply translates into zero padding in the object file. In other
// words, we have to infer the desired alignment of these cstrings from their
// addresses.
//
// We differ slightly from ld64 in how we've chosen to align these cstrings.
// Both LLD and ld64 preserve the number of trailing zeros in each cstring's
// address in the input object files. When deduplicating identical cstrings,
// both linkers pick the cstring whose address has more trailing zeros, and
// preserve the alignment of that address in the final binary. However, ld64
// goes a step further and also preserves the offset of the cstring from the
// last section-aligned address. I.e. if a cstring is at offset 18 in the
// input, with a section alignment of 16, then both LLD and ld64 will ensure the
// final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also
// ensure that the final address is of the form 16 * k + 2 for some k.
//
// Note that ld64's heuristic means that a dedup'ed cstring's final address is
// dependent on the order of the input object files. E.g. if in addition to the
// cstring at offset 18 above, we have a duplicate one in another file with a
// `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick
// the cstring from the object file earlier on the command line (since both have
// the same number of trailing zeros in their address). So the final cstring may
// either be at some address `16 * k + 2` or at some address `2 * k`.
//
// I've opted not to follow this behavior primarily for implementation
// simplicity, and secondarily to save a few more bytes. It's not clear to me
// that preserving the section alignment + offset is ever necessary, and there
// are many cases that are clearly redundant. In particular, if an x86_64 object
// file contains some strings that are accessed via SIMD instructions, then the
// .cstring section in the object file will be 16-byte-aligned (since SIMD
// requires its operand addresses to be 16-byte aligned). However, there will
// typically also be other cstrings in the same file that aren't used via SIMD
// and don't need this alignment. They will be emitted at some arbitrary address
// `A`, but ld64 will treat them as being 16-byte aligned with an offset of `16
// % A`.
1722 void DeduplicatedCStringSection::finalizeContents() {
1723 // Find the largest alignment required for each string.
1724 for (const CStringInputSection
*isec
: inputs
) {
1725 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1728 auto s
= isec
->getCachedHashStringRef(i
);
1729 assert(isec
->align
!= 0);
1730 uint8_t trailingZeros
= llvm::countr_zero(isec
->align
| piece
.inSecOff
);
1731 auto it
= stringOffsetMap
.insert(
1732 std::make_pair(s
, StringOffset(trailingZeros
)));
1733 if (!it
.second
&& it
.first
->second
.trailingZeros
< trailingZeros
)
1734 it
.first
->second
.trailingZeros
= trailingZeros
;
1738 // Assign an offset for each string and save it to the corresponding
1739 // StringPieces for easy access.
1740 for (CStringInputSection
*isec
: inputs
) {
1741 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1744 auto s
= isec
->getCachedHashStringRef(i
);
1745 auto it
= stringOffsetMap
.find(s
);
1746 assert(it
!= stringOffsetMap
.end());
1747 StringOffset
&offsetInfo
= it
->second
;
1748 if (offsetInfo
.outSecOff
== UINT64_MAX
) {
1749 offsetInfo
.outSecOff
=
1750 alignToPowerOf2(size
, 1ULL << offsetInfo
.trailingZeros
);
1752 offsetInfo
.outSecOff
+ s
.size() + 1; // account for null terminator
1754 piece
.outSecOff
= offsetInfo
.outSecOff
;
1756 isec
->isFinal
= true;
1760 void DeduplicatedCStringSection::writeTo(uint8_t *buf
) const {
1761 for (const auto &p
: stringOffsetMap
) {
1762 StringRef data
= p
.first
.val();
1763 uint64_t off
= p
.second
.outSecOff
;
1765 memcpy(buf
+ off
, data
.data(), data
.size());
1769 DeduplicatedCStringSection::StringOffset
1770 DeduplicatedCStringSection::getStringOffset(StringRef str
) const {
1771 // StringPiece uses 31 bits to store the hashes, so we replicate that
1772 uint32_t hash
= xxh3_64bits(str
) & 0x7fffffff;
1773 auto offset
= stringOffsetMap
.find(CachedHashStringRef(str
, hash
));
1774 assert(offset
!= stringOffsetMap
.end() &&
1775 "Looked-up strings should always exist in section");
1776 return offset
->second
;
1779 // This section is actually emitted as __TEXT,__const by ld64, but clang may
1780 // emit input sections of that name, and LLD doesn't currently support mixing
1781 // synthetic and concat-type OutputSections. To work around this, I've given
1782 // our merged-literals section a different name.
1783 WordLiteralSection::WordLiteralSection()
1784 : SyntheticSection(segment_names::text
, section_names::literals
) {
1788 void WordLiteralSection::addInput(WordLiteralInputSection
*isec
) {
1789 isec
->parent
= this;
1790 inputs
.push_back(isec
);
1793 void WordLiteralSection::finalizeContents() {
1794 for (WordLiteralInputSection
*isec
: inputs
) {
1795 // We do all processing of the InputSection here, so it will be effectively
1797 isec
->isFinal
= true;
1798 const uint8_t *buf
= isec
->data
.data();
1799 switch (sectionType(isec
->getFlags())) {
1800 case S_4BYTE_LITERALS
: {
1801 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 4) {
1802 if (!isec
->isLive(off
))
1804 uint32_t value
= *reinterpret_cast<const uint32_t *>(buf
+ off
);
1805 literal4Map
.emplace(value
, literal4Map
.size());
1809 case S_8BYTE_LITERALS
: {
1810 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 8) {
1811 if (!isec
->isLive(off
))
1813 uint64_t value
= *reinterpret_cast<const uint64_t *>(buf
+ off
);
1814 literal8Map
.emplace(value
, literal8Map
.size());
1818 case S_16BYTE_LITERALS
: {
1819 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 16) {
1820 if (!isec
->isLive(off
))
1822 UInt128 value
= *reinterpret_cast<const UInt128
*>(buf
+ off
);
1823 literal16Map
.emplace(value
, literal16Map
.size());
1828 llvm_unreachable("invalid literal section type");
1833 void WordLiteralSection::writeTo(uint8_t *buf
) const {
1834 // Note that we don't attempt to do any endianness conversion in addInput(),
1835 // so we don't do it here either -- just write out the original value,
1837 for (const auto &p
: literal16Map
)
1838 memcpy(buf
+ p
.second
* 16, &p
.first
, 16);
1839 buf
+= literal16Map
.size() * 16;
1841 for (const auto &p
: literal8Map
)
1842 memcpy(buf
+ p
.second
* 8, &p
.first
, 8);
1843 buf
+= literal8Map
.size() * 8;
1845 for (const auto &p
: literal4Map
)
1846 memcpy(buf
+ p
.second
* 4, &p
.first
, 4);
1849 ObjCImageInfoSection::ObjCImageInfoSection()
1850 : SyntheticSection(segment_names::data
, section_names::objCImageInfo
) {}
1852 ObjCImageInfoSection::ImageInfo
1853 ObjCImageInfoSection::parseImageInfo(const InputFile
*file
) {
1855 ArrayRef
<uint8_t> data
= file
->objCImageInfo
;
1856 // The image info struct has the following layout:
1858 // uint32_t version;
1861 if (data
.size() < 8) {
1862 warn(toString(file
) + ": invalid __objc_imageinfo size");
1866 auto *buf
= reinterpret_cast<const uint32_t *>(data
.data());
1867 if (read32le(buf
) != 0) {
1868 warn(toString(file
) + ": invalid __objc_imageinfo version");
1872 uint32_t flags
= read32le(buf
+ 1);
1873 info
.swiftVersion
= (flags
>> 8) & 0xff;
1874 info
.hasCategoryClassProperties
= flags
& 0x40;
// Renders a Swift version byte as a printable string. The statement visible
// below formats the byte as "0x" followed by its hexadecimal digits.
// NOTE(review): most of this function's body is not visible in this chunk --
// confirm what precedes that return statement before relying on this summary.
1878 static std::string
swiftVersionString(uint8_t version
) {
1891 return ("0x" + Twine::utohexstr(version
)).str();
1895 // Validate each object file's __objc_imageinfo and use them to generate the
1896 // image info for the output binary. Only two pieces of info are relevant:
1897 // 1. The Swift version (should be identical across inputs)
1898 // 2. `bool hasCategoryClassProperties` (true only if true for all inputs)
1899 void ObjCImageInfoSection::finalizeContents() {
1900 assert(files
.size() != 0); // should have already been checked via isNeeded()
// Start from "true" so that the &= below behaves as a logical AND over all
// input files.
1902 info
.hasCategoryClassProperties
= true;
// File named first in the mismatch diagnostic below.
// NOTE(review): the statement that assigns firstFile is not visible in this
// chunk -- confirm it is always set before the error() call can read it.
1903 const InputFile
*firstFile
;
1904 for (const InputFile
*file
: files
) {
1905 ImageInfo inputInfo
= parseImageInfo(file
);
1906 info
.hasCategoryClassProperties
&= inputInfo
.hasCategoryClassProperties
;
1908 // swiftVersion 0 means no Swift is present, so no version checking required
1909 if (inputInfo
.swiftVersion
== 0)
// A Swift version was already recorded and this input disagrees with it:
// report both files and both versions.
1912 if (info
.swiftVersion
!= 0 && info
.swiftVersion
!= inputInfo
.swiftVersion
) {
1913 error("Swift version mismatch: " + toString(firstFile
) + " has version " +
1914 swiftVersionString(info
.swiftVersion
) + " but " + toString(file
) +
1915 " has version " + swiftVersionString(inputInfo
.swiftVersion
));
// Record this input's Swift version as the output's version.
1917 info
.swiftVersion
= inputInfo
.swiftVersion
;
1923 void ObjCImageInfoSection::writeTo(uint8_t *buf
) const {
1924 uint32_t flags
= info
.hasCategoryClassProperties
? 0x40 : 0x0;
1925 flags
|= info
.swiftVersion
<< 8;
1926 write32le(buf
+ 4, flags
);
1929 InitOffsetsSection::InitOffsetsSection()
1930 : SyntheticSection(segment_names::text
, section_names::initOffsets
) {
1931 flags
= S_INIT_FUNC_OFFSETS
;
1932 align
= 4; // This section contains 32-bit integers.
1935 uint64_t InitOffsetsSection::getSize() const {
1937 for (const ConcatInputSection
*isec
: sections
)
1938 count
+= isec
->relocs
.size();
1939 return count
* sizeof(uint32_t);
1942 void InitOffsetsSection::writeTo(uint8_t *buf
) const {
1943 // FIXME: Add function specified by -init when that argument is implemented.
1944 for (ConcatInputSection
*isec
: sections
) {
1945 for (const Reloc
&rel
: isec
->relocs
) {
1946 const Symbol
*referent
= rel
.referent
.dyn_cast
<Symbol
*>();
1947 assert(referent
&& "section relocation should have been rejected");
1948 uint64_t offset
= referent
->getVA() - in
.header
->addr
;
1949 // FIXME: Can we handle this gracefully?
1950 if (offset
> UINT32_MAX
)
1951 fatal(isec
->getLocation(rel
.offset
) + ": offset to initializer " +
1952 referent
->getName() + " (" + utohexstr(offset
) +
1953 ") does not fit in 32 bits");
1955 // Entries need to be added in the order they appear in the section, but
1956 // relocations aren't guaranteed to be sorted.
1957 size_t index
= rel
.offset
>> target
->p2WordSize
;
1958 write32le(&buf
[index
* sizeof(uint32_t)], offset
);
1960 buf
+= isec
->relocs
.size() * sizeof(uint32_t);
1964 // The inputs are __mod_init_func sections, which contain pointers to
1965 // initializer functions, therefore all relocations should be of the UNSIGNED
1966 // type. InitOffsetsSection stores offsets, so if the initializer's address is
1967 // not known at link time, stub-indirection has to be used.
1968 void InitOffsetsSection::setUp() {
1969 for (const ConcatInputSection
*isec
: sections
) {
1970 for (const Reloc
&rel
: isec
->relocs
) {
1971 RelocAttrs attrs
= target
->getRelocAttrs(rel
.type
);
1972 if (!attrs
.hasAttr(RelocAttrBits::UNSIGNED
))
1973 error(isec
->getLocation(rel
.offset
) +
1974 ": unsupported relocation type: " + attrs
.name
);
1975 if (rel
.addend
!= 0)
1976 error(isec
->getLocation(rel
.offset
) +
1977 ": relocation addend is not representable in __init_offsets");
1978 if (rel
.referent
.is
<InputSection
*>())
1979 error(isec
->getLocation(rel
.offset
) +
1980 ": unexpected section relocation");
1982 Symbol
*sym
= rel
.referent
.dyn_cast
<Symbol
*>();
1983 if (auto *undefined
= dyn_cast
<Undefined
>(sym
))
1984 treatUndefinedSymbol(*undefined
, isec
, rel
.offset
);
1985 if (needsBinding(sym
))
1986 in
.stubs
->addEntry(sym
);
1991 ObjCMethListSection::ObjCMethListSection()
1992 : SyntheticSection(segment_names::text
, section_names::objcMethList
) {
1993 flags
= S_ATTR_NO_DEAD_STRIP
;
1994 align
= relativeOffsetSize
;
1997 // Go through all input method lists and ensure that we have selrefs for all
1998 // their method names. The selrefs will be needed later by ::writeTo. We need to
1999 // create them early on here to ensure they are processed correctly by the lld
2001 void ObjCMethListSection::setUp() {
2002 for (const ConcatInputSection
*isec
: inputs
) {
2003 uint32_t structSizeAndFlags
= 0, structCount
= 0;
2004 readMethodListHeader(isec
->data
.data(), structSizeAndFlags
, structCount
);
2005 uint32_t originalStructSize
= structSizeAndFlags
& structSizeMask
;
2006 // Method name is immediately after header
2007 uint32_t methodNameOff
= methodListHeaderSize
;
2009 // Loop through all methods, and ensure a selref for each of them exists.
2010 while (methodNameOff
< isec
->data
.size()) {
2011 const Reloc
*reloc
= isec
->getRelocAt(methodNameOff
);
2012 assert(reloc
&& "Relocation expected at method list name slot");
2013 auto *def
= dyn_cast_or_null
<Defined
>(reloc
->referent
.get
<Symbol
*>());
2014 assert(def
&& "Expected valid Defined at method list name slot");
2015 auto *cisec
= cast
<CStringInputSection
>(def
->isec());
2016 assert(cisec
&& "Expected method name to be in a CStringInputSection");
2017 auto methname
= cisec
->getStringRefAtOffset(def
->value
);
2018 if (!ObjCSelRefsHelper::getSelRef(methname
))
2019 ObjCSelRefsHelper::makeSelRef(methname
);
2021 // Jump to method name offset in next struct
2022 methodNameOff
+= originalStructSize
;
2027 // Calculate section size and final offsets for where InputSection's need to be
// For every input method list this records its output offset, marks it final,
// and accumulates the size of its relative-offset encoding into sectionSize.
// When the relative encoding has a different size than the input, the sizes of
// the symbols defined in that input are updated to the new size.
// NOTE(review): some lines of this function are not visible in this chunk (the
// initialization of sectionSize and the guard for 0-size symbols mentioned in
// the comments below) -- confirm against the full file.
2029 void ObjCMethListSection::finalize() {
2030 // sectionSize will be the total size of the __objc_methlist section
2032 for (ConcatInputSection
*isec
: inputs
) {
2033 // We can also use sectionSize as write offset for isec
2034 assert(sectionSize
== alignToPowerOf2(sectionSize
, relativeOffsetSize
) &&
2035 "expected __objc_methlist to be aligned by default with the "
2036 "required section alignment");
2037 isec
->outSecOff
= sectionSize
;
2039 isec
->isFinal
= true;
// Size this input occupies once its pointers are replaced by relative offsets.
2040 uint32_t relativeListSize
=
2041 computeRelativeMethodListSize(isec
->data
.size());
2042 sectionSize
+= relativeListSize
;
2044 // If encoding the method list in relative offset format shrinks the size,
2045 // then we also need to adjust symbol sizes to match the new size. Note that
2046 // on 32bit platforms the size of the method list will remain the same when
2047 // encoded in relative offset format.
2048 if (relativeListSize
!= isec
->data
.size()) {
2049 for (Symbol
*sym
: isec
->symbols
) {
2050 assert(isa
<Defined
>(sym
) &&
2051 "Unexpected undefined symbol in ObjC method list");
2052 auto *def
= cast
<Defined
>(sym
);
2053 // There can be 0-size symbols, check if this is the case and ignore
2057 def
->size
== isec
->data
.size() &&
2058 "Invalid ObjC method list symbol size: expected symbol size to "
// The symbol takes on the size of the relative-offset encoded list.
2060 def
->size
= relativeListSize
;
2067 void ObjCMethListSection::writeTo(uint8_t *bufStart
) const {
2068 uint8_t *buf
= bufStart
;
2069 for (const ConcatInputSection
*isec
: inputs
) {
2070 assert(buf
- bufStart
== long(isec
->outSecOff
) &&
2071 "Writing at unexpected offset");
2072 uint32_t writtenSize
= writeRelativeMethodList(isec
, buf
);
2075 assert(buf
- bufStart
== sectionSize
&&
2076 "Written size does not match expected section size");
2079 // Check if an InputSection is a method list. To do this we scan the
2080 // InputSection for any symbols who's names match the patterns we expect clang
2081 // to generate for method lists.
// NOTE(review): the return statements and some guards of this function are not
// visible in this chunk -- confirm the exact result for each path against the
// full file.
2082 bool ObjCMethListSection::isMethodList(const ConcatInputSection
*isec
) {
// Symbol-name prefixes that are matched against each symbol of the section.
2083 const char *symPrefixes
[] = {objc::symbol_names::classMethods
,
2084 objc::symbol_names::instanceMethods
,
2085 objc::symbol_names::categoryInstanceMethods
,
2086 objc::symbol_names::categoryClassMethods
};
2089 for (const Symbol
*sym
: isec
->symbols
) {
2090 auto *def
= dyn_cast_or_null
<Defined
>(sym
);
2093 for (const char *prefix
: symPrefixes
) {
2094 if (def
->getName().starts_with(prefix
)) {
// A matching symbol is expected to span the whole section and start at
// offset 0.
2095 assert(def
->size
== isec
->data
.size() &&
2096 "Invalid ObjC method list symbol size: expected symbol size to "
2098 assert(def
->value
== 0 &&
2099 "Offset of ObjC method list symbol must be 0");
2108 // Encode a single relative offset value. The input is the data/symbol at
2109 // (&isec->data[inSecOff]). The output is written to (&buf[outSecOff]).
2110 // 'createSelRef' indicates that we should not directly use the specified
2111 // symbol, but instead get the selRef for the symbol and use that instead.
2112 void ObjCMethListSection::writeRelativeOffsetForIsec(
2113 const ConcatInputSection
*isec
, uint8_t *buf
, uint32_t &inSecOff
,
2114 uint32_t &outSecOff
, bool useSelRef
) const {
2115 const Reloc
*reloc
= isec
->getRelocAt(inSecOff
);
2116 assert(reloc
&& "Relocation expected at __objc_methlist Offset");
2117 auto *def
= dyn_cast_or_null
<Defined
>(reloc
->referent
.get
<Symbol
*>());
2118 assert(def
&& "Expected all syms in __objc_methlist to be defined");
2119 uint32_t symVA
= def
->getVA();
2122 auto *cisec
= cast
<CStringInputSection
>(def
->isec());
2123 auto methname
= cisec
->getStringRefAtOffset(def
->value
);
2124 ConcatInputSection
*selRef
= ObjCSelRefsHelper::getSelRef(methname
);
2125 assert(selRef
&& "Expected all selector names to already be already be "
2126 "present in __objc_selrefs");
2127 symVA
= selRef
->getVA();
2128 assert(selRef
->data
.size() == sizeof(target
->wordSize
) &&
2129 "Expected one selref per ConcatInputSection");
2132 uint32_t currentVA
= isec
->getVA() + outSecOff
;
2133 uint32_t delta
= symVA
- currentVA
;
2134 write32le(buf
+ outSecOff
, delta
);
2136 // Move one pointer forward in the absolute method list
2137 inSecOff
+= target
->wordSize
;
2138 // Move one relative offset forward in the relative method list (32 bits)
2139 outSecOff
+= relativeOffsetSize
;
2142 // Write a relative method list to buf, return the size of the written
2145 ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection
*isec
,
2146 uint8_t *buf
) const {
2147 // Copy over the header, and add the "this is a relative method list" magic
2149 uint32_t structSizeAndFlags
= 0, structCount
= 0;
2150 readMethodListHeader(isec
->data
.data(), structSizeAndFlags
, structCount
);
2151 // Set the struct size for the relative method list
2152 uint32_t relativeStructSizeAndFlags
=
2153 (relativeOffsetSize
* pointersPerStruct
) & structSizeMask
;
2154 // Carry over the old flags from the input struct
2155 relativeStructSizeAndFlags
|= structSizeAndFlags
& structFlagsMask
;
2156 // Set the relative method list flag
2157 relativeStructSizeAndFlags
|= relMethodHeaderFlag
;
2159 writeMethodListHeader(buf
, relativeStructSizeAndFlags
, structCount
);
2161 assert(methodListHeaderSize
+
2162 (structCount
* pointersPerStruct
* target
->wordSize
) ==
2163 isec
->data
.size() &&
2164 "Invalid computed ObjC method list size");
2166 uint32_t inSecOff
= methodListHeaderSize
;
2167 uint32_t outSecOff
= methodListHeaderSize
;
2169 // Go through the method list and encode input absolute pointers as relative
2170 // offsets. writeRelativeOffsetForIsec will be incrementing inSecOff and
2172 for (uint32_t i
= 0; i
< structCount
; i
++) {
2173 // Write the name of the method
2174 writeRelativeOffsetForIsec(isec
, buf
, inSecOff
, outSecOff
, true);
2175 // Write the type of the method
2176 writeRelativeOffsetForIsec(isec
, buf
, inSecOff
, outSecOff
, false);
2177 // Write reference to the selector of the method
2178 writeRelativeOffsetForIsec(isec
, buf
, inSecOff
, outSecOff
, false);
2181 // Expecting to have read all the data in the isec
2182 assert(inSecOff
== isec
->data
.size() &&
2183 "Invalid actual ObjC method list size");
2185 outSecOff
== computeRelativeMethodListSize(inSecOff
) &&
2186 "Mismatch between input & output size when writing relative method list");
2190 // Given the size of an ObjC method list InputSection, return the size of the
2191 // method list when encoded in relative offsets format. We can do this without
2192 // decoding the actual data, as it can be directly inferred from the size of the
// The header keeps its size (methodListHeaderSize); every pointer of
// target->wordSize bytes behind it becomes one entry of relativeOffsetSize
// bytes.
2194 uint32_t ObjCMethListSection::computeRelativeMethodListSize(
2195 uint32_t absoluteMethodListSize
) const {
// Bytes occupied by pointers in the input, and how many pointers that is.
2196 uint32_t oldPointersSize
= absoluteMethodListSize
- methodListHeaderSize
;
2197 uint32_t pointerCount
= oldPointersSize
/ target
->wordSize
;
2198 assert(((pointerCount
% pointersPerStruct
) == 0) &&
2199 "__objc_methlist expects method lists to have multiple-of-3 pointers");
// Same number of entries, each now relativeOffsetSize bytes wide.
2201 uint32_t newPointersSize
= pointerCount
* relativeOffsetSize
;
2202 uint32_t newTotalSize
= methodListHeaderSize
+ newPointersSize
;
2204 assert((newTotalSize
<= absoluteMethodListSize
) &&
2205 "Expected relative method list size to be smaller or equal than "
2207 return newTotalSize
;
2210 // Read a method list header from buf
2211 void ObjCMethListSection::readMethodListHeader(const uint8_t *buf
,
2212 uint32_t &structSizeAndFlags
,
2213 uint32_t &structCount
) const {
2214 structSizeAndFlags
= read32le(buf
);
2215 structCount
= read32le(buf
+ sizeof(uint32_t));
2218 // Write a method list header to buf
2219 void ObjCMethListSection::writeMethodListHeader(uint8_t *buf
,
2220 uint32_t structSizeAndFlags
,
2221 uint32_t structCount
) const {
2222 write32le(buf
, structSizeAndFlags
);
2223 write32le(buf
+ sizeof(structSizeAndFlags
), structCount
);
// Registers the linker-synthesized symbols with the symbol table: a Mach-O
// header symbol chosen by config->outputType, plus ___dso_handle.
// NOTE(review): the case labels and branch keywords of the switch below are
// not visible in this chunk -- confirm which output type maps to which symbol
// against the full file.
2226 void macho::createSyntheticSymbols() {
// Helper: adds `name` at value 0 in in.header->isec, as a private-extern
// symbol that is kept out of the symbol table and is not marked as
// referenced dynamically.
2227 auto addHeaderSymbol
= [](const char *name
) {
2228 symtab
->addSynthetic(name
, in
.header
->isec
, /*value=*/0,
2229 /*isPrivateExtern=*/true, /*includeInSymtab=*/false,
2230 /*referencedDynamically=*/false);
2233 switch (config
->outputType
) {
2234 // FIXME: Assign the right address value for these symbols
2235 // (rather than 0). But we need to do that after assignAddresses().
2237 // If linking PIE, __mh_execute_header is a defined symbol in
2239 // Otherwise, it's an absolute symbol.
// First variant: the symbol lives in in.header->isec.
2241 symtab
->addSynthetic("__mh_execute_header", in
.header
->isec
, /*value=*/0,
2242 /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
2243 /*referencedDynamically=*/true);
// Second variant: no section is given (isec is nullptr).
2245 symtab
->addSynthetic("__mh_execute_header", /*isec=*/nullptr, /*value=*/0,
2246 /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
2247 /*referencedDynamically=*/true);
2250 // The following symbols are N_SECT symbols, even though the header is not
2251 // part of any section and that they are private to the bundle/dylib/object
2252 // they are part of.
2254 addHeaderSymbol("__mh_bundle_header");
2257 addHeaderSymbol("__mh_dylib_header");
2260 addHeaderSymbol("__mh_dylinker_header");
2263 addHeaderSymbol("__mh_object_header");
2266 llvm_unreachable("unexpected outputType");
2270 // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
2271 // which does e.g. cleanup of static global variables. The ABI document
2272 // says that the pointer can point to any address in one of the dylib's
2273 // segments, but in practice ld64 seems to set it to point to the header,
2274 // so that's what's implemented here.
2275 addHeaderSymbol("___dso_handle");
2278 ChainedFixupsSection::ChainedFixupsSection()
2279 : LinkEditSection(segment_names::linkEdit
, section_names::chainFixups
) {}
// Reports whether this section has to be emitted. Must only be called when
// config->emitChainedFixups is set (asserted below).
// NOTE(review): the return statement is not visible in this chunk; the comment
// below indicates the section is created even without fixups -- confirm.
2281 bool ChainedFixupsSection::isNeeded() const {
2282 assert(config
->emitChainedFixups
);
2283 // dyld always expects LC_DYLD_CHAINED_FIXUPS to point to a valid
2284 // dyld_chained_fixups_header, so we create this section even if there aren't
2289 void ChainedFixupsSection::addBinding(const Symbol
*sym
,
2290 const InputSection
*isec
, uint64_t offset
,
2292 locations
.emplace_back(isec
, offset
);
2293 int64_t outlineAddend
= (addend
< 0 || addend
> 0xFF) ? addend
: 0;
2294 auto [it
, inserted
] = bindings
.insert(
2295 {{sym
, outlineAddend
}, static_cast<uint32_t>(bindings
.size())});
2298 symtabSize
+= sym
->getName().size() + 1;
2299 hasWeakBind
= hasWeakBind
|| needsWeakBind(*sym
);
2300 if (!isInt
<23>(outlineAddend
))
2301 needsLargeAddend
= true;
2302 else if (outlineAddend
!= 0)
2307 std::pair
<uint32_t, uint8_t>
2308 ChainedFixupsSection::getBinding(const Symbol
*sym
, int64_t addend
) const {
2309 int64_t outlineAddend
= (addend
< 0 || addend
> 0xFF) ? addend
: 0;
2310 auto it
= bindings
.find({sym
, outlineAddend
});
2311 assert(it
!= bindings
.end() && "binding not found in the imports table");
2312 if (outlineAddend
== 0)
2313 return {it
->second
, addend
};
2314 return {it
->second
, 0};
2317 static size_t writeImport(uint8_t *buf
, int format
, int16_t libOrdinal
,
2318 bool weakRef
, uint32_t nameOffset
, int64_t addend
) {
2320 case DYLD_CHAINED_IMPORT
: {
2321 auto *import
= reinterpret_cast<dyld_chained_import
*>(buf
);
2322 import
->lib_ordinal
= libOrdinal
;
2323 import
->weak_import
= weakRef
;
2324 import
->name_offset
= nameOffset
;
2325 return sizeof(dyld_chained_import
);
2327 case DYLD_CHAINED_IMPORT_ADDEND
: {
2328 auto *import
= reinterpret_cast<dyld_chained_import_addend
*>(buf
);
2329 import
->lib_ordinal
= libOrdinal
;
2330 import
->weak_import
= weakRef
;
2331 import
->name_offset
= nameOffset
;
2332 import
->addend
= addend
;
2333 return sizeof(dyld_chained_import_addend
);
2335 case DYLD_CHAINED_IMPORT_ADDEND64
: {
2336 auto *import
= reinterpret_cast<dyld_chained_import_addend64
*>(buf
);
2337 import
->lib_ordinal
= libOrdinal
;
2338 import
->weak_import
= weakRef
;
2339 import
->name_offset
= nameOffset
;
2340 import
->addend
= addend
;
2341 return sizeof(dyld_chained_import_addend64
);
2344 llvm_unreachable("Unknown import format");
2348 size_t ChainedFixupsSection::SegmentInfo::getSize() const {
2349 assert(pageStarts
.size() > 0 && "SegmentInfo for segment with no fixups?");
2350 return alignTo
<8>(sizeof(dyld_chained_starts_in_segment
) +
2351 pageStarts
.back().first
* sizeof(uint16_t));
2354 size_t ChainedFixupsSection::SegmentInfo::writeTo(uint8_t *buf
) const {
2355 auto *segInfo
= reinterpret_cast<dyld_chained_starts_in_segment
*>(buf
);
2356 segInfo
->size
= getSize();
2357 segInfo
->page_size
= target
->getPageSize();
2358 // FIXME: Use DYLD_CHAINED_PTR_64_OFFSET on newer OS versions.
2359 segInfo
->pointer_format
= DYLD_CHAINED_PTR_64
;
2360 segInfo
->segment_offset
= oseg
->addr
- in
.header
->addr
;
2361 segInfo
->max_valid_pointer
= 0; // not used on 64-bit
2362 segInfo
->page_count
= pageStarts
.back().first
+ 1;
2364 uint16_t *starts
= segInfo
->page_start
;
2365 for (size_t i
= 0; i
< segInfo
->page_count
; ++i
)
2366 starts
[i
] = DYLD_CHAINED_PTR_START_NONE
;
2368 for (auto [pageIdx
, startAddr
] : pageStarts
)
2369 starts
[pageIdx
] = startAddr
;
2370 return segInfo
->size
;
2373 static size_t importEntrySize(int format
) {
2375 case DYLD_CHAINED_IMPORT
:
2376 return sizeof(dyld_chained_import
);
2377 case DYLD_CHAINED_IMPORT_ADDEND
:
2378 return sizeof(dyld_chained_import_addend
);
2379 case DYLD_CHAINED_IMPORT_ADDEND64
:
2380 return sizeof(dyld_chained_import_addend64
);
2382 llvm_unreachable("Unknown import format");
2386 // This is step 3 of the algorithm described in the class comment of
2387 // ChainedFixupsSection.
2389 // LC_DYLD_CHAINED_FIXUPS data consists of (in this order):
2390 // * A dyld_chained_fixups_header
2391 // * A dyld_chained_starts_in_image
2392 // * One dyld_chained_starts_in_segment per segment
2393 // * List of all imports (dyld_chained_import, dyld_chained_import_addend, or
2394 // dyld_chained_import_addend64)
2395 // * Names of imported symbols
2396 void ChainedFixupsSection::writeTo(uint8_t *buf
) const {
2397 auto *header
= reinterpret_cast<dyld_chained_fixups_header
*>(buf
);
2398 header
->fixups_version
= 0;
2399 header
->imports_count
= bindings
.size();
2400 header
->imports_format
= importFormat
;
2401 header
->symbols_format
= 0;
2403 buf
+= alignTo
<8>(sizeof(*header
));
2405 auto curOffset
= [&buf
, &header
]() -> uint32_t {
2406 return buf
- reinterpret_cast<uint8_t *>(header
);
2409 header
->starts_offset
= curOffset();
2411 auto *imageInfo
= reinterpret_cast<dyld_chained_starts_in_image
*>(buf
);
2412 imageInfo
->seg_count
= outputSegments
.size();
2413 uint32_t *segStarts
= imageInfo
->seg_info_offset
;
2415 // dyld_chained_starts_in_image ends in a flexible array member containing an
2416 // uint32_t for each segment. Leave room for it, and fill it via segStarts.
2417 buf
+= alignTo
<8>(offsetof(dyld_chained_starts_in_image
, seg_info_offset
) +
2418 outputSegments
.size() * sizeof(uint32_t));
2420 // Initialize all offsets to 0, which indicates that the segment does not have
2421 // fixups. Those that do have them will be filled in below.
2422 for (size_t i
= 0; i
< outputSegments
.size(); ++i
)
2425 for (const SegmentInfo
&seg
: fixupSegments
) {
2426 segStarts
[seg
.oseg
->index
] = curOffset() - header
->starts_offset
;
2427 buf
+= seg
.writeTo(buf
);
2430 // Write imports table.
2431 header
->imports_offset
= curOffset();
2432 uint64_t nameOffset
= 0;
2433 for (auto [import
, idx
] : bindings
) {
2434 const Symbol
&sym
= *import
.first
;
2435 buf
+= writeImport(buf
, importFormat
, ordinalForSymbol(sym
),
2436 sym
.isWeakRef(), nameOffset
, import
.second
);
2437 nameOffset
+= sym
.getName().size() + 1;
2440 // Write imported symbol names.
2441 header
->symbols_offset
= curOffset();
2442 for (auto [import
, idx
] : bindings
) {
2443 StringRef name
= import
.first
->getName();
2444 memcpy(buf
, name
.data(), name
.size());
2445 buf
+= name
.size() + 1; // account for null terminator
2448 assert(curOffset() == getRawSize());
2451 // This is step 2 of the algorithm described in the class comment of
2452 // ChainedFixupsSection.
// Picks the import-table format, sorts the recorded fixup locations, groups
// them into per-segment page starts, and computes the encoded size.
// NOTE(review): several short lines of this function are not visible in this
// chunk (among them the target of the per-location offset computation, the
// condition in the sort comparator, and the loop increments) -- confirm the
// details against the full file.
2453 void ChainedFixupsSection::finalizeContents() {
2454 assert(target
->wordSize
== 8 && "Only 64-bit platforms are supported");
2455 assert(config
->emitChainedFixups
);
// The accumulated size of the imported-symbol names must be representable in
// 32 bits.
2457 if (!isUInt
<32>(symtabSize
))
2458 error("cannot encode chained fixups: imported symbols table size " +
2459 Twine(symtabSize
) + " exceeds 4 GiB");
// True if any import's library ordinal is above 0xF0.
2461 bool needsLargeOrdinal
= any_of(bindings
, [](const auto &p
) {
2462 // 0xF1 - 0xFF are reserved for special ordinals in the 8-bit encoding.
2463 return ordinalForSymbol(*p
.first
.first
) > 0xF0;
// Format selection: ADDEND64 when a large addend, a names table of 2^23 bytes
// or more, or a large ordinal is involved; ADDEND when some import has an
// outline addend; plain IMPORT is the remaining choice.
2466 if (needsLargeAddend
|| !isUInt
<23>(symtabSize
) || needsLargeOrdinal
)
2467 importFormat
= DYLD_CHAINED_IMPORT_ADDEND64
;
2468 else if (needsAddend
)
2469 importFormat
= DYLD_CHAINED_IMPORT_ADDEND
;
2471 importFormat
= DYLD_CHAINED_IMPORT
;
// For every location: segment offset of the containing output section plus
// the location's offset within that output section.
2473 for (Location
&loc
: locations
)
2475 loc
.isec
->parent
->getSegmentOffset() + loc
.isec
->getOffset(loc
.offset
);
// Sort key uses the owning segment (compared by address) and the location
// offset.
2477 llvm::sort(locations
, [](const Location
&a
, const Location
&b
) {
2478 const OutputSegment
*segA
= a
.isec
->parent
->parent
;
2479 const OutputSegment
*segB
= b
.isec
->parent
->parent
;
2481 return a
.offset
< b
.offset
;
2482 return segA
->addr
< segB
->addr
;
// Two locations belong together when their output sections share a segment.
2485 auto sameSegment
= [](const Location
&a
, const Location
&b
) {
2486 return a
.isec
->parent
->parent
== b
.isec
->parent
->parent
;
2489 const uint64_t pageSize
= target
->getPageSize();
// Walk the sorted locations: one fixupSegments entry per run of locations in
// the same segment, and within it one pageStarts entry (page index, offset
// within the page) per page.
2490 for (size_t i
= 0, count
= locations
.size(); i
< count
;) {
2491 const Location
&firstLoc
= locations
[i
];
2492 fixupSegments
.emplace_back(firstLoc
.isec
->parent
->parent
);
2493 while (i
< count
&& sameSegment(locations
[i
], firstLoc
)) {
2494 uint32_t pageIdx
= locations
[i
].offset
/ pageSize
;
2495 fixupSegments
.back().pageStarts
.emplace_back(
2496 pageIdx
, locations
[i
].offset
% pageSize
);
// Condition covering the remaining locations on the same page of the same
// segment.
2498 while (i
< count
&& sameSegment(locations
[i
], firstLoc
) &&
2499 locations
[i
].offset
/ pageSize
== pageIdx
)
2504 // Compute expected encoded size.
2505 size
= alignTo
<8>(sizeof(dyld_chained_fixups_header
));
2506 size
+= alignTo
<8>(offsetof(dyld_chained_starts_in_image
, seg_info_offset
) +
2507 outputSegments
.size() * sizeof(uint32_t));
2508 for (const SegmentInfo
&seg
: fixupSegments
)
2509 size
+= seg
.getSize();
2510 size
+= importEntrySize(importFormat
) * bindings
.size();
// Explicit instantiations of macho::makeSymtabSection for the LP64 and ILP32
// template arguments.
2514 template SymtabSection
*macho::makeSymtabSection
<LP64
>(StringTableSection
&);
2515 template SymtabSection
*macho::makeSymtabSection
<ILP32
>(StringTableSection
&);