1 //===- SyntheticSections.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "SyntheticSections.h"
10 #include "ConcatOutputSection.h"
12 #include "ExportTrie.h"
14 #include "InputFiles.h"
15 #include "MachOStructs.h"
17 #include "OutputSegment.h"
18 #include "SymbolTable.h"
21 #include "lld/Common/CommonLinkerContext.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/EndianStream.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/LEB128.h"
27 #include "llvm/Support/Parallel.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/xxhash.h"
31 #if defined(__APPLE__)
34 #define COMMON_DIGEST_FOR_OPENSSL
35 #include <CommonCrypto/CommonDigest.h>
37 #include "llvm/Support/SHA256.h"
41 using namespace llvm::MachO
;
42 using namespace llvm::support
;
43 using namespace llvm::support::endian
;
45 using namespace lld::macho
;
47 // Reads `len` bytes at data and writes the 32-byte SHA256 checksum to `output`.
48 static void sha256(const uint8_t *data
, size_t len
, uint8_t *output
) {
49 #if defined(__APPLE__)
50 // FIXME: Make LLVM's SHA256 faster and use it unconditionally. See PR56121
51 // for some notes on this.
52 CC_SHA256(data
, len
, output
);
54 ArrayRef
<uint8_t> block(data
, len
);
55 std::array
<uint8_t, 32> hash
= SHA256::hash(block
);
56 static_assert(hash
.size() == CodeSignatureSection::hashSize
);
57 memcpy(output
, hash
.data(), hash
.size());
62 std::vector
<SyntheticSection
*> macho::syntheticSections
;
64 SyntheticSection::SyntheticSection(const char *segname
, const char *name
)
65 : OutputSection(SyntheticKind
, name
) {
66 std::tie(this->segname
, this->name
) = maybeRenameSection({segname
, name
});
67 isec
= makeSyntheticInputSection(segname
, name
);
69 syntheticSections
.push_back(this);
72 // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
73 // from the beginning of the file (i.e. the header).
74 MachHeaderSection::MachHeaderSection()
75 : SyntheticSection(segment_names::text
, section_names::header
) {
76 // XXX: This is a hack. (See D97007)
77 // Setting the index to 1 to pretend that this section is the text
83 void MachHeaderSection::addLoadCommand(LoadCommand
*lc
) {
84 loadCommands
.push_back(lc
);
85 sizeOfCmds
+= lc
->getSize();
88 uint64_t MachHeaderSection::getSize() const {
89 uint64_t size
= target
->headerSize
+ sizeOfCmds
+ config
->headerPad
;
90 // If we are emitting an encryptable binary, our load commands must have a
91 // separate (non-encrypted) page to themselves.
92 if (config
->emitEncryptionInfo
)
93 size
= alignToPowerOf2(size
, target
->getPageSize());
97 static uint32_t cpuSubtype() {
98 uint32_t subtype
= target
->cpuSubtype
;
100 if (config
->outputType
== MH_EXECUTE
&& !config
->staticLink
&&
101 target
->cpuSubtype
== CPU_SUBTYPE_X86_64_ALL
&&
102 config
->platform() == PLATFORM_MACOS
&&
103 config
->platformInfo
.target
.MinDeployment
>= VersionTuple(10, 5))
104 subtype
|= CPU_SUBTYPE_LIB64
;
109 static bool hasWeakBinding() {
110 return config
->emitChainedFixups
? in
.chainedFixups
->hasWeakBinding()
111 : in
.weakBinding
->hasEntry();
114 static bool hasNonWeakDefinition() {
115 return config
->emitChainedFixups
? in
.chainedFixups
->hasNonWeakDefinition()
116 : in
.weakBinding
->hasNonWeakDefinition();
119 void MachHeaderSection::writeTo(uint8_t *buf
) const {
120 auto *hdr
= reinterpret_cast<mach_header
*>(buf
);
121 hdr
->magic
= target
->magic
;
122 hdr
->cputype
= target
->cpuType
;
123 hdr
->cpusubtype
= cpuSubtype();
124 hdr
->filetype
= config
->outputType
;
125 hdr
->ncmds
= loadCommands
.size();
126 hdr
->sizeofcmds
= sizeOfCmds
;
127 hdr
->flags
= MH_DYLDLINK
;
129 if (config
->namespaceKind
== NamespaceKind::twolevel
)
130 hdr
->flags
|= MH_NOUNDEFS
| MH_TWOLEVEL
;
132 if (config
->outputType
== MH_DYLIB
&& !config
->hasReexports
)
133 hdr
->flags
|= MH_NO_REEXPORTED_DYLIBS
;
135 if (config
->markDeadStrippableDylib
)
136 hdr
->flags
|= MH_DEAD_STRIPPABLE_DYLIB
;
138 if (config
->outputType
== MH_EXECUTE
&& config
->isPic
)
139 hdr
->flags
|= MH_PIE
;
141 if (config
->outputType
== MH_DYLIB
&& config
->applicationExtension
)
142 hdr
->flags
|= MH_APP_EXTENSION_SAFE
;
144 if (in
.exports
->hasWeakSymbol
|| hasNonWeakDefinition())
145 hdr
->flags
|= MH_WEAK_DEFINES
;
147 if (in
.exports
->hasWeakSymbol
|| hasWeakBinding())
148 hdr
->flags
|= MH_BINDS_TO_WEAK
;
150 for (const OutputSegment
*seg
: outputSegments
) {
151 for (const OutputSection
*osec
: seg
->getSections()) {
152 if (isThreadLocalVariables(osec
->flags
)) {
153 hdr
->flags
|= MH_HAS_TLV_DESCRIPTORS
;
159 uint8_t *p
= reinterpret_cast<uint8_t *>(hdr
) + target
->headerSize
;
160 for (const LoadCommand
*lc
: loadCommands
) {
166 PageZeroSection::PageZeroSection()
167 : SyntheticSection(segment_names::pageZero
, section_names::pageZero
) {}
169 RebaseSection::RebaseSection()
170 : LinkEditSection(segment_names::linkEdit
, section_names::rebase
) {}
174 uint64_t sequenceLength
;
179 static void emitIncrement(uint64_t incr
, raw_svector_ostream
&os
) {
182 if ((incr
>> target
->p2WordSize
) <= REBASE_IMMEDIATE_MASK
&&
183 (incr
% target
->wordSize
) == 0) {
184 os
<< static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED
|
185 (incr
>> target
->p2WordSize
));
187 os
<< static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB
);
188 encodeULEB128(incr
, os
);
192 static void flushRebase(const RebaseState
&state
, raw_svector_ostream
&os
) {
193 assert(state
.sequenceLength
> 0);
195 if (state
.skipLength
== target
->wordSize
) {
196 if (state
.sequenceLength
<= REBASE_IMMEDIATE_MASK
) {
197 os
<< static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES
|
198 state
.sequenceLength
);
200 os
<< static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES
);
201 encodeULEB128(state
.sequenceLength
, os
);
203 } else if (state
.sequenceLength
== 1) {
204 os
<< static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
);
205 encodeULEB128(state
.skipLength
- target
->wordSize
, os
);
207 os
<< static_cast<uint8_t>(
208 REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
);
209 encodeULEB128(state
.sequenceLength
, os
);
210 encodeULEB128(state
.skipLength
- target
->wordSize
, os
);
214 // Rebases are communicated to dyld using a bytecode, whose opcodes cause the
215 // memory location at a specific address to be rebased and/or the address to be
218 // Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic
219 // one, encoding a series of evenly spaced addresses. This algorithm works by
220 // splitting up the sorted list of addresses into such chunks. If the locations
221 // are consecutive or the sequence consists of a single location, flushRebase
222 // will use a smaller, more specialized encoding.
223 static void encodeRebases(const OutputSegment
*seg
,
224 MutableArrayRef
<Location
> locations
,
225 raw_svector_ostream
&os
) {
226 // dyld operates on segments. Translate section offsets into segment offsets.
227 for (Location
&loc
: locations
)
229 loc
.isec
->parent
->getSegmentOffset() + loc
.isec
->getOffset(loc
.offset
);
230 // The algorithm assumes that locations are unique.
232 llvm::unique(locations
, [](const Location
&a
, const Location
&b
) {
233 return a
.offset
== b
.offset
;
235 size_t count
= end
- locations
.begin();
237 os
<< static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
239 assert(!locations
.empty());
240 uint64_t offset
= locations
[0].offset
;
241 encodeULEB128(offset
, os
);
243 RebaseState state
{1, target
->wordSize
};
245 for (size_t i
= 1; i
< count
; ++i
) {
246 offset
= locations
[i
].offset
;
248 uint64_t skip
= offset
- locations
[i
- 1].offset
;
249 assert(skip
!= 0 && "duplicate locations should have been weeded out");
251 if (skip
== state
.skipLength
) {
252 ++state
.sequenceLength
;
253 } else if (state
.sequenceLength
== 1) {
254 ++state
.sequenceLength
;
255 state
.skipLength
= skip
;
256 } else if (skip
< state
.skipLength
) {
257 // The address is lower than what the rebase pointer would be if the last
258 // location would be part of a sequence. We start a new sequence from the
259 // previous location.
260 --state
.sequenceLength
;
261 flushRebase(state
, os
);
263 state
.sequenceLength
= 2;
264 state
.skipLength
= skip
;
266 // The address is at some positive offset from the rebase pointer. We
267 // start a new sequence which begins with the current location.
268 flushRebase(state
, os
);
269 emitIncrement(skip
- state
.skipLength
, os
);
270 state
.sequenceLength
= 1;
271 state
.skipLength
= target
->wordSize
;
274 flushRebase(state
, os
);
277 void RebaseSection::finalizeContents() {
278 if (locations
.empty())
281 raw_svector_ostream os
{contents
};
282 os
<< static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM
| REBASE_TYPE_POINTER
);
284 llvm::sort(locations
, [](const Location
&a
, const Location
&b
) {
285 return a
.isec
->getVA(a
.offset
) < b
.isec
->getVA(b
.offset
);
288 for (size_t i
= 0, count
= locations
.size(); i
< count
;) {
289 const OutputSegment
*seg
= locations
[i
].isec
->parent
->parent
;
291 while (j
< count
&& locations
[j
].isec
->parent
->parent
== seg
)
293 encodeRebases(seg
, {locations
.data() + i
, locations
.data() + j
}, os
);
296 os
<< static_cast<uint8_t>(REBASE_OPCODE_DONE
);
299 void RebaseSection::writeTo(uint8_t *buf
) const {
300 memcpy(buf
, contents
.data(), contents
.size());
303 NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname
,
305 : SyntheticSection(segname
, name
) {
306 align
= target
->wordSize
;
309 void macho::addNonLazyBindingEntries(const Symbol
*sym
,
310 const InputSection
*isec
, uint64_t offset
,
312 if (config
->emitChainedFixups
) {
313 if (needsBinding(sym
))
314 in
.chainedFixups
->addBinding(sym
, isec
, offset
, addend
);
315 else if (isa
<Defined
>(sym
))
316 in
.chainedFixups
->addRebase(isec
, offset
);
318 llvm_unreachable("cannot bind to an undefined symbol");
322 if (const auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
323 in
.binding
->addEntry(dysym
, isec
, offset
, addend
);
324 if (dysym
->isWeakDef())
325 in
.weakBinding
->addEntry(sym
, isec
, offset
, addend
);
326 } else if (const auto *defined
= dyn_cast
<Defined
>(sym
)) {
327 in
.rebase
->addEntry(isec
, offset
);
328 if (defined
->isExternalWeakDef())
329 in
.weakBinding
->addEntry(sym
, isec
, offset
, addend
);
330 else if (defined
->interposable
)
331 in
.binding
->addEntry(sym
, isec
, offset
, addend
);
333 // Undefined symbols are filtered out in scanRelocations(); we should never
335 llvm_unreachable("cannot bind to an undefined symbol");
339 void NonLazyPointerSectionBase::addEntry(Symbol
*sym
) {
340 if (entries
.insert(sym
)) {
341 assert(!sym
->isInGot());
342 sym
->gotIndex
= entries
.size() - 1;
344 addNonLazyBindingEntries(sym
, isec
, sym
->gotIndex
* target
->wordSize
);
348 void macho::writeChainedRebase(uint8_t *buf
, uint64_t targetVA
) {
349 assert(config
->emitChainedFixups
);
350 assert(target
->wordSize
== 8 && "Only 64-bit platforms are supported");
351 auto *rebase
= reinterpret_cast<dyld_chained_ptr_64_rebase
*>(buf
);
352 rebase
->target
= targetVA
& 0xf'ffff'ffff;
353 rebase
->high8
= (targetVA
>> 56);
354 rebase
->reserved
= 0;
358 // The fixup format places a 64 GiB limit on the output's size.
359 // Should we handle this gracefully?
360 uint64_t encodedVA
= rebase
->target
| ((uint64_t)rebase
->high8
<< 56);
361 if (encodedVA
!= targetVA
)
362 error("rebase target address 0x" + Twine::utohexstr(targetVA
) +
363 " does not fit into chained fixup. Re-link with -no_fixup_chains");
366 static void writeChainedBind(uint8_t *buf
, const Symbol
*sym
, int64_t addend
) {
367 assert(config
->emitChainedFixups
);
368 assert(target
->wordSize
== 8 && "Only 64-bit platforms are supported");
369 auto *bind
= reinterpret_cast<dyld_chained_ptr_64_bind
*>(buf
);
370 auto [ordinal
, inlineAddend
] = in
.chainedFixups
->getBinding(sym
, addend
);
371 bind
->ordinal
= ordinal
;
372 bind
->addend
= inlineAddend
;
378 void macho::writeChainedFixup(uint8_t *buf
, const Symbol
*sym
, int64_t addend
) {
379 if (needsBinding(sym
))
380 writeChainedBind(buf
, sym
, addend
);
382 writeChainedRebase(buf
, sym
->getVA() + addend
);
385 void NonLazyPointerSectionBase::writeTo(uint8_t *buf
) const {
386 if (config
->emitChainedFixups
) {
387 for (const auto &[i
, entry
] : llvm::enumerate(entries
))
388 writeChainedFixup(&buf
[i
* target
->wordSize
], entry
, 0);
390 for (const auto &[i
, entry
] : llvm::enumerate(entries
))
391 if (auto *defined
= dyn_cast
<Defined
>(entry
))
392 write64le(&buf
[i
* target
->wordSize
], defined
->getVA());
396 GotSection::GotSection()
397 : NonLazyPointerSectionBase(segment_names::data
, section_names::got
) {
398 flags
= S_NON_LAZY_SYMBOL_POINTERS
;
401 TlvPointerSection::TlvPointerSection()
402 : NonLazyPointerSectionBase(segment_names::data
,
403 section_names::threadPtrs
) {
404 flags
= S_THREAD_LOCAL_VARIABLE_POINTERS
;
407 BindingSection::BindingSection()
408 : LinkEditSection(segment_names::linkEdit
, section_names::binding
) {}
412 OutputSegment
*segment
= nullptr;
417 // Default value of 0xF0 is not valid opcode and should make the program
418 // scream instead of accidentally writing "valid" values.
419 uint8_t opcode
= 0xF0;
421 uint64_t consecutiveCount
= 0;
425 // Encode a sequence of opcodes that tell dyld to write the address of symbol +
426 // addend at osec->addr + outSecOff.
428 // The bind opcode "interpreter" remembers the values of each binding field, so
429 // we only need to encode the differences between bindings. Hence the use of
431 static void encodeBinding(const OutputSection
*osec
, uint64_t outSecOff
,
432 int64_t addend
, Binding
&lastBinding
,
433 std::vector
<BindIR
> &opcodes
) {
434 OutputSegment
*seg
= osec
->parent
;
435 uint64_t offset
= osec
->getSegmentOffset() + outSecOff
;
436 if (lastBinding
.segment
!= seg
) {
438 {static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
441 lastBinding
.segment
= seg
;
442 lastBinding
.offset
= offset
;
443 } else if (lastBinding
.offset
!= offset
) {
444 opcodes
.push_back({BIND_OPCODE_ADD_ADDR_ULEB
, offset
- lastBinding
.offset
});
445 lastBinding
.offset
= offset
;
448 if (lastBinding
.addend
!= addend
) {
450 {BIND_OPCODE_SET_ADDEND_SLEB
, static_cast<uint64_t>(addend
)});
451 lastBinding
.addend
= addend
;
454 opcodes
.push_back({BIND_OPCODE_DO_BIND
, 0});
455 // DO_BIND causes dyld to both perform the binding and increment the offset
456 lastBinding
.offset
+= target
->wordSize
;
459 static void optimizeOpcodes(std::vector
<BindIR
> &opcodes
) {
460 // Pass 1: Combine bind/add pairs
463 for (i
= 1; i
< opcodes
.size(); ++i
, ++pWrite
) {
464 if ((opcodes
[i
].opcode
== BIND_OPCODE_ADD_ADDR_ULEB
) &&
465 (opcodes
[i
- 1].opcode
== BIND_OPCODE_DO_BIND
)) {
466 opcodes
[pWrite
].opcode
= BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
;
467 opcodes
[pWrite
].data
= opcodes
[i
].data
;
470 opcodes
[pWrite
] = opcodes
[i
- 1];
473 if (i
== opcodes
.size())
474 opcodes
[pWrite
] = opcodes
[i
- 1];
475 opcodes
.resize(pWrite
+ 1);
477 // Pass 2: Compress two or more bind_add opcodes
479 for (i
= 1; i
< opcodes
.size(); ++i
, ++pWrite
) {
480 if ((opcodes
[i
].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
481 (opcodes
[i
- 1].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
482 (opcodes
[i
].data
== opcodes
[i
- 1].data
)) {
483 opcodes
[pWrite
].opcode
= BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
;
484 opcodes
[pWrite
].consecutiveCount
= 2;
485 opcodes
[pWrite
].data
= opcodes
[i
].data
;
487 while (i
< opcodes
.size() &&
488 (opcodes
[i
].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
489 (opcodes
[i
].data
== opcodes
[i
- 1].data
)) {
490 opcodes
[pWrite
].consecutiveCount
++;
494 opcodes
[pWrite
] = opcodes
[i
- 1];
497 if (i
== opcodes
.size())
498 opcodes
[pWrite
] = opcodes
[i
- 1];
499 opcodes
.resize(pWrite
+ 1);
501 // Pass 3: Use immediate encodings
502 // Every binding is the size of one pointer. If the next binding is a
503 // multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the
504 // opcode can be scaled by wordSize into a single byte and dyld will
505 // expand it to the correct address.
506 for (auto &p
: opcodes
) {
507 // It's unclear why the check needs to be less than BIND_IMMEDIATE_MASK,
508 // but ld64 currently does this. This could be a potential bug, but
509 // for now, perform the same behavior to prevent mysterious bugs.
510 if ((p
.opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
511 ((p
.data
/ target
->wordSize
) < BIND_IMMEDIATE_MASK
) &&
512 ((p
.data
% target
->wordSize
) == 0)) {
513 p
.opcode
= BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
;
514 p
.data
/= target
->wordSize
;
519 static void flushOpcodes(const BindIR
&op
, raw_svector_ostream
&os
) {
520 uint8_t opcode
= op
.opcode
& BIND_OPCODE_MASK
;
522 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
:
523 case BIND_OPCODE_ADD_ADDR_ULEB
:
524 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
:
526 encodeULEB128(op
.data
, os
);
528 case BIND_OPCODE_SET_ADDEND_SLEB
:
530 encodeSLEB128(static_cast<int64_t>(op
.data
), os
);
532 case BIND_OPCODE_DO_BIND
:
535 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
:
537 encodeULEB128(op
.consecutiveCount
, os
);
538 encodeULEB128(op
.data
, os
);
540 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
:
541 os
<< static_cast<uint8_t>(op
.opcode
| op
.data
);
544 llvm_unreachable("cannot bind to an unrecognized symbol");
548 static bool needsWeakBind(const Symbol
&sym
) {
549 if (auto *dysym
= dyn_cast
<DylibSymbol
>(&sym
))
550 return dysym
->isWeakDef();
551 if (auto *defined
= dyn_cast
<Defined
>(&sym
))
552 return defined
->isExternalWeakDef();
556 // Non-weak bindings need to have their dylib ordinal encoded as well.
557 static int16_t ordinalForDylibSymbol(const DylibSymbol
&dysym
) {
558 if (config
->namespaceKind
== NamespaceKind::flat
|| dysym
.isDynamicLookup())
559 return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP
);
560 assert(dysym
.getFile()->isReferenced());
561 return dysym
.getFile()->ordinal
;
564 static int16_t ordinalForSymbol(const Symbol
&sym
) {
565 if (config
->emitChainedFixups
&& needsWeakBind(sym
))
566 return BIND_SPECIAL_DYLIB_WEAK_LOOKUP
;
567 if (const auto *dysym
= dyn_cast
<DylibSymbol
>(&sym
))
568 return ordinalForDylibSymbol(*dysym
);
569 assert(cast
<Defined
>(&sym
)->interposable
);
570 return BIND_SPECIAL_DYLIB_FLAT_LOOKUP
;
573 static void encodeDylibOrdinal(int16_t ordinal
, raw_svector_ostream
&os
) {
575 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM
|
576 (ordinal
& BIND_IMMEDIATE_MASK
));
577 } else if (ordinal
<= BIND_IMMEDIATE_MASK
) {
578 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM
| ordinal
);
580 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB
);
581 encodeULEB128(ordinal
, os
);
585 static void encodeWeakOverride(const Defined
*defined
,
586 raw_svector_ostream
&os
) {
587 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
|
588 BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION
)
589 << defined
->getName() << '\0';
// Organize the bindings so we can encode them with fewer opcodes.
594 // First, all bindings for a given symbol should be grouped together.
595 // BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it
596 // has an associated symbol string), so we only want to emit it once per symbol.
598 // Within each group, we sort the bindings by address. Since bindings are
599 // delta-encoded, sorting them allows for a more compact result. Note that
600 // sorting by address alone ensures that bindings for the same segment / section
601 // are located together, minimizing the number of times we have to emit
602 // BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB.
604 // Finally, we sort the symbols by the address of their first binding, again
605 // to facilitate the delta-encoding process.
607 std::vector
<std::pair
<const Sym
*, std::vector
<BindingEntry
>>>
608 sortBindings(const BindingsMap
<const Sym
*> &bindingsMap
) {
609 std::vector
<std::pair
<const Sym
*, std::vector
<BindingEntry
>>> bindingsVec(
610 bindingsMap
.begin(), bindingsMap
.end());
611 for (auto &p
: bindingsVec
) {
612 std::vector
<BindingEntry
> &bindings
= p
.second
;
613 llvm::sort(bindings
, [](const BindingEntry
&a
, const BindingEntry
&b
) {
614 return a
.target
.getVA() < b
.target
.getVA();
617 llvm::sort(bindingsVec
, [](const auto &a
, const auto &b
) {
618 return a
.second
[0].target
.getVA() < b
.second
[0].target
.getVA();
623 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
624 // interprets to update a record with the following fields:
625 // * segment index (of the segment to write the symbol addresses to, typically
626 // the __DATA_CONST segment which contains the GOT)
627 // * offset within the segment, indicating the next location to write a binding
629 // * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
632 // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
633 // a symbol in the GOT, and increments the segment offset to point to the next
634 // entry. It does *not* clear the record state after doing the bind, so
635 // subsequent opcodes only need to encode the differences between bindings.
636 void BindingSection::finalizeContents() {
637 raw_svector_ostream os
{contents
};
639 int16_t lastOrdinal
= 0;
641 for (auto &p
: sortBindings(bindingsMap
)) {
642 const Symbol
*sym
= p
.first
;
643 std::vector
<BindingEntry
> &bindings
= p
.second
;
644 uint8_t flags
= BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
;
645 if (sym
->isWeakRef())
646 flags
|= BIND_SYMBOL_FLAGS_WEAK_IMPORT
;
647 os
<< flags
<< sym
->getName() << '\0'
648 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM
| BIND_TYPE_POINTER
);
649 int16_t ordinal
= ordinalForSymbol(*sym
);
650 if (ordinal
!= lastOrdinal
) {
651 encodeDylibOrdinal(ordinal
, os
);
652 lastOrdinal
= ordinal
;
654 std::vector
<BindIR
> opcodes
;
655 for (const BindingEntry
&b
: bindings
)
656 encodeBinding(b
.target
.isec
->parent
,
657 b
.target
.isec
->getOffset(b
.target
.offset
), b
.addend
,
658 lastBinding
, opcodes
);
659 if (config
->optimize
> 1)
660 optimizeOpcodes(opcodes
);
661 for (const auto &op
: opcodes
)
662 flushOpcodes(op
, os
);
664 if (!bindingsMap
.empty())
665 os
<< static_cast<uint8_t>(BIND_OPCODE_DONE
);
668 void BindingSection::writeTo(uint8_t *buf
) const {
669 memcpy(buf
, contents
.data(), contents
.size());
672 WeakBindingSection::WeakBindingSection()
673 : LinkEditSection(segment_names::linkEdit
, section_names::weakBinding
) {}
675 void WeakBindingSection::finalizeContents() {
676 raw_svector_ostream os
{contents
};
679 for (const Defined
*defined
: definitions
)
680 encodeWeakOverride(defined
, os
);
682 for (auto &p
: sortBindings(bindingsMap
)) {
683 const Symbol
*sym
= p
.first
;
684 std::vector
<BindingEntry
> &bindings
= p
.second
;
685 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
)
686 << sym
->getName() << '\0'
687 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM
| BIND_TYPE_POINTER
);
688 std::vector
<BindIR
> opcodes
;
689 for (const BindingEntry
&b
: bindings
)
690 encodeBinding(b
.target
.isec
->parent
,
691 b
.target
.isec
->getOffset(b
.target
.offset
), b
.addend
,
692 lastBinding
, opcodes
);
693 if (config
->optimize
> 1)
694 optimizeOpcodes(opcodes
);
695 for (const auto &op
: opcodes
)
696 flushOpcodes(op
, os
);
698 if (!bindingsMap
.empty() || !definitions
.empty())
699 os
<< static_cast<uint8_t>(BIND_OPCODE_DONE
);
702 void WeakBindingSection::writeTo(uint8_t *buf
) const {
703 memcpy(buf
, contents
.data(), contents
.size());
706 StubsSection::StubsSection()
707 : SyntheticSection(segment_names::text
, section_names::stubs
) {
708 flags
= S_SYMBOL_STUBS
| S_ATTR_SOME_INSTRUCTIONS
| S_ATTR_PURE_INSTRUCTIONS
;
709 // The stubs section comprises machine instructions, which are aligned to
710 // 4 bytes on the archs we care about.
712 reserved2
= target
->stubSize
;
715 uint64_t StubsSection::getSize() const {
716 return entries
.size() * target
->stubSize
;
719 void StubsSection::writeTo(uint8_t *buf
) const {
721 for (const Symbol
*sym
: entries
) {
723 config
->emitChainedFixups
? sym
->getGotVA() : sym
->getLazyPtrVA();
724 target
->writeStub(buf
+ off
, *sym
, pointerVA
);
725 off
+= target
->stubSize
;
729 void StubsSection::finalize() { isFinal
= true; }
731 static void addBindingsForStub(Symbol
*sym
) {
732 assert(!config
->emitChainedFixups
);
733 if (auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
734 if (sym
->isWeakDef()) {
735 in
.binding
->addEntry(dysym
, in
.lazyPointers
->isec
,
736 sym
->stubsIndex
* target
->wordSize
);
737 in
.weakBinding
->addEntry(sym
, in
.lazyPointers
->isec
,
738 sym
->stubsIndex
* target
->wordSize
);
740 in
.lazyBinding
->addEntry(dysym
);
742 } else if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
743 if (defined
->isExternalWeakDef()) {
744 in
.rebase
->addEntry(in
.lazyPointers
->isec
,
745 sym
->stubsIndex
* target
->wordSize
);
746 in
.weakBinding
->addEntry(sym
, in
.lazyPointers
->isec
,
747 sym
->stubsIndex
* target
->wordSize
);
748 } else if (defined
->interposable
) {
749 in
.lazyBinding
->addEntry(sym
);
751 llvm_unreachable("invalid stub target");
754 llvm_unreachable("invalid stub target symbol type");
758 void StubsSection::addEntry(Symbol
*sym
) {
759 bool inserted
= entries
.insert(sym
);
761 sym
->stubsIndex
= entries
.size() - 1;
763 if (config
->emitChainedFixups
)
764 in
.got
->addEntry(sym
);
766 addBindingsForStub(sym
);
770 StubHelperSection::StubHelperSection()
771 : SyntheticSection(segment_names::text
, section_names::stubHelper
) {
772 flags
= S_ATTR_SOME_INSTRUCTIONS
| S_ATTR_PURE_INSTRUCTIONS
;
773 align
= 4; // This section comprises machine instructions
776 uint64_t StubHelperSection::getSize() const {
777 return target
->stubHelperHeaderSize
+
778 in
.lazyBinding
->getEntries().size() * target
->stubHelperEntrySize
;
781 bool StubHelperSection::isNeeded() const { return in
.lazyBinding
->isNeeded(); }
783 void StubHelperSection::writeTo(uint8_t *buf
) const {
784 target
->writeStubHelperHeader(buf
);
785 size_t off
= target
->stubHelperHeaderSize
;
786 for (const Symbol
*sym
: in
.lazyBinding
->getEntries()) {
787 target
->writeStubHelperEntry(buf
+ off
, *sym
, addr
+ off
);
788 off
+= target
->stubHelperEntrySize
;
792 void StubHelperSection::setUp() {
793 Symbol
*binder
= symtab
->addUndefined("dyld_stub_binder", /*file=*/nullptr,
794 /*isWeakRef=*/false);
795 if (auto *undefined
= dyn_cast
<Undefined
>(binder
))
796 treatUndefinedSymbol(*undefined
,
797 "lazy binding (normally in libSystem.dylib)");
799 // treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check.
800 stubBinder
= dyn_cast_or_null
<DylibSymbol
>(binder
);
801 if (stubBinder
== nullptr)
804 in
.got
->addEntry(stubBinder
);
806 in
.imageLoaderCache
->parent
=
807 ConcatOutputSection::getOrCreateForInput(in
.imageLoaderCache
);
808 addInputSection(in
.imageLoaderCache
);
809 // Since this isn't in the symbol table or in any input file, the noDeadStrip
810 // argument doesn't matter.
812 make
<Defined
>("__dyld_private", nullptr, in
.imageLoaderCache
, 0, 0,
814 /*isExternal=*/false, /*isPrivateExtern=*/false,
815 /*includeInSymtab=*/true,
816 /*isReferencedDynamically=*/false,
817 /*noDeadStrip=*/false);
818 dyldPrivate
->used
= true;
821 llvm::DenseMap
<llvm::CachedHashStringRef
, ConcatInputSection
*>
822 ObjCSelRefsHelper::methnameToSelref
;
823 void ObjCSelRefsHelper::initialize() {
824 // Do not fold selrefs without ICF.
825 if (config
->icfLevel
== ICFLevel::none
)
828 // Search methnames already referenced in __objc_selrefs
829 // Map the name to the corresponding selref entry
830 // which we will reuse when creating objc stubs.
831 for (ConcatInputSection
*isec
: inputSections
) {
832 if (isec
->shouldOmitFromOutput())
834 if (isec
->getName() != section_names::objcSelrefs
)
836 // We expect a single relocation per selref entry to __objc_methname that
837 // might be aggregated.
838 assert(isec
->relocs
.size() == 1);
839 auto Reloc
= isec
->relocs
[0];
840 if (const auto *sym
= Reloc
.referent
.dyn_cast
<Symbol
*>()) {
841 if (const auto *d
= dyn_cast
<Defined
>(sym
)) {
842 auto *cisec
= cast
<CStringInputSection
>(d
->isec());
843 auto methname
= cisec
->getStringRefAtOffset(d
->value
);
844 methnameToSelref
[CachedHashStringRef(methname
)] = isec
;
850 void ObjCSelRefsHelper::cleanup() { methnameToSelref
.clear(); }
852 ConcatInputSection
*ObjCSelRefsHelper::makeSelRef(StringRef methname
) {
853 auto methnameOffset
=
854 in
.objcMethnameSection
->getStringOffset(methname
).outSecOff
;
856 size_t wordSize
= target
->wordSize
;
857 uint8_t *selrefData
= bAlloc().Allocate
<uint8_t>(wordSize
);
858 write64le(selrefData
, methnameOffset
);
859 ConcatInputSection
*objcSelref
=
860 makeSyntheticInputSection(segment_names::data
, section_names::objcSelrefs
,
861 S_LITERAL_POINTERS
| S_ATTR_NO_DEAD_STRIP
,
862 ArrayRef
<uint8_t>{selrefData
, wordSize
},
864 assert(objcSelref
->live
);
865 objcSelref
->relocs
.push_back({/*type=*/target
->unsignedRelocType
,
866 /*pcrel=*/false, /*length=*/3,
868 /*addend=*/static_cast<int64_t>(methnameOffset
),
869 /*referent=*/in
.objcMethnameSection
->isec
});
870 objcSelref
->parent
= ConcatOutputSection::getOrCreateForInput(objcSelref
);
871 addInputSection(objcSelref
);
872 objcSelref
->isFinal
= true;
873 methnameToSelref
[CachedHashStringRef(methname
)] = objcSelref
;
877 ConcatInputSection
*ObjCSelRefsHelper::getSelRef(StringRef methname
) {
878 auto it
= methnameToSelref
.find(CachedHashStringRef(methname
));
879 if (it
== methnameToSelref
.end())
884 ObjCStubsSection::ObjCStubsSection()
885 : SyntheticSection(segment_names::text
, section_names::objcStubs
) {
886 flags
= S_ATTR_SOME_INSTRUCTIONS
| S_ATTR_PURE_INSTRUCTIONS
;
887 align
= config
->objcStubsMode
== ObjCStubsMode::fast
888 ? target
->objcStubsFastAlignment
889 : target
->objcStubsSmallAlignment
;
892 bool ObjCStubsSection::isObjCStubSymbol(Symbol
*sym
) {
893 return sym
->getName().starts_with(symbolPrefix
);
896 StringRef
ObjCStubsSection::getMethname(Symbol
*sym
) {
897 assert(isObjCStubSymbol(sym
) && "not an objc stub");
898 auto name
= sym
->getName();
899 StringRef methname
= name
.drop_front(symbolPrefix
.size());
903 void ObjCStubsSection::addEntry(Symbol
*sym
) {
904 StringRef methname
= getMethname(sym
);
905 // We create a selref entry for each unique methname.
906 if (!ObjCSelRefsHelper::getSelRef(methname
))
907 ObjCSelRefsHelper::makeSelRef(methname
);
909 auto stubSize
= config
->objcStubsMode
== ObjCStubsMode::fast
910 ? target
->objcStubsFastSize
911 : target
->objcStubsSmallSize
;
912 Defined
*newSym
= replaceSymbol
<Defined
>(
913 sym
, sym
->getName(), nullptr, isec
,
914 /*value=*/symbols
.size() * stubSize
,
916 /*isWeakDef=*/false, /*isExternal=*/true, /*isPrivateExtern=*/true,
917 /*includeInSymtab=*/true, /*isReferencedDynamically=*/false,
918 /*noDeadStrip=*/false);
919 symbols
.push_back(newSym
);
922 void ObjCStubsSection::setUp() {
923 objcMsgSend
= symtab
->addUndefined("_objc_msgSend", /*file=*/nullptr,
924 /*isWeakRef=*/false);
925 if (auto *undefined
= dyn_cast
<Undefined
>(objcMsgSend
))
926 treatUndefinedSymbol(*undefined
,
927 "lazy binding (normally in libobjc.dylib)");
928 objcMsgSend
->used
= true;
929 if (config
->objcStubsMode
== ObjCStubsMode::fast
) {
930 in
.got
->addEntry(objcMsgSend
);
931 assert(objcMsgSend
->isInGot());
933 assert(config
->objcStubsMode
== ObjCStubsMode::small
);
934 // In line with ld64's behavior, when objc_msgSend is a direct symbol,
935 // we directly reference it.
936 // In other cases, typically when binding in libobjc.dylib,
937 // we generate a stub to invoke objc_msgSend.
938 if (!isa
<Defined
>(objcMsgSend
))
939 in
.stubs
->addEntry(objcMsgSend
);
943 uint64_t ObjCStubsSection::getSize() const {
944 auto stubSize
= config
->objcStubsMode
== ObjCStubsMode::fast
945 ? target
->objcStubsFastSize
946 : target
->objcStubsSmallSize
;
947 return stubSize
* symbols
.size();
950 void ObjCStubsSection::writeTo(uint8_t *buf
) const {
951 uint64_t stubOffset
= 0;
952 for (size_t i
= 0, n
= symbols
.size(); i
< n
; ++i
) {
953 Defined
*sym
= symbols
[i
];
955 auto methname
= getMethname(sym
);
956 InputSection
*selRef
= ObjCSelRefsHelper::getSelRef(methname
);
957 assert(selRef
!= nullptr && "no selref for methname");
958 auto selrefAddr
= selRef
->getVA(0);
959 target
->writeObjCMsgSendStub(buf
+ stubOffset
, sym
, in
.objcStubs
->addr
,
960 stubOffset
, selrefAddr
, objcMsgSend
);
964 LazyPointerSection::LazyPointerSection()
965 : SyntheticSection(segment_names::data
, section_names::lazySymbolPtr
) {
966 align
= target
->wordSize
;
967 flags
= S_LAZY_SYMBOL_POINTERS
;
970 uint64_t LazyPointerSection::getSize() const {
971 return in
.stubs
->getEntries().size() * target
->wordSize
;
974 bool LazyPointerSection::isNeeded() const {
975 return !in
.stubs
->getEntries().empty();
978 void LazyPointerSection::writeTo(uint8_t *buf
) const {
980 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
981 if (const auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
982 if (dysym
->hasStubsHelper()) {
983 uint64_t stubHelperOffset
=
984 target
->stubHelperHeaderSize
+
985 dysym
->stubsHelperIndex
* target
->stubHelperEntrySize
;
986 write64le(buf
+ off
, in
.stubHelper
->addr
+ stubHelperOffset
);
989 write64le(buf
+ off
, sym
->getVA());
991 off
+= target
->wordSize
;
995 LazyBindingSection::LazyBindingSection()
996 : LinkEditSection(segment_names::linkEdit
, section_names::lazyBinding
) {}
998 void LazyBindingSection::finalizeContents() {
999 // TODO: Just precompute output size here instead of writing to a temporary
1001 for (Symbol
*sym
: entries
)
1002 sym
->lazyBindOffset
= encode(*sym
);
1005 void LazyBindingSection::writeTo(uint8_t *buf
) const {
1006 memcpy(buf
, contents
.data(), contents
.size());
1009 void LazyBindingSection::addEntry(Symbol
*sym
) {
1010 assert(!config
->emitChainedFixups
&& "Chained fixups always bind eagerly");
1011 if (entries
.insert(sym
)) {
1012 sym
->stubsHelperIndex
= entries
.size() - 1;
1013 in
.rebase
->addEntry(in
.lazyPointers
->isec
,
1014 sym
->stubsIndex
* target
->wordSize
);
1018 // Unlike the non-lazy binding section, the bind opcodes in this section aren't
1019 // interpreted all at once. Rather, dyld will start interpreting opcodes at a
1020 // given offset, typically only binding a single symbol before it finds a
1021 // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
1022 // we cannot encode just the differences between symbols; we have to emit the
1023 // complete bind information for each symbol.
1024 uint32_t LazyBindingSection::encode(const Symbol
&sym
) {
1025 uint32_t opstreamOffset
= contents
.size();
1026 OutputSegment
*dataSeg
= in
.lazyPointers
->parent
;
1027 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
1030 in
.lazyPointers
->addr
- dataSeg
->addr
+ sym
.stubsIndex
* target
->wordSize
;
1031 encodeULEB128(offset
, os
);
1032 encodeDylibOrdinal(ordinalForSymbol(sym
), os
);
1034 uint8_t flags
= BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
;
1035 if (sym
.isWeakRef())
1036 flags
|= BIND_SYMBOL_FLAGS_WEAK_IMPORT
;
1038 os
<< flags
<< sym
.getName() << '\0'
1039 << static_cast<uint8_t>(BIND_OPCODE_DO_BIND
)
1040 << static_cast<uint8_t>(BIND_OPCODE_DONE
);
1041 return opstreamOffset
;
1044 ExportSection::ExportSection()
1045 : LinkEditSection(segment_names::linkEdit
, section_names::export_
) {}
1047 void ExportSection::finalizeContents() {
1048 trieBuilder
.setImageBase(in
.header
->addr
);
1049 for (const Symbol
*sym
: symtab
->getSymbols()) {
1050 if (const auto *defined
= dyn_cast
<Defined
>(sym
)) {
1051 if (defined
->privateExtern
|| !defined
->isLive())
1053 trieBuilder
.addSymbol(*defined
);
1054 hasWeakSymbol
= hasWeakSymbol
|| sym
->isWeakDef();
1055 } else if (auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
1056 if (dysym
->shouldReexport
)
1057 trieBuilder
.addSymbol(*dysym
);
1060 size
= trieBuilder
.build();
1063 void ExportSection::writeTo(uint8_t *buf
) const { trieBuilder
.writeTo(buf
); }
1065 DataInCodeSection::DataInCodeSection()
1066 : LinkEditSection(segment_names::linkEdit
, section_names::dataInCode
) {}
1069 static std::vector
<MachO::data_in_code_entry
> collectDataInCodeEntries() {
1070 std::vector
<MachO::data_in_code_entry
> dataInCodeEntries
;
1071 for (const InputFile
*inputFile
: inputFiles
) {
1072 if (!isa
<ObjFile
>(inputFile
))
1074 const ObjFile
*objFile
= cast
<ObjFile
>(inputFile
);
1075 ArrayRef
<MachO::data_in_code_entry
> entries
= objFile
->getDataInCode();
1076 if (entries
.empty())
1079 std::vector
<MachO::data_in_code_entry
> sortedEntries
;
1080 sortedEntries
.assign(entries
.begin(), entries
.end());
1081 llvm::sort(sortedEntries
, [](const data_in_code_entry
&lhs
,
1082 const data_in_code_entry
&rhs
) {
1083 return lhs
.offset
< rhs
.offset
;
1086 // For each code subsection find 'data in code' entries residing in it.
1087 // Compute the new offset values as
1088 // <offset within subsection> + <subsection address> - <__TEXT address>.
1089 for (const Section
*section
: objFile
->sections
) {
1090 for (const Subsection
&subsec
: section
->subsections
) {
1091 const InputSection
*isec
= subsec
.isec
;
1092 if (!isCodeSection(isec
))
1094 if (cast
<ConcatInputSection
>(isec
)->shouldOmitFromOutput())
1096 const uint64_t beginAddr
= section
->addr
+ subsec
.offset
;
1097 auto it
= llvm::lower_bound(
1098 sortedEntries
, beginAddr
,
1099 [](const MachO::data_in_code_entry
&entry
, uint64_t addr
) {
1100 return entry
.offset
< addr
;
1102 const uint64_t endAddr
= beginAddr
+ isec
->getSize();
1103 for (const auto end
= sortedEntries
.end();
1104 it
!= end
&& it
->offset
+ it
->length
<= endAddr
; ++it
)
1105 dataInCodeEntries
.push_back(
1106 {static_cast<uint32_t>(isec
->getVA(it
->offset
- beginAddr
) -
1108 it
->length
, it
->kind
});
1113 // ld64 emits the table in sorted order too.
1114 llvm::sort(dataInCodeEntries
,
1115 [](const data_in_code_entry
&lhs
, const data_in_code_entry
&rhs
) {
1116 return lhs
.offset
< rhs
.offset
;
1118 return dataInCodeEntries
;
1121 void DataInCodeSection::finalizeContents() {
1122 entries
= target
->wordSize
== 8 ? collectDataInCodeEntries
<LP64
>()
1123 : collectDataInCodeEntries
<ILP32
>();
1126 void DataInCodeSection::writeTo(uint8_t *buf
) const {
1127 if (!entries
.empty())
1128 memcpy(buf
, entries
.data(), getRawSize());
1131 FunctionStartsSection::FunctionStartsSection()
1132 : LinkEditSection(segment_names::linkEdit
, section_names::functionStarts
) {}
1134 void FunctionStartsSection::finalizeContents() {
1135 raw_svector_ostream os
{contents
};
1136 std::vector
<uint64_t> addrs
;
1137 for (const InputFile
*file
: inputFiles
) {
1138 if (auto *objFile
= dyn_cast
<ObjFile
>(file
)) {
1139 for (const Symbol
*sym
: objFile
->symbols
) {
1140 if (const auto *defined
= dyn_cast_or_null
<Defined
>(sym
)) {
1141 if (!defined
->isec() || !isCodeSection(defined
->isec()) ||
1144 addrs
.push_back(defined
->getVA());
1150 uint64_t addr
= in
.header
->addr
;
1151 for (uint64_t nextAddr
: addrs
) {
1152 uint64_t delta
= nextAddr
- addr
;
1155 encodeULEB128(delta
, os
);
1161 void FunctionStartsSection::writeTo(uint8_t *buf
) const {
1162 memcpy(buf
, contents
.data(), contents
.size());
1165 SymtabSection::SymtabSection(StringTableSection
&stringTableSection
)
1166 : LinkEditSection(segment_names::linkEdit
, section_names::symbolTable
),
1167 stringTableSection(stringTableSection
) {}
1169 void SymtabSection::emitBeginSourceStab(StringRef sourceFile
) {
1170 StabsEntry
stab(N_SO
);
1171 stab
.strx
= stringTableSection
.addString(saver().save(sourceFile
));
1172 stabs
.emplace_back(std::move(stab
));
1175 void SymtabSection::emitEndSourceStab() {
1176 StabsEntry
stab(N_SO
);
1178 stabs
.emplace_back(std::move(stab
));
1181 void SymtabSection::emitObjectFileStab(ObjFile
*file
) {
1182 StabsEntry
stab(N_OSO
);
1183 stab
.sect
= target
->cpuSubtype
;
1184 SmallString
<261> path(!file
->archiveName
.empty() ? file
->archiveName
1186 std::error_code ec
= sys::fs::make_absolute(path
);
1188 fatal("failed to get absolute path for " + path
);
1190 if (!file
->archiveName
.empty())
1191 path
.append({"(", file
->getName(), ")"});
1193 StringRef adjustedPath
= saver().save(path
.str());
1194 adjustedPath
.consume_front(config
->osoPrefix
);
1196 stab
.strx
= stringTableSection
.addString(adjustedPath
);
1198 stab
.value
= file
->modTime
;
1199 stabs
.emplace_back(std::move(stab
));
1202 void SymtabSection::emitEndFunStab(Defined
*defined
) {
1203 StabsEntry
stab(N_FUN
);
1204 stab
.value
= defined
->size
;
1205 stabs
.emplace_back(std::move(stab
));
1208 // Given a pointer to a function symbol, return the symbol that points to the
1209 // actual function body that will go in the final binary. Generally this is the
1210 // symbol itself, but if the symbol was folded using a thunk, we retrieve the
1211 // target function body from the thunk.
1212 Defined
*SymtabSection::getFuncBodySym(Defined
*originalSym
) {
1213 if (originalSym
->identicalCodeFoldingKind
== Symbol::ICFFoldKind::None
||
1214 originalSym
->identicalCodeFoldingKind
== Symbol::ICFFoldKind::Body
)
1217 return macho::getBodyForThunkFoldedSym(originalSym
);
1220 void SymtabSection::emitStabs() {
1221 if (config
->omitDebugInfo
)
1224 for (const std::string
&s
: config
->astPaths
) {
1225 StabsEntry
astStab(N_AST
);
1226 astStab
.strx
= stringTableSection
.addString(s
);
1227 stabs
.emplace_back(std::move(astStab
));
1230 // Cache the file ID for each symbol in an std::pair for faster sorting.
1231 using SortingPair
= std::pair
<Defined
*, int>;
1232 std::vector
<SortingPair
> symbolsNeedingStabs
;
1233 for (const SymtabEntry
&entry
:
1234 concat
<SymtabEntry
>(localSymbols
, externalSymbols
)) {
1235 Symbol
*sym
= entry
.sym
;
1236 assert(sym
->isLive() &&
1237 "dead symbols should not be in localSymbols, externalSymbols");
1238 if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
1239 // Excluded symbols should have been filtered out in finalizeContents().
1240 assert(defined
->includeInSymtab
);
1242 if (defined
->isAbsolute())
1245 // Constant-folded symbols go in the executable's symbol table, but don't
1246 // get a stabs entry unless --keep-icf-stabs flag is specified.
1247 if (!config
->keepICFStabs
&&
1248 defined
->identicalCodeFoldingKind
!= Symbol::ICFFoldKind::None
)
1251 ObjFile
*file
= defined
->getObjectFile();
1252 if (!file
|| !file
->compileUnit
)
1255 // We use 'originalIsec' to get the file id of the symbol since 'isec()'
1256 // might point to the merged ICF symbol's file
1257 symbolsNeedingStabs
.emplace_back(
1258 defined
, getFuncBodySym(defined
)->originalIsec
->getFile()->id
);
1262 llvm::stable_sort(symbolsNeedingStabs
,
1263 [&](const SortingPair
&a
, const SortingPair
&b
) {
1264 return a
.second
< b
.second
;
1267 // Emit STABS symbols so that dsymutil and/or the debugger can map address
1268 // regions in the final binary to the source and object files from which they
1270 InputFile
*lastFile
= nullptr;
1271 for (SortingPair
&pair
: symbolsNeedingStabs
) {
1272 Defined
*defined
= pair
.first
;
1273 // We use 'originalIsec' of the symbol since we care about the actual origin
1274 // of the symbol, not the canonical location returned by `isec()`.
1275 Defined
*funcBodySym
= getFuncBodySym(defined
);
1276 InputSection
*isec
= funcBodySym
->originalIsec
;
1277 ObjFile
*file
= cast
<ObjFile
>(isec
->getFile());
1279 if (lastFile
== nullptr || lastFile
!= file
) {
1280 if (lastFile
!= nullptr)
1281 emitEndSourceStab();
1284 emitBeginSourceStab(file
->sourceFile());
1285 emitObjectFileStab(file
);
1289 symStab
.sect
= isec
->parent
->index
;
1290 symStab
.strx
= stringTableSection
.addString(defined
->getName());
1291 symStab
.value
= funcBodySym
->getVA();
1293 if (isCodeSection(isec
)) {
1294 symStab
.type
= N_FUN
;
1295 stabs
.emplace_back(std::move(symStab
));
1296 emitEndFunStab(funcBodySym
);
1298 symStab
.type
= defined
->isExternal() ? N_GSYM
: N_STSYM
;
1299 stabs
.emplace_back(std::move(symStab
));
1304 emitEndSourceStab();
1307 void SymtabSection::finalizeContents() {
1308 auto addSymbol
= [&](std::vector
<SymtabEntry
> &symbols
, Symbol
*sym
) {
1309 uint32_t strx
= stringTableSection
.addString(sym
->getName());
1310 symbols
.push_back({sym
, strx
});
1313 std::function
<void(Symbol
*)> localSymbolsHandler
;
1314 switch (config
->localSymbolsPresence
) {
1315 case SymtabPresence::All
:
1316 localSymbolsHandler
= [&](Symbol
*sym
) { addSymbol(localSymbols
, sym
); };
1318 case SymtabPresence::None
:
1319 localSymbolsHandler
= [&](Symbol
*) { /* Do nothing*/ };
1321 case SymtabPresence::SelectivelyIncluded
:
1322 localSymbolsHandler
= [&](Symbol
*sym
) {
1323 if (config
->localSymbolPatterns
.match(sym
->getName()))
1324 addSymbol(localSymbols
, sym
);
1327 case SymtabPresence::SelectivelyExcluded
:
1328 localSymbolsHandler
= [&](Symbol
*sym
) {
1329 if (!config
->localSymbolPatterns
.match(sym
->getName()))
1330 addSymbol(localSymbols
, sym
);
1335 // Local symbols aren't in the SymbolTable, so we walk the list of object
1336 // files to gather them.
1337 // But if `-x` is set, then we don't need to. localSymbolsHandler() will do
1338 // the right thing regardless, but this check is a perf optimization because
1339 // iterating through all the input files and their symbols is expensive.
1340 if (config
->localSymbolsPresence
!= SymtabPresence::None
) {
1341 for (const InputFile
*file
: inputFiles
) {
1342 if (auto *objFile
= dyn_cast
<ObjFile
>(file
)) {
1343 for (Symbol
*sym
: objFile
->symbols
) {
1344 if (auto *defined
= dyn_cast_or_null
<Defined
>(sym
)) {
1345 if (defined
->isExternal() || !defined
->isLive() ||
1346 !defined
->includeInSymtab
)
1348 localSymbolsHandler(sym
);
1355 // __dyld_private is a local symbol too. It's linker-created and doesn't
1356 // exist in any object file.
1357 if (in
.stubHelper
&& in
.stubHelper
->dyldPrivate
)
1358 localSymbolsHandler(in
.stubHelper
->dyldPrivate
);
1360 for (Symbol
*sym
: symtab
->getSymbols()) {
1363 if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
1364 if (!defined
->includeInSymtab
)
1366 assert(defined
->isExternal());
1367 if (defined
->privateExtern
)
1368 localSymbolsHandler(defined
);
1370 addSymbol(externalSymbols
, defined
);
1371 } else if (auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
1372 if (dysym
->isReferenced())
1373 addSymbol(undefinedSymbols
, sym
);
1378 uint32_t symtabIndex
= stabs
.size();
1379 for (const SymtabEntry
&entry
:
1380 concat
<SymtabEntry
>(localSymbols
, externalSymbols
, undefinedSymbols
)) {
1381 entry
.sym
->symtabIndex
= symtabIndex
++;
1385 uint32_t SymtabSection::getNumSymbols() const {
1386 return stabs
.size() + localSymbols
.size() + externalSymbols
.size() +
1387 undefinedSymbols
.size();
1390 // This serves to hide (type-erase) the template parameter from SymtabSection.
1391 template <class LP
> class SymtabSectionImpl final
: public SymtabSection
{
1393 SymtabSectionImpl(StringTableSection
&stringTableSection
)
1394 : SymtabSection(stringTableSection
) {}
1395 uint64_t getRawSize() const override
;
1396 void writeTo(uint8_t *buf
) const override
;
1399 template <class LP
> uint64_t SymtabSectionImpl
<LP
>::getRawSize() const {
1400 return getNumSymbols() * sizeof(typename
LP::nlist
);
1403 template <class LP
> void SymtabSectionImpl
<LP
>::writeTo(uint8_t *buf
) const {
1404 auto *nList
= reinterpret_cast<typename
LP::nlist
*>(buf
);
1405 // Emit the stabs entries before the "real" symbols. We cannot emit them
1406 // after as that would render Symbol::symtabIndex inaccurate.
1407 for (const StabsEntry
&entry
: stabs
) {
1408 nList
->n_strx
= entry
.strx
;
1409 nList
->n_type
= entry
.type
;
1410 nList
->n_sect
= entry
.sect
;
1411 nList
->n_desc
= entry
.desc
;
1412 nList
->n_value
= entry
.value
;
1416 for (const SymtabEntry
&entry
: concat
<const SymtabEntry
>(
1417 localSymbols
, externalSymbols
, undefinedSymbols
)) {
1418 nList
->n_strx
= entry
.strx
;
1419 // TODO populate n_desc with more flags
1420 if (auto *defined
= dyn_cast
<Defined
>(entry
.sym
)) {
1422 if (defined
->privateExtern
) {
1423 // Private external -- dylib scoped symbol.
1424 // Promote to non-external at link time.
1426 } else if (defined
->isExternal()) {
1427 // Normal global symbol.
1430 // TU-local symbol from localSymbols.
1434 if (defined
->isAbsolute()) {
1435 nList
->n_type
= scope
| N_ABS
;
1436 nList
->n_sect
= NO_SECT
;
1437 nList
->n_value
= defined
->value
;
1439 nList
->n_type
= scope
| N_SECT
;
1440 nList
->n_sect
= defined
->isec()->parent
->index
;
1441 // For the N_SECT symbol type, n_value is the address of the symbol
1442 nList
->n_value
= defined
->getVA();
1444 nList
->n_desc
|= defined
->isExternalWeakDef() ? N_WEAK_DEF
: 0;
1446 defined
->referencedDynamically
? REFERENCED_DYNAMICALLY
: 0;
1447 } else if (auto *dysym
= dyn_cast
<DylibSymbol
>(entry
.sym
)) {
1448 uint16_t n_desc
= nList
->n_desc
;
1449 int16_t ordinal
= ordinalForDylibSymbol(*dysym
);
1450 if (ordinal
== BIND_SPECIAL_DYLIB_FLAT_LOOKUP
)
1451 SET_LIBRARY_ORDINAL(n_desc
, DYNAMIC_LOOKUP_ORDINAL
);
1452 else if (ordinal
== BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE
)
1453 SET_LIBRARY_ORDINAL(n_desc
, EXECUTABLE_ORDINAL
);
1455 assert(ordinal
> 0);
1456 SET_LIBRARY_ORDINAL(n_desc
, static_cast<uint8_t>(ordinal
));
1459 nList
->n_type
= N_EXT
;
1460 n_desc
|= dysym
->isWeakDef() ? N_WEAK_DEF
: 0;
1461 n_desc
|= dysym
->isWeakRef() ? N_WEAK_REF
: 0;
1462 nList
->n_desc
= n_desc
;
1470 macho::makeSymtabSection(StringTableSection
&stringTableSection
) {
1471 return make
<SymtabSectionImpl
<LP
>>(stringTableSection
);
1474 IndirectSymtabSection::IndirectSymtabSection()
1475 : LinkEditSection(segment_names::linkEdit
,
1476 section_names::indirectSymbolTable
) {}
1478 uint32_t IndirectSymtabSection::getNumSymbols() const {
1479 uint32_t size
= in
.got
->getEntries().size() +
1480 in
.tlvPointers
->getEntries().size() +
1481 in
.stubs
->getEntries().size();
1482 if (!config
->emitChainedFixups
)
1483 size
+= in
.stubs
->getEntries().size();
1487 bool IndirectSymtabSection::isNeeded() const {
1488 return in
.got
->isNeeded() || in
.tlvPointers
->isNeeded() ||
1489 in
.stubs
->isNeeded();
1492 void IndirectSymtabSection::finalizeContents() {
1494 in
.got
->reserved1
= off
;
1495 off
+= in
.got
->getEntries().size();
1496 in
.tlvPointers
->reserved1
= off
;
1497 off
+= in
.tlvPointers
->getEntries().size();
1498 in
.stubs
->reserved1
= off
;
1499 if (in
.lazyPointers
) {
1500 off
+= in
.stubs
->getEntries().size();
1501 in
.lazyPointers
->reserved1
= off
;
1505 static uint32_t indirectValue(const Symbol
*sym
) {
1506 if (sym
->symtabIndex
== UINT32_MAX
|| !needsBinding(sym
))
1507 return INDIRECT_SYMBOL_LOCAL
;
1508 return sym
->symtabIndex
;
1511 void IndirectSymtabSection::writeTo(uint8_t *buf
) const {
1513 for (const Symbol
*sym
: in
.got
->getEntries()) {
1514 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1517 for (const Symbol
*sym
: in
.tlvPointers
->getEntries()) {
1518 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1521 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
1522 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1526 if (in
.lazyPointers
) {
1527 // There is a 1:1 correspondence between stubs and LazyPointerSection
1528 // entries. But giving __stubs and __la_symbol_ptr the same reserved1
1529 // (the offset into the indirect symbol table) so that they both refer
1530 // to the same range of offsets confuses `strip`, so write the stubs
1531 // symbol table offsets a second time.
1532 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
1533 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1539 StringTableSection::StringTableSection()
1540 : LinkEditSection(segment_names::linkEdit
, section_names::stringTable
) {}
1542 uint32_t StringTableSection::addString(StringRef str
) {
1543 uint32_t strx
= size
;
1544 strings
.push_back(str
); // TODO: consider deduplicating strings
1545 size
+= str
.size() + 1; // account for null terminator
1549 void StringTableSection::writeTo(uint8_t *buf
) const {
1551 for (StringRef str
: strings
) {
1552 memcpy(buf
+ off
, str
.data(), str
.size());
1553 off
+= str
.size() + 1; // account for null terminator
1557 static_assert((CodeSignatureSection::blobHeadersSize
% 8) == 0);
1558 static_assert((CodeSignatureSection::fixedHeadersSize
% 8) == 0);
1560 CodeSignatureSection::CodeSignatureSection()
1561 : LinkEditSection(segment_names::linkEdit
, section_names::codeSignature
) {
1562 align
= 16; // required by libstuff
1564 // XXX: This mimics LD64, where it uses the install-name as codesign
1565 // identifier, if available.
1566 if (!config
->installName
.empty())
1567 fileName
= config
->installName
;
1569 // FIXME: Consider using finalOutput instead of outputFile.
1570 fileName
= config
->outputFile
;
1572 size_t slashIndex
= fileName
.rfind("/");
1573 if (slashIndex
!= std::string::npos
)
1574 fileName
= fileName
.drop_front(slashIndex
+ 1);
1576 // NOTE: Any changes to these calculations should be repeated
1577 // in llvm-objcopy's MachOLayoutBuilder::layoutTail.
1578 allHeadersSize
= alignTo
<16>(fixedHeadersSize
+ fileName
.size() + 1);
1579 fileNamePad
= allHeadersSize
- fixedHeadersSize
- fileName
.size();
1582 uint32_t CodeSignatureSection::getBlockCount() const {
1583 return (fileOff
+ blockSize
- 1) / blockSize
;
1586 uint64_t CodeSignatureSection::getRawSize() const {
1587 return allHeadersSize
+ getBlockCount() * hashSize
;
1590 void CodeSignatureSection::writeHashes(uint8_t *buf
) const {
1591 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1592 // MachOWriter::writeSignatureData.
1593 uint8_t *hashes
= buf
+ fileOff
+ allHeadersSize
;
1594 parallelFor(0, getBlockCount(), [&](size_t i
) {
1595 sha256(buf
+ i
* blockSize
,
1596 std::min(static_cast<size_t>(fileOff
- i
* blockSize
), blockSize
),
1597 hashes
+ i
* hashSize
);
1599 #if defined(__APPLE__)
1600 // This is macOS-specific work-around and makes no sense for any
1601 // other host OS. See https://openradar.appspot.com/FB8914231
1603 // The macOS kernel maintains a signature-verification cache to
1604 // quickly validate applications at time of execve(2). The trouble
1605 // is that for the kernel creates the cache entry at the time of the
1606 // mmap(2) call, before we have a chance to write either the code to
1607 // sign or the signature header+hashes. The fix is to invalidate
1608 // all cached data associated with the output file, thus discarding
1609 // the bogus prematurely-cached signature.
1610 msync(buf
, fileOff
+ getSize(), MS_INVALIDATE
);
1614 void CodeSignatureSection::writeTo(uint8_t *buf
) const {
1615 // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1616 // MachOWriter::writeSignatureData.
1617 uint32_t signatureSize
= static_cast<uint32_t>(getSize());
1618 auto *superBlob
= reinterpret_cast<CS_SuperBlob
*>(buf
);
1619 write32be(&superBlob
->magic
, CSMAGIC_EMBEDDED_SIGNATURE
);
1620 write32be(&superBlob
->length
, signatureSize
);
1621 write32be(&superBlob
->count
, 1);
1622 auto *blobIndex
= reinterpret_cast<CS_BlobIndex
*>(&superBlob
[1]);
1623 write32be(&blobIndex
->type
, CSSLOT_CODEDIRECTORY
);
1624 write32be(&blobIndex
->offset
, blobHeadersSize
);
1625 auto *codeDirectory
=
1626 reinterpret_cast<CS_CodeDirectory
*>(buf
+ blobHeadersSize
);
1627 write32be(&codeDirectory
->magic
, CSMAGIC_CODEDIRECTORY
);
1628 write32be(&codeDirectory
->length
, signatureSize
- blobHeadersSize
);
1629 write32be(&codeDirectory
->version
, CS_SUPPORTSEXECSEG
);
1630 write32be(&codeDirectory
->flags
, CS_ADHOC
| CS_LINKER_SIGNED
);
1631 write32be(&codeDirectory
->hashOffset
,
1632 sizeof(CS_CodeDirectory
) + fileName
.size() + fileNamePad
);
1633 write32be(&codeDirectory
->identOffset
, sizeof(CS_CodeDirectory
));
1634 codeDirectory
->nSpecialSlots
= 0;
1635 write32be(&codeDirectory
->nCodeSlots
, getBlockCount());
1636 write32be(&codeDirectory
->codeLimit
, fileOff
);
1637 codeDirectory
->hashSize
= static_cast<uint8_t>(hashSize
);
1638 codeDirectory
->hashType
= kSecCodeSignatureHashSHA256
;
1639 codeDirectory
->platform
= 0;
1640 codeDirectory
->pageSize
= blockSizeShift
;
1641 codeDirectory
->spare2
= 0;
1642 codeDirectory
->scatterOffset
= 0;
1643 codeDirectory
->teamOffset
= 0;
1644 codeDirectory
->spare3
= 0;
1645 codeDirectory
->codeLimit64
= 0;
1646 OutputSegment
*textSeg
= getOrCreateOutputSegment(segment_names::text
);
1647 write64be(&codeDirectory
->execSegBase
, textSeg
->fileOff
);
1648 write64be(&codeDirectory
->execSegLimit
, textSeg
->fileSize
);
1649 write64be(&codeDirectory
->execSegFlags
,
1650 config
->outputType
== MH_EXECUTE
? CS_EXECSEG_MAIN_BINARY
: 0);
1651 auto *id
= reinterpret_cast<char *>(&codeDirectory
[1]);
1652 memcpy(id
, fileName
.begin(), fileName
.size());
1653 memset(id
+ fileName
.size(), 0, fileNamePad
);
1656 CStringSection::CStringSection(const char *name
)
1657 : SyntheticSection(segment_names::text
, name
) {
1658 flags
= S_CSTRING_LITERALS
;
1661 void CStringSection::addInput(CStringInputSection
*isec
) {
1662 isec
->parent
= this;
1663 inputs
.push_back(isec
);
1664 if (isec
->align
> align
)
1665 align
= isec
->align
;
1668 void CStringSection::writeTo(uint8_t *buf
) const {
1669 for (const CStringInputSection
*isec
: inputs
) {
1670 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1673 StringRef string
= isec
->getStringRef(i
);
1674 memcpy(buf
+ piece
.outSecOff
, string
.data(), string
.size());
1679 void CStringSection::finalizeContents() {
1680 uint64_t offset
= 0;
1681 for (CStringInputSection
*isec
: inputs
) {
1682 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1685 // See comment above DeduplicatedCStringSection for how alignment is
1687 uint32_t pieceAlign
= 1
1688 << llvm::countr_zero(isec
->align
| piece
.inSecOff
);
1689 offset
= alignToPowerOf2(offset
, pieceAlign
);
1690 piece
.outSecOff
= offset
;
1691 isec
->isFinal
= true;
1692 StringRef string
= isec
->getStringRef(i
);
1693 offset
+= string
.size() + 1; // account for null terminator
1699 // Mergeable cstring literals are found under the __TEXT,__cstring section. In
1700 // contrast to ELF, which puts strings that need different alignments into
1701 // different sections, clang's Mach-O backend puts them all in one section.
1702 // Strings that need to be aligned have the .p2align directive emitted before
1703 // them, which simply translates into zero padding in the object file. In other
1704 // words, we have to infer the desired alignment of these cstrings from their
1707 // We differ slightly from ld64 in how we've chosen to align these cstrings.
1708 // Both LLD and ld64 preserve the number of trailing zeros in each cstring's
1709 // address in the input object files. When deduplicating identical cstrings,
1710 // both linkers pick the cstring whose address has more trailing zeros, and
1711 // preserve the alignment of that address in the final binary. However, ld64
1712 // goes a step further and also preserves the offset of the cstring from the
1713 // last section-aligned address. I.e. if a cstring is at offset 18 in the
1714 // input, with a section alignment of 16, then both LLD and ld64 will ensure the
1715 // final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also
1716 // ensure that the final address is of the form 16 * k + 2 for some k.
1718 // Note that ld64's heuristic means that a dedup'ed cstring's final address is
1719 // dependent on the order of the input object files. E.g. if in addition to the
1720 // cstring at offset 18 above, we have a duplicate one in another file with a
1721 // `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick
1722 // the cstring from the object file earlier on the command line (since both have
1723 // the same number of trailing zeros in their address). So the final cstring may
1724 // either be at some address `16 * k + 2` or at some address `2 * k`.
1726 // I've opted not to follow this behavior primarily for implementation
1727 // simplicity, and secondarily to save a few more bytes. It's not clear to me
1728 // that preserving the section alignment + offset is ever necessary, and there
1729 // are many cases that are clearly redundant. In particular, if an x86_64 object
1730 // file contains some strings that are accessed via SIMD instructions, then the
1731 // .cstring section in the object file will be 16-byte-aligned (since SIMD
1732 // requires its operand addresses to be 16-byte aligned). However, there will
1733 // typically also be other cstrings in the same file that aren't used via SIMD
1734 // and don't need this alignment. They will be emitted at some arbitrary address
1735 // `A`, but ld64 will treat them as being 16-byte aligned with an offset of `16
1737 void DeduplicatedCStringSection::finalizeContents() {
1738 // Find the largest alignment required for each string.
1739 for (const CStringInputSection
*isec
: inputs
) {
1740 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1743 auto s
= isec
->getCachedHashStringRef(i
);
1744 assert(isec
->align
!= 0);
1745 uint8_t trailingZeros
= llvm::countr_zero(isec
->align
| piece
.inSecOff
);
1746 auto it
= stringOffsetMap
.insert(
1747 std::make_pair(s
, StringOffset(trailingZeros
)));
1748 if (!it
.second
&& it
.first
->second
.trailingZeros
< trailingZeros
)
1749 it
.first
->second
.trailingZeros
= trailingZeros
;
1753 // Assign an offset for each string and save it to the corresponding
1754 // StringPieces for easy access.
1755 for (CStringInputSection
*isec
: inputs
) {
1756 for (const auto &[i
, piece
] : llvm::enumerate(isec
->pieces
)) {
1759 auto s
= isec
->getCachedHashStringRef(i
);
1760 auto it
= stringOffsetMap
.find(s
);
1761 assert(it
!= stringOffsetMap
.end());
1762 StringOffset
&offsetInfo
= it
->second
;
1763 if (offsetInfo
.outSecOff
== UINT64_MAX
) {
1764 offsetInfo
.outSecOff
=
1765 alignToPowerOf2(size
, 1ULL << offsetInfo
.trailingZeros
);
1767 offsetInfo
.outSecOff
+ s
.size() + 1; // account for null terminator
1769 piece
.outSecOff
= offsetInfo
.outSecOff
;
1771 isec
->isFinal
= true;
1775 void DeduplicatedCStringSection::writeTo(uint8_t *buf
) const {
1776 for (const auto &p
: stringOffsetMap
) {
1777 StringRef data
= p
.first
.val();
1778 uint64_t off
= p
.second
.outSecOff
;
1780 memcpy(buf
+ off
, data
.data(), data
.size());
1784 DeduplicatedCStringSection::StringOffset
1785 DeduplicatedCStringSection::getStringOffset(StringRef str
) const {
1786 // StringPiece uses 31 bits to store the hashes, so we replicate that
1787 uint32_t hash
= xxh3_64bits(str
) & 0x7fffffff;
1788 auto offset
= stringOffsetMap
.find(CachedHashStringRef(str
, hash
));
1789 assert(offset
!= stringOffsetMap
.end() &&
1790 "Looked-up strings should always exist in section");
1791 return offset
->second
;
1794 // This section is actually emitted as __TEXT,__const by ld64, but clang may
1795 // emit input sections of that name, and LLD doesn't currently support mixing
1796 // synthetic and concat-type OutputSections. To work around this, I've given
1797 // our merged-literals section a different name.
1798 WordLiteralSection::WordLiteralSection()
1799 : SyntheticSection(segment_names::text
, section_names::literals
) {
1803 void WordLiteralSection::addInput(WordLiteralInputSection
*isec
) {
1804 isec
->parent
= this;
1805 inputs
.push_back(isec
);
1808 void WordLiteralSection::finalizeContents() {
1809 for (WordLiteralInputSection
*isec
: inputs
) {
1810 // We do all processing of the InputSection here, so it will be effectively
1812 isec
->isFinal
= true;
1813 const uint8_t *buf
= isec
->data
.data();
1814 switch (sectionType(isec
->getFlags())) {
1815 case S_4BYTE_LITERALS
: {
1816 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 4) {
1817 if (!isec
->isLive(off
))
1819 uint32_t value
= *reinterpret_cast<const uint32_t *>(buf
+ off
);
1820 literal4Map
.emplace(value
, literal4Map
.size());
1824 case S_8BYTE_LITERALS
: {
1825 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 8) {
1826 if (!isec
->isLive(off
))
1828 uint64_t value
= *reinterpret_cast<const uint64_t *>(buf
+ off
);
1829 literal8Map
.emplace(value
, literal8Map
.size());
1833 case S_16BYTE_LITERALS
: {
1834 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 16) {
1835 if (!isec
->isLive(off
))
1837 UInt128 value
= *reinterpret_cast<const UInt128
*>(buf
+ off
);
1838 literal16Map
.emplace(value
, literal16Map
.size());
1843 llvm_unreachable("invalid literal section type");
1848 void WordLiteralSection::writeTo(uint8_t *buf
) const {
1849 // Note that we don't attempt to do any endianness conversion in addInput(),
1850 // so we don't do it here either -- just write out the original value,
1852 for (const auto &p
: literal16Map
)
1853 memcpy(buf
+ p
.second
* 16, &p
.first
, 16);
1854 buf
+= literal16Map
.size() * 16;
1856 for (const auto &p
: literal8Map
)
1857 memcpy(buf
+ p
.second
* 8, &p
.first
, 8);
1858 buf
+= literal8Map
.size() * 8;
1860 for (const auto &p
: literal4Map
)
1861 memcpy(buf
+ p
.second
* 4, &p
.first
, 4);
1864 ObjCImageInfoSection::ObjCImageInfoSection()
1865 : SyntheticSection(segment_names::data
, section_names::objCImageInfo
) {}
1867 ObjCImageInfoSection::ImageInfo
1868 ObjCImageInfoSection::parseImageInfo(const InputFile
*file
) {
1870 ArrayRef
<uint8_t> data
= file
->objCImageInfo
;
1871 // The image info struct has the following layout:
1873 // uint32_t version;
1876 if (data
.size() < 8) {
1877 warn(toString(file
) + ": invalid __objc_imageinfo size");
1881 auto *buf
= reinterpret_cast<const uint32_t *>(data
.data());
1882 if (read32le(buf
) != 0) {
1883 warn(toString(file
) + ": invalid __objc_imageinfo version");
1887 uint32_t flags
= read32le(buf
+ 1);
1888 info
.swiftVersion
= (flags
>> 8) & 0xff;
1889 info
.hasCategoryClassProperties
= flags
& 0x40;
1893 static std::string
swiftVersionString(uint8_t version
) {
1906 return ("0x" + Twine::utohexstr(version
)).str();
1910 // Validate each object file's __objc_imageinfo and use them to generate the
1911 // image info for the output binary. Only two pieces of info are relevant:
1912 // 1. The Swift version (should be identical across inputs)
1913 // 2. `bool hasCategoryClassProperties` (true only if true for all inputs)
1914 void ObjCImageInfoSection::finalizeContents() {
1915 assert(files
.size() != 0); // should have already been checked via isNeeded()
1917 info
.hasCategoryClassProperties
= true;
1918 const InputFile
*firstFile
;
1919 for (const InputFile
*file
: files
) {
1920 ImageInfo inputInfo
= parseImageInfo(file
);
1921 info
.hasCategoryClassProperties
&= inputInfo
.hasCategoryClassProperties
;
1923 // swiftVersion 0 means no Swift is present, so no version checking required
1924 if (inputInfo
.swiftVersion
== 0)
1927 if (info
.swiftVersion
!= 0 && info
.swiftVersion
!= inputInfo
.swiftVersion
) {
1928 error("Swift version mismatch: " + toString(firstFile
) + " has version " +
1929 swiftVersionString(info
.swiftVersion
) + " but " + toString(file
) +
1930 " has version " + swiftVersionString(inputInfo
.swiftVersion
));
1932 info
.swiftVersion
= inputInfo
.swiftVersion
;
1938 void ObjCImageInfoSection::writeTo(uint8_t *buf
) const {
1939 uint32_t flags
= info
.hasCategoryClassProperties
? 0x40 : 0x0;
1940 flags
|= info
.swiftVersion
<< 8;
1941 write32le(buf
+ 4, flags
);
1944 InitOffsetsSection::InitOffsetsSection()
1945 : SyntheticSection(segment_names::text
, section_names::initOffsets
) {
1946 flags
= S_INIT_FUNC_OFFSETS
;
1947 align
= 4; // This section contains 32-bit integers.
1950 uint64_t InitOffsetsSection::getSize() const {
1952 for (const ConcatInputSection
*isec
: sections
)
1953 count
+= isec
->relocs
.size();
1954 return count
* sizeof(uint32_t);
1957 void InitOffsetsSection::writeTo(uint8_t *buf
) const {
1958 // FIXME: Add function specified by -init when that argument is implemented.
1959 for (ConcatInputSection
*isec
: sections
) {
1960 for (const Reloc
&rel
: isec
->relocs
) {
1961 const Symbol
*referent
= rel
.referent
.dyn_cast
<Symbol
*>();
1962 assert(referent
&& "section relocation should have been rejected");
1963 uint64_t offset
= referent
->getVA() - in
.header
->addr
;
1964 // FIXME: Can we handle this gracefully?
1965 if (offset
> UINT32_MAX
)
1966 fatal(isec
->getLocation(rel
.offset
) + ": offset to initializer " +
1967 referent
->getName() + " (" + utohexstr(offset
) +
1968 ") does not fit in 32 bits");
1970 // Entries need to be added in the order they appear in the section, but
1971 // relocations aren't guaranteed to be sorted.
1972 size_t index
= rel
.offset
>> target
->p2WordSize
;
1973 write32le(&buf
[index
* sizeof(uint32_t)], offset
);
1975 buf
+= isec
->relocs
.size() * sizeof(uint32_t);
1979 // The inputs are __mod_init_func sections, which contain pointers to
1980 // initializer functions, therefore all relocations should be of the UNSIGNED
1981 // type. InitOffsetsSection stores offsets, so if the initializer's address is
1982 // not known at link time, stub-indirection has to be used.
1983 void InitOffsetsSection::setUp() {
1984 for (const ConcatInputSection
*isec
: sections
) {
1985 for (const Reloc
&rel
: isec
->relocs
) {
1986 RelocAttrs attrs
= target
->getRelocAttrs(rel
.type
);
1987 if (!attrs
.hasAttr(RelocAttrBits::UNSIGNED
))
1988 error(isec
->getLocation(rel
.offset
) +
1989 ": unsupported relocation type: " + attrs
.name
);
1990 if (rel
.addend
!= 0)
1991 error(isec
->getLocation(rel
.offset
) +
1992 ": relocation addend is not representable in __init_offsets");
1993 if (rel
.referent
.is
<InputSection
*>())
1994 error(isec
->getLocation(rel
.offset
) +
1995 ": unexpected section relocation");
1997 Symbol
*sym
= rel
.referent
.dyn_cast
<Symbol
*>();
1998 if (auto *undefined
= dyn_cast
<Undefined
>(sym
))
1999 treatUndefinedSymbol(*undefined
, isec
, rel
.offset
);
2000 if (needsBinding(sym
))
2001 in
.stubs
->addEntry(sym
);
2006 ObjCMethListSection::ObjCMethListSection()
2007 : SyntheticSection(segment_names::text
, section_names::objcMethList
) {
2008 flags
= S_ATTR_NO_DEAD_STRIP
;
2009 align
= relativeOffsetSize
;
2012 // Go through all input method lists and ensure that we have selrefs for all
2013 // their method names. The selrefs will be needed later by ::writeTo. We need to
2014 // create them early on here to ensure they are processed correctly by the lld
2016 void ObjCMethListSection::setUp() {
2017 for (const ConcatInputSection
*isec
: inputs
) {
2018 uint32_t structSizeAndFlags
= 0, structCount
= 0;
2019 readMethodListHeader(isec
->data
.data(), structSizeAndFlags
, structCount
);
2020 uint32_t originalStructSize
= structSizeAndFlags
& structSizeMask
;
2021 // Method name is immediately after header
2022 uint32_t methodNameOff
= methodListHeaderSize
;
2024 // Loop through all methods, and ensure a selref for each of them exists.
2025 while (methodNameOff
< isec
->data
.size()) {
2026 const Reloc
*reloc
= isec
->getRelocAt(methodNameOff
);
2027 assert(reloc
&& "Relocation expected at method list name slot");
2029 StringRef methname
= reloc
->getReferentString();
2030 if (!ObjCSelRefsHelper::getSelRef(methname
))
2031 ObjCSelRefsHelper::makeSelRef(methname
);
2033 // Jump to method name offset in next struct
2034 methodNameOff
+= originalStructSize
;
2039 // Calculate section size and final offsets for where InputSection's need to be
2041 void ObjCMethListSection::finalize() {
2042 // sectionSize will be the total size of the __objc_methlist section
2044 for (ConcatInputSection
*isec
: inputs
) {
2045 // We can also use sectionSize as write offset for isec
2046 assert(sectionSize
== alignToPowerOf2(sectionSize
, relativeOffsetSize
) &&
2047 "expected __objc_methlist to be aligned by default with the "
2048 "required section alignment");
2049 isec
->outSecOff
= sectionSize
;
2051 isec
->isFinal
= true;
2052 uint32_t relativeListSize
=
2053 computeRelativeMethodListSize(isec
->data
.size());
2054 sectionSize
+= relativeListSize
;
2056 // If encoding the method list in relative offset format shrinks the size,
2057 // then we also need to adjust symbol sizes to match the new size. Note that
2058 // on 32bit platforms the size of the method list will remain the same when
2059 // encoded in relative offset format.
2060 if (relativeListSize
!= isec
->data
.size()) {
2061 for (Symbol
*sym
: isec
->symbols
) {
2062 assert(isa
<Defined
>(sym
) &&
2063 "Unexpected undefined symbol in ObjC method list");
2064 auto *def
= cast
<Defined
>(sym
);
2065 // There can be 0-size symbols, check if this is the case and ignore
2069 def
->size
== isec
->data
.size() &&
2070 "Invalid ObjC method list symbol size: expected symbol size to "
2072 def
->size
= relativeListSize
;
2079 void ObjCMethListSection::writeTo(uint8_t *bufStart
) const {
2080 uint8_t *buf
= bufStart
;
2081 for (const ConcatInputSection
*isec
: inputs
) {
2082 assert(buf
- bufStart
== long(isec
->outSecOff
) &&
2083 "Writing at unexpected offset");
2084 uint32_t writtenSize
= writeRelativeMethodList(isec
, buf
);
2087 assert(buf
- bufStart
== sectionSize
&&
2088 "Written size does not match expected section size");
2091 // Check if an InputSection is a method list. To do this we scan the
2092 // InputSection for any symbols who's names match the patterns we expect clang
2093 // to generate for method lists.
2094 bool ObjCMethListSection::isMethodList(const ConcatInputSection
*isec
) {
2095 const char *symPrefixes
[] = {objc::symbol_names::classMethods
,
2096 objc::symbol_names::instanceMethods
,
2097 objc::symbol_names::categoryInstanceMethods
,
2098 objc::symbol_names::categoryClassMethods
};
2101 for (const Symbol
*sym
: isec
->symbols
) {
2102 auto *def
= dyn_cast_or_null
<Defined
>(sym
);
2105 for (const char *prefix
: symPrefixes
) {
2106 if (def
->getName().starts_with(prefix
)) {
2107 assert(def
->size
== isec
->data
.size() &&
2108 "Invalid ObjC method list symbol size: expected symbol size to "
2110 assert(def
->value
== 0 &&
2111 "Offset of ObjC method list symbol must be 0");
2120 // Encode a single relative offset value. The input is the data/symbol at
2121 // (&isec->data[inSecOff]). The output is written to (&buf[outSecOff]).
2122 // 'createSelRef' indicates that we should not directly use the specified
2123 // symbol, but instead get the selRef for the symbol and use that instead.
2124 void ObjCMethListSection::writeRelativeOffsetForIsec(
2125 const ConcatInputSection
*isec
, uint8_t *buf
, uint32_t &inSecOff
,
2126 uint32_t &outSecOff
, bool useSelRef
) const {
2127 const Reloc
*reloc
= isec
->getRelocAt(inSecOff
);
2128 assert(reloc
&& "Relocation expected at __objc_methlist Offset");
2132 StringRef methname
= reloc
->getReferentString();
2133 ConcatInputSection
*selRef
= ObjCSelRefsHelper::getSelRef(methname
);
2134 assert(selRef
&& "Expected all selector names to already be already be "
2135 "present in __objc_selrefs");
2136 symVA
= selRef
->getVA();
2137 assert(selRef
->data
.size() == target
->wordSize
&&
2138 "Expected one selref per ConcatInputSection");
2139 } else if (reloc
->referent
.is
<Symbol
*>()) {
2140 auto *def
= dyn_cast_or_null
<Defined
>(reloc
->referent
.get
<Symbol
*>());
2141 assert(def
&& "Expected all syms in __objc_methlist to be defined");
2142 symVA
= def
->getVA();
2144 auto *isec
= reloc
->referent
.get
<InputSection
*>();
2145 symVA
= isec
->getVA(reloc
->addend
);
2148 uint32_t currentVA
= isec
->getVA() + outSecOff
;
2149 uint32_t delta
= symVA
- currentVA
;
2150 write32le(buf
+ outSecOff
, delta
);
2152 // Move one pointer forward in the absolute method list
2153 inSecOff
+= target
->wordSize
;
2154 // Move one relative offset forward in the relative method list (32 bits)
2155 outSecOff
+= relativeOffsetSize
;
2158 // Write a relative method list to buf, return the size of the written
2161 ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection
*isec
,
2162 uint8_t *buf
) const {
2163 // Copy over the header, and add the "this is a relative method list" magic
2165 uint32_t structSizeAndFlags
= 0, structCount
= 0;
2166 readMethodListHeader(isec
->data
.data(), structSizeAndFlags
, structCount
);
2167 // Set the struct size for the relative method list
2168 uint32_t relativeStructSizeAndFlags
=
2169 (relativeOffsetSize
* pointersPerStruct
) & structSizeMask
;
2170 // Carry over the old flags from the input struct
2171 relativeStructSizeAndFlags
|= structSizeAndFlags
& structFlagsMask
;
2172 // Set the relative method list flag
2173 relativeStructSizeAndFlags
|= relMethodHeaderFlag
;
2175 writeMethodListHeader(buf
, relativeStructSizeAndFlags
, structCount
);
2177 assert(methodListHeaderSize
+
2178 (structCount
* pointersPerStruct
* target
->wordSize
) ==
2179 isec
->data
.size() &&
2180 "Invalid computed ObjC method list size");
2182 uint32_t inSecOff
= methodListHeaderSize
;
2183 uint32_t outSecOff
= methodListHeaderSize
;
2185 // Go through the method list and encode input absolute pointers as relative
2186 // offsets. writeRelativeOffsetForIsec will be incrementing inSecOff and
2188 for (uint32_t i
= 0; i
< structCount
; i
++) {
2189 // Write the name of the method
2190 writeRelativeOffsetForIsec(isec
, buf
, inSecOff
, outSecOff
, true);
2191 // Write the type of the method
2192 writeRelativeOffsetForIsec(isec
, buf
, inSecOff
, outSecOff
, false);
2193 // Write reference to the selector of the method
2194 writeRelativeOffsetForIsec(isec
, buf
, inSecOff
, outSecOff
, false);
2197 // Expecting to have read all the data in the isec
2198 assert(inSecOff
== isec
->data
.size() &&
2199 "Invalid actual ObjC method list size");
2201 outSecOff
== computeRelativeMethodListSize(inSecOff
) &&
2202 "Mismatch between input & output size when writing relative method list");
2206 // Given the size of an ObjC method list InputSection, return the size of the
2207 // method list when encoded in relative offsets format. We can do this without
2208 // decoding the actual data, as it can be directly inferred from the size of the
2210 uint32_t ObjCMethListSection::computeRelativeMethodListSize(
2211 uint32_t absoluteMethodListSize
) const {
2212 uint32_t oldPointersSize
= absoluteMethodListSize
- methodListHeaderSize
;
2213 uint32_t pointerCount
= oldPointersSize
/ target
->wordSize
;
2214 assert(((pointerCount
% pointersPerStruct
) == 0) &&
2215 "__objc_methlist expects method lists to have multiple-of-3 pointers");
2217 uint32_t newPointersSize
= pointerCount
* relativeOffsetSize
;
2218 uint32_t newTotalSize
= methodListHeaderSize
+ newPointersSize
;
2220 assert((newTotalSize
<= absoluteMethodListSize
) &&
2221 "Expected relative method list size to be smaller or equal than "
2223 return newTotalSize
;
2226 // Read a method list header from buf
2227 void ObjCMethListSection::readMethodListHeader(const uint8_t *buf
,
2228 uint32_t &structSizeAndFlags
,
2229 uint32_t &structCount
) const {
2230 structSizeAndFlags
= read32le(buf
);
2231 structCount
= read32le(buf
+ sizeof(uint32_t));
2234 // Write a method list header to buf
2235 void ObjCMethListSection::writeMethodListHeader(uint8_t *buf
,
2236 uint32_t structSizeAndFlags
,
2237 uint32_t structCount
) const {
2238 write32le(buf
, structSizeAndFlags
);
2239 write32le(buf
+ sizeof(structSizeAndFlags
), structCount
);
2242 void macho::createSyntheticSymbols() {
2243 auto addHeaderSymbol
= [](const char *name
) {
2244 symtab
->addSynthetic(name
, in
.header
->isec
, /*value=*/0,
2245 /*isPrivateExtern=*/true, /*includeInSymtab=*/false,
2246 /*referencedDynamically=*/false);
2249 switch (config
->outputType
) {
2250 // FIXME: Assign the right address value for these symbols
2251 // (rather than 0). But we need to do that after assignAddresses().
2253 // If linking PIE, __mh_execute_header is a defined symbol in
2255 // Otherwise, it's an absolute symbol.
2257 symtab
->addSynthetic("__mh_execute_header", in
.header
->isec
, /*value=*/0,
2258 /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
2259 /*referencedDynamically=*/true);
2261 symtab
->addSynthetic("__mh_execute_header", /*isec=*/nullptr, /*value=*/0,
2262 /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
2263 /*referencedDynamically=*/true);
2266 // The following symbols are N_SECT symbols, even though the header is not
2267 // part of any section and that they are private to the bundle/dylib/object
2268 // they are part of.
2270 addHeaderSymbol("__mh_bundle_header");
2273 addHeaderSymbol("__mh_dylib_header");
2276 addHeaderSymbol("__mh_dylinker_header");
2279 addHeaderSymbol("__mh_object_header");
2282 llvm_unreachable("unexpected outputType");
2286 // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
2287 // which does e.g. cleanup of static global variables. The ABI document
2288 // says that the pointer can point to any address in one of the dylib's
2289 // segments, but in practice ld64 seems to set it to point to the header,
2290 // so that's what's implemented here.
2291 addHeaderSymbol("___dso_handle");
2294 ChainedFixupsSection::ChainedFixupsSection()
2295 : LinkEditSection(segment_names::linkEdit
, section_names::chainFixups
) {}
2297 bool ChainedFixupsSection::isNeeded() const {
2298 assert(config
->emitChainedFixups
);
2299 // dyld always expects LC_DYLD_CHAINED_FIXUPS to point to a valid
2300 // dyld_chained_fixups_header, so we create this section even if there aren't
2305 void ChainedFixupsSection::addBinding(const Symbol
*sym
,
2306 const InputSection
*isec
, uint64_t offset
,
2308 locations
.emplace_back(isec
, offset
);
2309 int64_t outlineAddend
= (addend
< 0 || addend
> 0xFF) ? addend
: 0;
2310 auto [it
, inserted
] = bindings
.insert(
2311 {{sym
, outlineAddend
}, static_cast<uint32_t>(bindings
.size())});
2314 symtabSize
+= sym
->getName().size() + 1;
2315 hasWeakBind
= hasWeakBind
|| needsWeakBind(*sym
);
2316 if (!isInt
<23>(outlineAddend
))
2317 needsLargeAddend
= true;
2318 else if (outlineAddend
!= 0)
2323 std::pair
<uint32_t, uint8_t>
2324 ChainedFixupsSection::getBinding(const Symbol
*sym
, int64_t addend
) const {
2325 int64_t outlineAddend
= (addend
< 0 || addend
> 0xFF) ? addend
: 0;
2326 auto it
= bindings
.find({sym
, outlineAddend
});
2327 assert(it
!= bindings
.end() && "binding not found in the imports table");
2328 if (outlineAddend
== 0)
2329 return {it
->second
, addend
};
2330 return {it
->second
, 0};
2333 static size_t writeImport(uint8_t *buf
, int format
, int16_t libOrdinal
,
2334 bool weakRef
, uint32_t nameOffset
, int64_t addend
) {
2336 case DYLD_CHAINED_IMPORT
: {
2337 auto *import
= reinterpret_cast<dyld_chained_import
*>(buf
);
2338 import
->lib_ordinal
= libOrdinal
;
2339 import
->weak_import
= weakRef
;
2340 import
->name_offset
= nameOffset
;
2341 return sizeof(dyld_chained_import
);
2343 case DYLD_CHAINED_IMPORT_ADDEND
: {
2344 auto *import
= reinterpret_cast<dyld_chained_import_addend
*>(buf
);
2345 import
->lib_ordinal
= libOrdinal
;
2346 import
->weak_import
= weakRef
;
2347 import
->name_offset
= nameOffset
;
2348 import
->addend
= addend
;
2349 return sizeof(dyld_chained_import_addend
);
2351 case DYLD_CHAINED_IMPORT_ADDEND64
: {
2352 auto *import
= reinterpret_cast<dyld_chained_import_addend64
*>(buf
);
2353 import
->lib_ordinal
= libOrdinal
;
2354 import
->weak_import
= weakRef
;
2355 import
->name_offset
= nameOffset
;
2356 import
->addend
= addend
;
2357 return sizeof(dyld_chained_import_addend64
);
2360 llvm_unreachable("Unknown import format");
2364 size_t ChainedFixupsSection::SegmentInfo::getSize() const {
2365 assert(pageStarts
.size() > 0 && "SegmentInfo for segment with no fixups?");
2366 return alignTo
<8>(sizeof(dyld_chained_starts_in_segment
) +
2367 pageStarts
.back().first
* sizeof(uint16_t));
2370 size_t ChainedFixupsSection::SegmentInfo::writeTo(uint8_t *buf
) const {
2371 auto *segInfo
= reinterpret_cast<dyld_chained_starts_in_segment
*>(buf
);
2372 segInfo
->size
= getSize();
2373 segInfo
->page_size
= target
->getPageSize();
2374 // FIXME: Use DYLD_CHAINED_PTR_64_OFFSET on newer OS versions.
2375 segInfo
->pointer_format
= DYLD_CHAINED_PTR_64
;
2376 segInfo
->segment_offset
= oseg
->addr
- in
.header
->addr
;
2377 segInfo
->max_valid_pointer
= 0; // not used on 64-bit
2378 segInfo
->page_count
= pageStarts
.back().first
+ 1;
2380 uint16_t *starts
= segInfo
->page_start
;
2381 for (size_t i
= 0; i
< segInfo
->page_count
; ++i
)
2382 starts
[i
] = DYLD_CHAINED_PTR_START_NONE
;
2384 for (auto [pageIdx
, startAddr
] : pageStarts
)
2385 starts
[pageIdx
] = startAddr
;
2386 return segInfo
->size
;
2389 static size_t importEntrySize(int format
) {
2391 case DYLD_CHAINED_IMPORT
:
2392 return sizeof(dyld_chained_import
);
2393 case DYLD_CHAINED_IMPORT_ADDEND
:
2394 return sizeof(dyld_chained_import_addend
);
2395 case DYLD_CHAINED_IMPORT_ADDEND64
:
2396 return sizeof(dyld_chained_import_addend64
);
2398 llvm_unreachable("Unknown import format");
2402 // This is step 3 of the algorithm described in the class comment of
2403 // ChainedFixupsSection.
2405 // LC_DYLD_CHAINED_FIXUPS data consists of (in this order):
2406 // * A dyld_chained_fixups_header
2407 // * A dyld_chained_starts_in_image
2408 // * One dyld_chained_starts_in_segment per segment
2409 // * List of all imports (dyld_chained_import, dyld_chained_import_addend, or
2410 // dyld_chained_import_addend64)
2411 // * Names of imported symbols
2412 void ChainedFixupsSection::writeTo(uint8_t *buf
) const {
2413 auto *header
= reinterpret_cast<dyld_chained_fixups_header
*>(buf
);
2414 header
->fixups_version
= 0;
2415 header
->imports_count
= bindings
.size();
2416 header
->imports_format
= importFormat
;
2417 header
->symbols_format
= 0;
2419 buf
+= alignTo
<8>(sizeof(*header
));
2421 auto curOffset
= [&buf
, &header
]() -> uint32_t {
2422 return buf
- reinterpret_cast<uint8_t *>(header
);
2425 header
->starts_offset
= curOffset();
2427 auto *imageInfo
= reinterpret_cast<dyld_chained_starts_in_image
*>(buf
);
2428 imageInfo
->seg_count
= outputSegments
.size();
2429 uint32_t *segStarts
= imageInfo
->seg_info_offset
;
2431 // dyld_chained_starts_in_image ends in a flexible array member containing an
2432 // uint32_t for each segment. Leave room for it, and fill it via segStarts.
2433 buf
+= alignTo
<8>(offsetof(dyld_chained_starts_in_image
, seg_info_offset
) +
2434 outputSegments
.size() * sizeof(uint32_t));
2436 // Initialize all offsets to 0, which indicates that the segment does not have
2437 // fixups. Those that do have them will be filled in below.
2438 for (size_t i
= 0; i
< outputSegments
.size(); ++i
)
2441 for (const SegmentInfo
&seg
: fixupSegments
) {
2442 segStarts
[seg
.oseg
->index
] = curOffset() - header
->starts_offset
;
2443 buf
+= seg
.writeTo(buf
);
2446 // Write imports table.
2447 header
->imports_offset
= curOffset();
2448 uint64_t nameOffset
= 0;
2449 for (auto [import
, idx
] : bindings
) {
2450 const Symbol
&sym
= *import
.first
;
2451 buf
+= writeImport(buf
, importFormat
, ordinalForSymbol(sym
),
2452 sym
.isWeakRef(), nameOffset
, import
.second
);
2453 nameOffset
+= sym
.getName().size() + 1;
2456 // Write imported symbol names.
2457 header
->symbols_offset
= curOffset();
2458 for (auto [import
, idx
] : bindings
) {
2459 StringRef name
= import
.first
->getName();
2460 memcpy(buf
, name
.data(), name
.size());
2461 buf
+= name
.size() + 1; // account for null terminator
2464 assert(curOffset() == getRawSize());
2467 // This is step 2 of the algorithm described in the class comment of
2468 // ChainedFixupsSection.
2469 void ChainedFixupsSection::finalizeContents() {
2470 assert(target
->wordSize
== 8 && "Only 64-bit platforms are supported");
2471 assert(config
->emitChainedFixups
);
2473 if (!isUInt
<32>(symtabSize
))
2474 error("cannot encode chained fixups: imported symbols table size " +
2475 Twine(symtabSize
) + " exceeds 4 GiB");
2477 bool needsLargeOrdinal
= any_of(bindings
, [](const auto &p
) {
2478 // 0xF1 - 0xFF are reserved for special ordinals in the 8-bit encoding.
2479 return ordinalForSymbol(*p
.first
.first
) > 0xF0;
2482 if (needsLargeAddend
|| !isUInt
<23>(symtabSize
) || needsLargeOrdinal
)
2483 importFormat
= DYLD_CHAINED_IMPORT_ADDEND64
;
2484 else if (needsAddend
)
2485 importFormat
= DYLD_CHAINED_IMPORT_ADDEND
;
2487 importFormat
= DYLD_CHAINED_IMPORT
;
2489 for (Location
&loc
: locations
)
2491 loc
.isec
->parent
->getSegmentOffset() + loc
.isec
->getOffset(loc
.offset
);
2493 llvm::sort(locations
, [](const Location
&a
, const Location
&b
) {
2494 const OutputSegment
*segA
= a
.isec
->parent
->parent
;
2495 const OutputSegment
*segB
= b
.isec
->parent
->parent
;
2497 return a
.offset
< b
.offset
;
2498 return segA
->addr
< segB
->addr
;
2501 auto sameSegment
= [](const Location
&a
, const Location
&b
) {
2502 return a
.isec
->parent
->parent
== b
.isec
->parent
->parent
;
2505 const uint64_t pageSize
= target
->getPageSize();
2506 for (size_t i
= 0, count
= locations
.size(); i
< count
;) {
2507 const Location
&firstLoc
= locations
[i
];
2508 fixupSegments
.emplace_back(firstLoc
.isec
->parent
->parent
);
2509 while (i
< count
&& sameSegment(locations
[i
], firstLoc
)) {
2510 uint32_t pageIdx
= locations
[i
].offset
/ pageSize
;
2511 fixupSegments
.back().pageStarts
.emplace_back(
2512 pageIdx
, locations
[i
].offset
% pageSize
);
2514 while (i
< count
&& sameSegment(locations
[i
], firstLoc
) &&
2515 locations
[i
].offset
/ pageSize
== pageIdx
)
2520 // Compute expected encoded size.
2521 size
= alignTo
<8>(sizeof(dyld_chained_fixups_header
));
2522 size
+= alignTo
<8>(offsetof(dyld_chained_starts_in_image
, seg_info_offset
) +
2523 outputSegments
.size() * sizeof(uint32_t));
2524 for (const SegmentInfo
&seg
: fixupSegments
)
2525 size
+= seg
.getSize();
2526 size
+= importEntrySize(importFormat
) * bindings
.size();
2530 template SymtabSection
*macho::makeSymtabSection
<LP64
>(StringTableSection
&);
2531 template SymtabSection
*macho::makeSymtabSection
<ILP32
>(StringTableSection
&);