//===- SyntheticSections.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SyntheticSections.h"
#include "ConcatOutputSection.h"
#include "ExportTrie.h"
#include "InputFiles.h"
#include "MachOStructs.h"
#include "OutputSegment.h"
#include "SymbolTable.h"

#include "lld/Common/CommonLinkerContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"

#if defined(__APPLE__)
#define COMMON_DIGEST_FOR_OPENSSL
#include <CommonCrypto/CommonDigest.h>
#else
#include "llvm/Support/SHA256.h"
#endif

#ifdef LLVM_HAVE_LIBXAR

using namespace llvm::MachO;
using namespace llvm::support;
using namespace llvm::support::endian;
using namespace lld::macho;
// Reads `len` bytes at data and writes the 32-byte SHA256 checksum to `output`.
static void sha256(const uint8_t *data, size_t len, uint8_t *output) {
#if defined(__APPLE__)
  // FIXME: Make LLVM's SHA256 faster and use it unconditionally. See PR56121
  // for some notes on this.
  CC_SHA256(data, len, output);
#else
  ArrayRef<uint8_t> block(data, len);
  std::array<uint8_t, 32> hash = SHA256::hash(block);
  static_assert(hash.size() == CodeSignatureSection::hashSize, "");
  memcpy(output, hash.data(), hash.size());
#endif
std::vector<SyntheticSection *> macho::syntheticSections;

SyntheticSection::SyntheticSection(const char *segname, const char *name)
    : OutputSection(SyntheticKind, name) {
  std::tie(this->segname, this->name) = maybeRenameSection({segname, name});
  isec = makeSyntheticInputSection(segname, name);
  syntheticSections.push_back(this);

// dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
// from the beginning of the file (i.e. the header).
MachHeaderSection::MachHeaderSection()
    : SyntheticSection(segment_names::text, section_names::header) {
  // XXX: This is a hack. (See D97007)
  // Setting the index to 1 to pretend that this section is the text section.

void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
  loadCommands.push_back(lc);
  sizeOfCmds += lc->getSize();

uint64_t MachHeaderSection::getSize() const {
  uint64_t size = target->headerSize + sizeOfCmds + config->headerPad;
  // If we are emitting an encryptable binary, our load commands must have a
  // separate (non-encrypted) page to themselves.
  if (config->emitEncryptionInfo)
    size = alignTo(size, target->getPageSize());
static uint32_t cpuSubtype() {
  uint32_t subtype = target->cpuSubtype;
  if (config->outputType == MH_EXECUTE && !config->staticLink &&
      target->cpuSubtype == CPU_SUBTYPE_X86_64_ALL &&
      config->platform() == PLATFORM_MACOS &&
      config->platformInfo.minimum >= VersionTuple(10, 5))
    subtype |= CPU_SUBTYPE_LIB64;

void MachHeaderSection::writeTo(uint8_t *buf) const {
  auto *hdr = reinterpret_cast<mach_header *>(buf);
  hdr->magic = target->magic;
  hdr->cputype = target->cpuType;
  hdr->cpusubtype = cpuSubtype();
  hdr->filetype = config->outputType;
  hdr->ncmds = loadCommands.size();
  hdr->sizeofcmds = sizeOfCmds;
  hdr->flags = MH_DYLDLINK;

  if (config->namespaceKind == NamespaceKind::twolevel)
    hdr->flags |= MH_NOUNDEFS | MH_TWOLEVEL;

  if (config->outputType == MH_DYLIB && !config->hasReexports)
    hdr->flags |= MH_NO_REEXPORTED_DYLIBS;

  if (config->markDeadStrippableDylib)
    hdr->flags |= MH_DEAD_STRIPPABLE_DYLIB;

  if (config->outputType == MH_EXECUTE && config->isPic)
    hdr->flags |= MH_PIE;

  if (config->outputType == MH_DYLIB && config->applicationExtension)
    hdr->flags |= MH_APP_EXTENSION_SAFE;

  if (in.exports->hasWeakSymbol || in.weakBinding->hasNonWeakDefinition())
    hdr->flags |= MH_WEAK_DEFINES;

  if (in.exports->hasWeakSymbol || in.weakBinding->hasEntry())
    hdr->flags |= MH_BINDS_TO_WEAK;

  for (const OutputSegment *seg : outputSegments) {
    for (const OutputSection *osec : seg->getSections()) {
      if (isThreadLocalVariables(osec->flags)) {
        hdr->flags |= MH_HAS_TLV_DESCRIPTORS;

  uint8_t *p = reinterpret_cast<uint8_t *>(hdr) + target->headerSize;
  for (const LoadCommand *lc : loadCommands) {
PageZeroSection::PageZeroSection()
    : SyntheticSection(segment_names::pageZero, section_names::pageZero) {}

RebaseSection::RebaseSection()
    : LinkEditSection(segment_names::linkEdit, section_names::rebase) {}

  uint64_t sequenceLength;

static void emitIncrement(uint64_t incr, raw_svector_ostream &os) {
  if ((incr >> target->p2WordSize) <= REBASE_IMMEDIATE_MASK &&
      (incr % target->wordSize) == 0) {
    os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED |
                               (incr >> target->p2WordSize));
    os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB);
    encodeULEB128(incr, os);
static void flushRebase(const RebaseState &state, raw_svector_ostream &os) {
  assert(state.sequenceLength > 0);

  if (state.skipLength == target->wordSize) {
    if (state.sequenceLength <= REBASE_IMMEDIATE_MASK) {
      os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES |
                                 state.sequenceLength);
      os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
      encodeULEB128(state.sequenceLength, os);
  } else if (state.sequenceLength == 1) {
    os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB);
    encodeULEB128(state.skipLength - target->wordSize, os);
    os << static_cast<uint8_t>(
        REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB);
    encodeULEB128(state.sequenceLength, os);
    encodeULEB128(state.skipLength - target->wordSize, os);
// Rebases are communicated to dyld using a bytecode, whose opcodes cause the
// memory location at a specific address to be rebased and/or the address to
// be incremented.
//
// Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic
// one, encoding a series of evenly spaced addresses. This algorithm works by
// splitting up the sorted list of addresses into such chunks. If the locations
// are consecutive or the sequence consists of a single location, flushRebase
// will use a smaller, more specialized encoding.
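// For illustration, assume 8-byte words and rebase locations at segment
// offsets 0x1000, 0x1008, 0x1010 and 0x1030 (hypothetical values, not taken
// from any real input). encodeRebases() below would emit roughly:
//   REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | segIndex, ULEB(0x1000)
//   REBASE_OPCODE_DO_REBASE_IMM_TIMES | 3      // 0x1000, 0x1008, 0x1010
//   REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 3      // advance 3 * wordSize = 24
//   REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1      // 0x1030
// i.e. runs of consecutive word-spaced locations collapse into a single
// DO_REBASE opcode, and gaps become address increments.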
static void encodeRebases(const OutputSegment *seg,
                          MutableArrayRef<Location> locations,
                          raw_svector_ostream &os) {
  // dyld operates on segments. Translate section offsets into segment offsets.
  for (Location &loc : locations)
        loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(loc.offset);
  // The algorithm assumes that locations are unique.
      llvm::unique(locations, [](const Location &a, const Location &b) {
        return a.offset == b.offset;
  size_t count = end - locations.begin();

  os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
  assert(!locations.empty());
  uint64_t offset = locations[0].offset;
  encodeULEB128(offset, os);

  RebaseState state{1, target->wordSize};

  for (size_t i = 1; i < count; ++i) {
    offset = locations[i].offset;

    uint64_t skip = offset - locations[i - 1].offset;
    assert(skip != 0 && "duplicate locations should have been weeded out");

    if (skip == state.skipLength) {
      ++state.sequenceLength;
    } else if (state.sequenceLength == 1) {
      ++state.sequenceLength;
      state.skipLength = skip;
    } else if (skip < state.skipLength) {
      // The address is lower than what the rebase pointer would be if the last
      // location were part of a sequence. We start a new sequence from the
      // previous location.
      --state.sequenceLength;
      flushRebase(state, os);

      state.sequenceLength = 2;
      state.skipLength = skip;
      // The address is at some positive offset from the rebase pointer. We
      // start a new sequence which begins with the current location.
      flushRebase(state, os);
      emitIncrement(skip - state.skipLength, os);
      state.sequenceLength = 1;
      state.skipLength = target->wordSize;
  flushRebase(state, os);
void RebaseSection::finalizeContents() {
  if (locations.empty())
    return;

  raw_svector_ostream os{contents};
  os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER);

  llvm::sort(locations, [](const Location &a, const Location &b) {
    return a.isec->getVA(a.offset) < b.isec->getVA(b.offset);

  for (size_t i = 0, count = locations.size(); i < count;) {
    const OutputSegment *seg = locations[i].isec->parent->parent;
    while (j < count && locations[j].isec->parent->parent == seg)
    encodeRebases(seg, {locations.data() + i, locations.data() + j}, os);
  os << static_cast<uint8_t>(REBASE_OPCODE_DONE);

void RebaseSection::writeTo(uint8_t *buf) const {
  memcpy(buf, contents.data(), contents.size());
NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname,
    : SyntheticSection(segname, name) {
  align = target->wordSize;

void macho::addNonLazyBindingEntries(const Symbol *sym,
                                     const InputSection *isec, uint64_t offset,
  if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
    in.binding->addEntry(dysym, isec, offset, addend);
    if (dysym->isWeakDef())
      in.weakBinding->addEntry(sym, isec, offset, addend);
  } else if (const auto *defined = dyn_cast<Defined>(sym)) {
    in.rebase->addEntry(isec, offset);
    if (defined->isExternalWeakDef())
      in.weakBinding->addEntry(sym, isec, offset, addend);
    else if (defined->interposable)
      in.binding->addEntry(sym, isec, offset, addend);
    // Undefined symbols are filtered out in scanRelocations(); we should never
    // get here.
    llvm_unreachable("cannot bind to an undefined symbol");
void NonLazyPointerSectionBase::addEntry(Symbol *sym) {
  if (entries.insert(sym)) {
    assert(!sym->isInGot());
    sym->gotIndex = entries.size() - 1;

    addNonLazyBindingEntries(sym, isec, sym->gotIndex * target->wordSize);

void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const {
  for (size_t i = 0, n = entries.size(); i < n; ++i)
    if (auto *defined = dyn_cast<Defined>(entries[i]))
      write64le(&buf[i * target->wordSize], defined->getVA());

GotSection::GotSection()
    : NonLazyPointerSectionBase(segment_names::data, section_names::got) {
  flags = S_NON_LAZY_SYMBOL_POINTERS;

TlvPointerSection::TlvPointerSection()
    : NonLazyPointerSectionBase(segment_names::data,
                                section_names::threadPtrs) {
  flags = S_THREAD_LOCAL_VARIABLE_POINTERS;

BindingSection::BindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::binding) {}

  OutputSegment *segment = nullptr;
  // The default value of 0xF0 is not a valid opcode; it should make the
  // program scream instead of accidentally writing "valid" values.
  uint8_t opcode = 0xF0;
  uint64_t consecutiveCount = 0;

// Encode a sequence of opcodes that tell dyld to write the address of symbol +
// addend at osec->addr + outSecOff.
//
// The bind opcode "interpreter" remembers the values of each binding field, so
// we only need to encode the differences between bindings. Hence the use of
// lastBinding.
static void encodeBinding(const OutputSection *osec, uint64_t outSecOff,
                          int64_t addend, Binding &lastBinding,
                          std::vector<BindIR> &opcodes) {
  OutputSegment *seg = osec->parent;
  uint64_t offset = osec->getSegmentOffset() + outSecOff;
  if (lastBinding.segment != seg) {
        {static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
    lastBinding.segment = seg;
    lastBinding.offset = offset;
  } else if (lastBinding.offset != offset) {
    opcodes.push_back({BIND_OPCODE_ADD_ADDR_ULEB, offset - lastBinding.offset});
    lastBinding.offset = offset;

  if (lastBinding.addend != addend) {
        {BIND_OPCODE_SET_ADDEND_SLEB, static_cast<uint64_t>(addend)});
    lastBinding.addend = addend;

  opcodes.push_back({BIND_OPCODE_DO_BIND, 0});
  // DO_BIND causes dyld to both perform the binding and increment the offset
  lastBinding.offset += target->wordSize;
static void optimizeOpcodes(std::vector<BindIR> &opcodes) {
  // Pass 1: Combine bind/add pairs
  for (i = 1; i < opcodes.size(); ++i, ++pWrite) {
    if ((opcodes[i].opcode == BIND_OPCODE_ADD_ADDR_ULEB) &&
        (opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND)) {
      opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB;
      opcodes[pWrite].data = opcodes[i].data;
      opcodes[pWrite] = opcodes[i - 1];
  if (i == opcodes.size())
    opcodes[pWrite] = opcodes[i - 1];
  opcodes.resize(pWrite + 1);

  // Pass 2: Compress two or more bind_add opcodes
  for (i = 1; i < opcodes.size(); ++i, ++pWrite) {
    if ((opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
        (opcodes[i - 1].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
        (opcodes[i].data == opcodes[i - 1].data)) {
      opcodes[pWrite].opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB;
      opcodes[pWrite].consecutiveCount = 2;
      opcodes[pWrite].data = opcodes[i].data;
      while (i < opcodes.size() &&
             (opcodes[i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
             (opcodes[i].data == opcodes[i - 1].data)) {
        opcodes[pWrite].consecutiveCount++;
      opcodes[pWrite] = opcodes[i - 1];
  if (i == opcodes.size())
    opcodes[pWrite] = opcodes[i - 1];
  opcodes.resize(pWrite + 1);

  // Pass 3: Use immediate encodings
  // Every binding is the size of one pointer. If the next binding is a
  // multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the
  // opcode can be scaled by wordSize into a single byte and dyld will
  // expand it to the correct address.
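  // For illustration (a hypothetical opcode, not tied to any real input): with
  // 8-byte words, a BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB whose data is 16 becomes
  // BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED with data 16 / 8 = 2, since
  // 2 is below BIND_IMMEDIATE_MASK and 16 is a multiple of wordSize.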
  for (auto &p : opcodes) {
    // It's unclear why the check needs to be strictly less than
    // BIND_IMMEDIATE_MASK, but ld64 currently does this. It may be a bug, but
    // for now we perform the same behavior to prevent mysterious mismatches.
    if ((p.opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
        ((p.data / target->wordSize) < BIND_IMMEDIATE_MASK) &&
        ((p.data % target->wordSize) == 0)) {
      p.opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED;
      p.data /= target->wordSize;
static void flushOpcodes(const BindIR &op, raw_svector_ostream &os) {
  uint8_t opcode = op.opcode & BIND_OPCODE_MASK;
  case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
  case BIND_OPCODE_ADD_ADDR_ULEB:
  case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
    encodeULEB128(op.data, os);
  case BIND_OPCODE_SET_ADDEND_SLEB:
    encodeSLEB128(static_cast<int64_t>(op.data), os);
  case BIND_OPCODE_DO_BIND:
  case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
    encodeULEB128(op.consecutiveCount, os);
    encodeULEB128(op.data, os);
  case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
    os << static_cast<uint8_t>(op.opcode | op.data);
    llvm_unreachable("cannot bind to an unrecognized symbol");
// Non-weak bindings need to have their dylib ordinal encoded as well.
static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) {
  if (config->namespaceKind == NamespaceKind::flat || dysym.isDynamicLookup())
    return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP);
  assert(dysym.getFile()->isReferenced());
  return dysym.getFile()->ordinal;

static int16_t ordinalForSymbol(const Symbol &sym) {
  if (const auto *dysym = dyn_cast<DylibSymbol>(&sym))
    return ordinalForDylibSymbol(*dysym);
  assert(cast<Defined>(&sym)->interposable);
  return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
static void encodeDylibOrdinal(int16_t ordinal, raw_svector_ostream &os) {
    os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
                               (ordinal & BIND_IMMEDIATE_MASK));
  } else if (ordinal <= BIND_IMMEDIATE_MASK) {
    os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | ordinal);
    os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
    encodeULEB128(ordinal, os);

static void encodeWeakOverride(const Defined *defined,
                               raw_svector_ostream &os) {
  os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM |
                             BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION)
     << defined->getName() << '\0';
// Organize the bindings so we can encode them with fewer opcodes.
//
// First, all bindings for a given symbol should be grouped together.
// BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it
// has an associated symbol string), so we only want to emit it once per symbol.
//
// Within each group, we sort the bindings by address. Since bindings are
// delta-encoded, sorting them allows for a more compact result. Note that
// sorting by address alone ensures that bindings for the same segment / section
// are located together, minimizing the number of times we have to emit
// BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB.
//
// Finally, we sort the symbols by the address of their first binding, again
// to facilitate the delta-encoding process.
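// For illustration (hypothetical symbols and addresses): bindings for _foo at
// addresses 0x1000 and 0x2000 plus a binding for _bar at 0x1800 are emitted as
// the group (_foo: 0x1000, 0x2000) followed by (_bar: 0x1800) -- _foo's name
// string is emitted once for its two bindings, and _foo comes first because
// its earliest binding has the lowest address.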
std::vector<std::pair<const Sym *, std::vector<BindingEntry>>>
sortBindings(const BindingsMap<const Sym *> &bindingsMap) {
  std::vector<std::pair<const Sym *, std::vector<BindingEntry>>> bindingsVec(
      bindingsMap.begin(), bindingsMap.end());
  for (auto &p : bindingsVec) {
    std::vector<BindingEntry> &bindings = p.second;
    llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
      return a.target.getVA() < b.target.getVA();
  llvm::sort(bindingsVec, [](const auto &a, const auto &b) {
    return a.second[0].target.getVA() < b.second[0].target.getVA();
// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
// interprets to update a record with the following fields:
//  * segment index (of the segment to write the symbol addresses to, typically
//    the __DATA_CONST segment which contains the GOT)
//  * offset within the segment, indicating the next location to write a binding
//  * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
//
// When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
// a symbol in the GOT, and increments the segment offset to point to the next
// entry. It does *not* clear the record state after doing the bind, so
// subsequent opcodes only need to encode the differences between bindings.
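// For illustration, a sketch of the stream for two hypothetical GOT entries
// _foo and _bar from dylib ordinal 1, laid out consecutively in __DATA_CONST:
//   BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, "_foo\0"
//   BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER
//   BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1
//   BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | segIndex, ULEB(offset)
//   BIND_OPCODE_DO_BIND
//   BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, "_bar\0"
//   BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER
//   BIND_OPCODE_DO_BIND          // offset already advanced past _foo's slot
//   BIND_OPCODE_DONE
// Only the fields that changed since the previous binding are re-encoded.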
void BindingSection::finalizeContents() {
  raw_svector_ostream os{contents};
  int16_t lastOrdinal = 0;

  for (auto &p : sortBindings(bindingsMap)) {
    const Symbol *sym = p.first;
    std::vector<BindingEntry> &bindings = p.second;
    uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
    if (sym->isWeakRef())
      flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
    os << flags << sym->getName() << '\0'
       << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER);
    int16_t ordinal = ordinalForSymbol(*sym);
    if (ordinal != lastOrdinal) {
      encodeDylibOrdinal(ordinal, os);
      lastOrdinal = ordinal;
    std::vector<BindIR> opcodes;
    for (const BindingEntry &b : bindings)
      encodeBinding(b.target.isec->parent,
                    b.target.isec->getOffset(b.target.offset), b.addend,
                    lastBinding, opcodes);
    if (config->optimize > 1)
      optimizeOpcodes(opcodes);
    for (const auto &op : opcodes)
      flushOpcodes(op, os);
  if (!bindingsMap.empty())
    os << static_cast<uint8_t>(BIND_OPCODE_DONE);

void BindingSection::writeTo(uint8_t *buf) const {
  memcpy(buf, contents.data(), contents.size());
WeakBindingSection::WeakBindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::weakBinding) {}

void WeakBindingSection::finalizeContents() {
  raw_svector_ostream os{contents};

  for (const Defined *defined : definitions)
    encodeWeakOverride(defined, os);

  for (auto &p : sortBindings(bindingsMap)) {
    const Symbol *sym = p.first;
    std::vector<BindingEntry> &bindings = p.second;
    os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
       << sym->getName() << '\0'
       << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER);
    std::vector<BindIR> opcodes;
    for (const BindingEntry &b : bindings)
      encodeBinding(b.target.isec->parent,
                    b.target.isec->getOffset(b.target.offset), b.addend,
                    lastBinding, opcodes);
    if (config->optimize > 1)
      optimizeOpcodes(opcodes);
    for (const auto &op : opcodes)
      flushOpcodes(op, os);
  if (!bindingsMap.empty() || !definitions.empty())
    os << static_cast<uint8_t>(BIND_OPCODE_DONE);

void WeakBindingSection::writeTo(uint8_t *buf) const {
  memcpy(buf, contents.data(), contents.size());
StubsSection::StubsSection()
    : SyntheticSection(segment_names::text, section_names::stubs) {
  flags = S_SYMBOL_STUBS | S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS;
  // The stubs section comprises machine instructions, which are aligned to
  // 4 bytes on the archs we care about.
  reserved2 = target->stubSize;

uint64_t StubsSection::getSize() const {
  return entries.size() * target->stubSize;

void StubsSection::writeTo(uint8_t *buf) const {
  for (const Symbol *sym : entries) {
    target->writeStub(buf + off, *sym);
    off += target->stubSize;

void StubsSection::finalize() { isFinal = true; }

bool StubsSection::addEntry(Symbol *sym) {
  bool inserted = entries.insert(sym);
  sym->stubsIndex = entries.size() - 1;
StubHelperSection::StubHelperSection()
    : SyntheticSection(segment_names::text, section_names::stubHelper) {
  flags = S_ATTR_SOME_INSTRUCTIONS | S_ATTR_PURE_INSTRUCTIONS;
  align = 4; // This section comprises machine instructions

uint64_t StubHelperSection::getSize() const {
  return target->stubHelperHeaderSize +
         in.lazyBinding->getEntries().size() * target->stubHelperEntrySize;

bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); }

void StubHelperSection::writeTo(uint8_t *buf) const {
  target->writeStubHelperHeader(buf);
  size_t off = target->stubHelperHeaderSize;
  for (const Symbol *sym : in.lazyBinding->getEntries()) {
    target->writeStubHelperEntry(buf + off, *sym, addr + off);
    off += target->stubHelperEntrySize;

void StubHelperSection::setup() {
  Symbol *binder = symtab->addUndefined("dyld_stub_binder", /*file=*/nullptr,
                                        /*isWeakRef=*/false);
  if (auto *undefined = dyn_cast<Undefined>(binder))
    treatUndefinedSymbol(*undefined,
                         "lazy binding (normally in libSystem.dylib)");

  // treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check.
  stubBinder = dyn_cast_or_null<DylibSymbol>(binder);
  if (stubBinder == nullptr)
    return;
  in.got->addEntry(stubBinder);

  in.imageLoaderCache->parent =
      ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
  inputSections.push_back(in.imageLoaderCache);
  // Since this isn't in the symbol table or in any input file, the noDeadStrip
  // argument doesn't matter.
      make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0, 0,
                    /*isExternal=*/false, /*isPrivateExtern=*/false,
                    /*includeInSymtab=*/true,
                    /*isThumb=*/false, /*isReferencedDynamically=*/false,
                    /*noDeadStrip=*/false);
  dyldPrivate->used = true;
LazyPointerSection::LazyPointerSection()
    : SyntheticSection(segment_names::data, section_names::lazySymbolPtr) {
  align = target->wordSize;
  flags = S_LAZY_SYMBOL_POINTERS;

uint64_t LazyPointerSection::getSize() const {
  return in.stubs->getEntries().size() * target->wordSize;

bool LazyPointerSection::isNeeded() const {
  return !in.stubs->getEntries().empty();

void LazyPointerSection::writeTo(uint8_t *buf) const {
  for (const Symbol *sym : in.stubs->getEntries()) {
    if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
      if (dysym->hasStubsHelper()) {
        uint64_t stubHelperOffset =
            target->stubHelperHeaderSize +
            dysym->stubsHelperIndex * target->stubHelperEntrySize;
        write64le(buf + off, in.stubHelper->addr + stubHelperOffset);
      write64le(buf + off, sym->getVA());
    off += target->wordSize;
LazyBindingSection::LazyBindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::lazyBinding) {}

void LazyBindingSection::finalizeContents() {
  // TODO: Just precompute the output size here instead of writing to a
  // temporary buffer.
  for (Symbol *sym : entries)
    sym->lazyBindOffset = encode(*sym);

void LazyBindingSection::writeTo(uint8_t *buf) const {
  memcpy(buf, contents.data(), contents.size());

void LazyBindingSection::addEntry(Symbol *sym) {
  if (entries.insert(sym)) {
    sym->stubsHelperIndex = entries.size() - 1;
    in.rebase->addEntry(in.lazyPointers->isec,
                        sym->stubsIndex * target->wordSize);
// Unlike the non-lazy binding section, the bind opcodes in this section aren't
// interpreted all at once. Rather, dyld will start interpreting opcodes at a
// given offset, typically only binding a single symbol before it finds a
// BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
// we cannot encode just the differences between symbols; we have to emit the
// complete bind information for each symbol.
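// For illustration, the self-contained stream encode() emits for one
// hypothetical symbol _foo whose lazy pointer lives in the __DATA segment:
//   BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | segIndex, ULEB(slot offset)
//   BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | ordinal
//   BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM, "_foo\0"
//   BIND_OPCODE_DO_BIND
//   BIND_OPCODE_DONE
// dyld starts interpreting this stream at sym->lazyBindOffset, so every symbol
// repeats the full record rather than a delta.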
uint32_t LazyBindingSection::encode(const Symbol &sym) {
  uint32_t opstreamOffset = contents.size();
  OutputSegment *dataSeg = in.lazyPointers->parent;
  os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
      in.lazyPointers->addr - dataSeg->addr + sym.stubsIndex * target->wordSize;
  encodeULEB128(offset, os);
  encodeDylibOrdinal(ordinalForSymbol(sym), os);

  uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
    flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT;

  os << flags << sym.getName() << '\0'
     << static_cast<uint8_t>(BIND_OPCODE_DO_BIND)
     << static_cast<uint8_t>(BIND_OPCODE_DONE);
  return opstreamOffset;
ExportSection::ExportSection()
    : LinkEditSection(segment_names::linkEdit, section_names::export_) {}

void ExportSection::finalizeContents() {
  trieBuilder.setImageBase(in.header->addr);
  for (const Symbol *sym : symtab->getSymbols()) {
    if (const auto *defined = dyn_cast<Defined>(sym)) {
      if (defined->privateExtern || !defined->isLive())
        continue;
      trieBuilder.addSymbol(*defined);
      hasWeakSymbol = hasWeakSymbol || sym->isWeakDef();
  size = trieBuilder.build();

void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); }

DataInCodeSection::DataInCodeSection()
    : LinkEditSection(segment_names::linkEdit, section_names::dataInCode) {}
static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() {
  std::vector<MachO::data_in_code_entry> dataInCodeEntries;
  for (const InputFile *inputFile : inputFiles) {
    if (!isa<ObjFile>(inputFile))
      continue;
    const ObjFile *objFile = cast<ObjFile>(inputFile);
    ArrayRef<MachO::data_in_code_entry> entries = objFile->getDataInCode();

    assert(is_sorted(dataInCodeEntries, [](const data_in_code_entry &lhs,
                                           const data_in_code_entry &rhs) {
      return lhs.offset < rhs.offset;
    // For each code subsection find 'data in code' entries residing in it.
    // Compute the new offset values as
    // <offset within subsection> + <subsection address> - <__TEXT address>.
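    // For illustration (hypothetical addresses): an entry 0x10 bytes into a
    // subsection whose final address is __TEXT + 0x4010 gets rewritten to
    // offset 0x4020 relative to the Mach-O header.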
    for (const Section *section : objFile->sections) {
      for (const Subsection &subsec : section->subsections) {
        const InputSection *isec = subsec.isec;
        if (!isCodeSection(isec))
          continue;
        if (cast<ConcatInputSection>(isec)->shouldOmitFromOutput())
          continue;
        const uint64_t beginAddr = section->addr + subsec.offset;
        auto it = llvm::lower_bound(
            [](const MachO::data_in_code_entry &entry, uint64_t addr) {
              return entry.offset < addr;
        const uint64_t endAddr = beginAddr + isec->getSize();
        for (const auto end = entries.end();
             it != end && it->offset + it->length <= endAddr; ++it)
          dataInCodeEntries.push_back(
              {static_cast<uint32_t>(isec->getVA(it->offset - beginAddr) -
               it->length, it->kind});
  return dataInCodeEntries;

void DataInCodeSection::finalizeContents() {
  entries = target->wordSize == 8 ? collectDataInCodeEntries<LP64>()
                                  : collectDataInCodeEntries<ILP32>();

void DataInCodeSection::writeTo(uint8_t *buf) const {
  if (!entries.empty())
    memcpy(buf, entries.data(), getRawSize());
874 : LinkEditSection(segment_names::linkEdit
, section_names::functionStarts
) {}
876 void FunctionStartsSection::finalizeContents() {
877 raw_svector_ostream os
{contents
};
878 std::vector
<uint64_t> addrs
;
879 for (const InputFile
*file
: inputFiles
) {
880 if (auto *objFile
= dyn_cast
<ObjFile
>(file
)) {
881 for (const Symbol
*sym
: objFile
->symbols
) {
882 if (const auto *defined
= dyn_cast_or_null
<Defined
>(sym
)) {
883 if (!defined
->isec
|| !isCodeSection(defined
->isec
) ||
886 // TODO: Add support for thumbs, in that case
887 // the lowest bit of nextAddr needs to be set to 1.
888 addrs
.push_back(defined
->getVA());
894 uint64_t addr
= in
.header
->addr
;
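  // For illustration (hypothetical addresses): functions at header + 0x100 and
  // header + 0x180 are encoded as the ULEB128 deltas 0x100 and 0x80; dyld and
  // tools reconstruct the absolute addresses by accumulating the deltas.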
  for (uint64_t nextAddr : addrs) {
    uint64_t delta = nextAddr - addr;
    encodeULEB128(delta, os);

void FunctionStartsSection::writeTo(uint8_t *buf) const {
  memcpy(buf, contents.data(), contents.size());

SymtabSection::SymtabSection(StringTableSection &stringTableSection)
    : LinkEditSection(segment_names::linkEdit, section_names::symbolTable),
      stringTableSection(stringTableSection) {}

void SymtabSection::emitBeginSourceStab(StringRef sourceFile) {
  StabsEntry stab(N_SO);
  stab.strx = stringTableSection.addString(saver().save(sourceFile));
  stabs.emplace_back(std::move(stab));

void SymtabSection::emitEndSourceStab() {
  StabsEntry stab(N_SO);
  stabs.emplace_back(std::move(stab));
void SymtabSection::emitObjectFileStab(ObjFile *file) {
  StabsEntry stab(N_OSO);
  stab.sect = target->cpuSubtype;
  SmallString<261> path(!file->archiveName.empty() ? file->archiveName
  std::error_code ec = sys::fs::make_absolute(path);
  if (ec)
    fatal("failed to get absolute path for " + path);

  if (!file->archiveName.empty())
    path.append({"(", file->getName(), ")"});

  StringRef adjustedPath = saver().save(path.str());
  adjustedPath.consume_front(config->osoPrefix);

  stab.strx = stringTableSection.addString(adjustedPath);
  stab.value = file->modTime;
  stabs.emplace_back(std::move(stab));

void SymtabSection::emitEndFunStab(Defined *defined) {
  StabsEntry stab(N_FUN);
  stab.value = defined->size;
  stabs.emplace_back(std::move(stab));
void SymtabSection::emitStabs() {
  if (config->omitDebugInfo)
    return;

  for (const std::string &s : config->astPaths) {
    StabsEntry astStab(N_AST);
    astStab.strx = stringTableSection.addString(s);
    stabs.emplace_back(std::move(astStab));

  // Cache the file ID for each symbol in an std::pair for faster sorting.
  using SortingPair = std::pair<Defined *, int>;
  std::vector<SortingPair> symbolsNeedingStabs;
  for (const SymtabEntry &entry :
       concat<SymtabEntry>(localSymbols, externalSymbols)) {
    Symbol *sym = entry.sym;
    assert(sym->isLive() &&
           "dead symbols should not be in localSymbols, externalSymbols");
    if (auto *defined = dyn_cast<Defined>(sym)) {
      // Excluded symbols should have been filtered out in finalizeContents().
      assert(defined->includeInSymtab);

      if (defined->isAbsolute())
        continue;

      // Constant-folded symbols go in the executable's symbol table, but don't
      // get a stabs entry.
      if (defined->wasIdenticalCodeFolded)
        continue;

      InputSection *isec = defined->isec;
      ObjFile *file = dyn_cast_or_null<ObjFile>(isec->getFile());
      if (!file || !file->compileUnit)
        continue;

      symbolsNeedingStabs.emplace_back(defined, defined->isec->getFile()->id);

  llvm::stable_sort(symbolsNeedingStabs,
                    [&](const SortingPair &a, const SortingPair &b) {
                      return a.second < b.second;
  // Emit STABS symbols so that dsymutil and/or the debugger can map address
  // regions in the final binary to the source and object files from which they
  // originated.
  InputFile *lastFile = nullptr;
  for (SortingPair &pair : symbolsNeedingStabs) {
    Defined *defined = pair.first;
    InputSection *isec = defined->isec;
    ObjFile *file = cast<ObjFile>(isec->getFile());

    if (lastFile == nullptr || lastFile != file) {
      if (lastFile != nullptr)
        emitEndSourceStab();
      emitBeginSourceStab(file->sourceFile());
      emitObjectFileStab(file);

    symStab.sect = defined->isec->parent->index;
    symStab.strx = stringTableSection.addString(defined->getName());
    symStab.value = defined->getVA();

    if (isCodeSection(isec)) {
      symStab.type = N_FUN;
      stabs.emplace_back(std::move(symStab));
      emitEndFunStab(defined);
      symStab.type = defined->isExternal() ? N_GSYM : N_STSYM;
      stabs.emplace_back(std::move(symStab));

  emitEndSourceStab();
void SymtabSection::finalizeContents() {
  auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) {
    uint32_t strx = stringTableSection.addString(sym->getName());
    symbols.push_back({sym, strx});

  std::function<void(Symbol *)> localSymbolsHandler;
  switch (config->localSymbolsPresence) {
  case SymtabPresence::All:
    localSymbolsHandler = [&](Symbol *sym) { addSymbol(localSymbols, sym); };
  case SymtabPresence::None:
    localSymbolsHandler = [&](Symbol *) { /* Do nothing */ };
  case SymtabPresence::SelectivelyIncluded:
    localSymbolsHandler = [&](Symbol *sym) {
      if (config->localSymbolPatterns.match(sym->getName()))
        addSymbol(localSymbols, sym);
  case SymtabPresence::SelectivelyExcluded:
    localSymbolsHandler = [&](Symbol *sym) {
      if (!config->localSymbolPatterns.match(sym->getName()))
        addSymbol(localSymbols, sym);

  // Local symbols aren't in the SymbolTable, so we walk the list of object
  // files to gather them.
  // But if `-x` is set, then we don't need to. localSymbolsHandler() will do
  // the right thing regardless, but this check is a perf optimization because
  // iterating through all the input files and their symbols is expensive.
  if (config->localSymbolsPresence != SymtabPresence::None) {
    for (const InputFile *file : inputFiles) {
      if (auto *objFile = dyn_cast<ObjFile>(file)) {
        for (Symbol *sym : objFile->symbols) {
          if (auto *defined = dyn_cast_or_null<Defined>(sym)) {
            if (defined->isExternal() || !defined->isLive() ||
                !defined->includeInSymtab)
              continue;
            localSymbolsHandler(sym);

  // __dyld_private is a local symbol too. It's linker-created and doesn't
  // exist in any object file.
  if (Defined *dyldPrivate = in.stubHelper->dyldPrivate)
    localSymbolsHandler(dyldPrivate);

  for (Symbol *sym : symtab->getSymbols()) {
    if (auto *defined = dyn_cast<Defined>(sym)) {
      if (!defined->includeInSymtab)
        continue;
      assert(defined->isExternal());
      if (defined->privateExtern)
        localSymbolsHandler(defined);
      addSymbol(externalSymbols, defined);
    } else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
      if (dysym->isReferenced())
        addSymbol(undefinedSymbols, sym);

  uint32_t symtabIndex = stabs.size();
  for (const SymtabEntry &entry :
       concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) {
    entry.sym->symtabIndex = symtabIndex++;
uint32_t SymtabSection::getNumSymbols() const {
  return stabs.size() + localSymbols.size() + externalSymbols.size() +
         undefinedSymbols.size();

// This serves to hide (type-erase) the template parameter from SymtabSection.
template <class LP> class SymtabSectionImpl final : public SymtabSection {
  SymtabSectionImpl(StringTableSection &stringTableSection)
      : SymtabSection(stringTableSection) {}
  uint64_t getRawSize() const override;
  void writeTo(uint8_t *buf) const override;

template <class LP> uint64_t SymtabSectionImpl<LP>::getRawSize() const {
  return getNumSymbols() * sizeof(typename LP::nlist);

template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t *buf) const {
  auto *nList = reinterpret_cast<typename LP::nlist *>(buf);
  // Emit the stabs entries before the "real" symbols. We cannot emit them
  // after as that would render Symbol::symtabIndex inaccurate.
  for (const StabsEntry &entry : stabs) {
    nList->n_strx = entry.strx;
    nList->n_type = entry.type;
    nList->n_sect = entry.sect;
    nList->n_desc = entry.desc;
    nList->n_value = entry.value;

  for (const SymtabEntry &entry : concat<const SymtabEntry>(
           localSymbols, externalSymbols, undefinedSymbols)) {
    nList->n_strx = entry.strx;
    // TODO populate n_desc with more flags
    if (auto *defined = dyn_cast<Defined>(entry.sym)) {
      if (defined->privateExtern) {
        // Private external -- dylib scoped symbol.
        // Promote to non-external at link time.
      } else if (defined->isExternal()) {
        // Normal global symbol.
        // TU-local symbol from localSymbols.

      if (defined->isAbsolute()) {
        nList->n_type = scope | N_ABS;
        nList->n_sect = NO_SECT;
        nList->n_value = defined->value;
        nList->n_type = scope | N_SECT;
        nList->n_sect = defined->isec->parent->index;
        // For the N_SECT symbol type, n_value is the address of the symbol
        nList->n_value = defined->getVA();
      nList->n_desc |= defined->thumb ? N_ARM_THUMB_DEF : 0;
      nList->n_desc |= defined->isExternalWeakDef() ? N_WEAK_DEF : 0;
          defined->referencedDynamically ? REFERENCED_DYNAMICALLY : 0;
    } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) {
      uint16_t n_desc = nList->n_desc;
      int16_t ordinal = ordinalForDylibSymbol(*dysym);
      if (ordinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP)
        SET_LIBRARY_ORDINAL(n_desc, DYNAMIC_LOOKUP_ORDINAL);
      else if (ordinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE)
        SET_LIBRARY_ORDINAL(n_desc, EXECUTABLE_ORDINAL);
        assert(ordinal > 0);
        SET_LIBRARY_ORDINAL(n_desc, static_cast<uint8_t>(ordinal));

      nList->n_type = N_EXT;
      n_desc |= dysym->isWeakDef() ? N_WEAK_DEF : 0;
      n_desc |= dysym->isWeakRef() ? N_WEAK_REF : 0;
      nList->n_desc = n_desc;

macho::makeSymtabSection(StringTableSection &stringTableSection) {
  return make<SymtabSectionImpl<LP>>(stringTableSection);
IndirectSymtabSection::IndirectSymtabSection()
    : LinkEditSection(segment_names::linkEdit,
                      section_names::indirectSymbolTable) {}

uint32_t IndirectSymtabSection::getNumSymbols() const {
  return in.got->getEntries().size() + in.tlvPointers->getEntries().size() +
         2 * in.stubs->getEntries().size();

bool IndirectSymtabSection::isNeeded() const {
  return in.got->isNeeded() || in.tlvPointers->isNeeded() ||
         in.stubs->isNeeded();
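// For illustration (hypothetical counts): with 3 GOT entries, 1 TLV pointer
// and 2 stubs, finalizeContents() below assigns reserved1 offsets of 0 for
// __got, 3 for __thread_ptrs, 4 for __stubs and 6 for __la_symbol_ptr, i.e.
// each section records where its slice of the indirect symbol table begins.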
void IndirectSymtabSection::finalizeContents() {
  in.got->reserved1 = off;
  off += in.got->getEntries().size();
  in.tlvPointers->reserved1 = off;
  off += in.tlvPointers->getEntries().size();
  in.stubs->reserved1 = off;
  off += in.stubs->getEntries().size();
  in.lazyPointers->reserved1 = off;

static uint32_t indirectValue(const Symbol *sym) {
  if (sym->symtabIndex == UINT32_MAX)
    return INDIRECT_SYMBOL_LOCAL;
  if (auto *defined = dyn_cast<Defined>(sym))
    if (defined->privateExtern)
      return INDIRECT_SYMBOL_LOCAL;
  return sym->symtabIndex;

void IndirectSymtabSection::writeTo(uint8_t *buf) const {
  for (const Symbol *sym : in.got->getEntries()) {
    write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
  for (const Symbol *sym : in.tlvPointers->getEntries()) {
    write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
  for (const Symbol *sym : in.stubs->getEntries()) {
    write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
  // There is a 1:1 correspondence between stubs and LazyPointerSection
  // entries. But giving __stubs and __la_symbol_ptr the same reserved1
  // (the offset into the indirect symbol table) so that they both refer
  // to the same range of offsets confuses `strip`, so write the stubs
  // symbol table offsets a second time.
  for (const Symbol *sym : in.stubs->getEntries()) {
    write32le(buf + off * sizeof(uint32_t), indirectValue(sym));
StringTableSection::StringTableSection()
    : LinkEditSection(segment_names::linkEdit, section_names::stringTable) {}

uint32_t StringTableSection::addString(StringRef str) {
  uint32_t strx = size;
  strings.push_back(str); // TODO: consider deduplicating strings
  size += str.size() + 1; // account for null terminator

void StringTableSection::writeTo(uint8_t *buf) const {
  for (StringRef str : strings) {
    memcpy(buf + off, str.data(), str.size());
    off += str.size() + 1; // account for null terminator

static_assert((CodeSignatureSection::blobHeadersSize % 8) == 0, "");
static_assert((CodeSignatureSection::fixedHeadersSize % 8) == 0, "");
CodeSignatureSection::CodeSignatureSection()
    : LinkEditSection(segment_names::linkEdit, section_names::codeSignature) {
  align = 16; // required by libstuff
  // FIXME: Consider using finalOutput instead of outputFile.
  fileName = config->outputFile;
  size_t slashIndex = fileName.rfind("/");
  if (slashIndex != std::string::npos)
    fileName = fileName.drop_front(slashIndex + 1);

  // NOTE: Any changes to these calculations should be repeated
  // in llvm-objcopy's MachOLayoutBuilder::layoutTail.
  allHeadersSize = alignTo<16>(fixedHeadersSize + fileName.size() + 1);
  fileNamePad = allHeadersSize - fixedHeadersSize - fileName.size();
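  // For illustration (assuming, hypothetically, fixedHeadersSize == 352): with
  // the 5-byte identifier "a.out", allHeadersSize = alignTo<16>(352 + 5 + 1) =
  // 368 and fileNamePad = 368 - 352 - 5 = 11, so the name plus its null
  // terminator is padded out to the next 16-byte boundary.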
uint32_t CodeSignatureSection::getBlockCount() const {
  return (fileOff + blockSize - 1) / blockSize;

uint64_t CodeSignatureSection::getRawSize() const {
  return allHeadersSize + getBlockCount() * hashSize;

void CodeSignatureSection::writeHashes(uint8_t *buf) const {
  // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
  // MachOWriter::writeSignatureData.
  uint8_t *hashes = buf + fileOff + allHeadersSize;
  parallelFor(0, getBlockCount(), [&](size_t i) {
    sha256(buf + i * blockSize,
           std::min(static_cast<size_t>(fileOff - i * blockSize), blockSize),
           hashes + i * hashSize);
#if defined(__APPLE__)
  // This is a macOS-specific work-around and makes no sense for any
  // other host OS. See https://openradar.appspot.com/FB8914231
  //
  // The macOS kernel maintains a signature-verification cache to
  // quickly validate applications at time of execve(2). The trouble
  // is that the kernel creates the cache entry at the time of the
  // mmap(2) call, before we have a chance to write either the code to
  // sign or the signature header+hashes. The fix is to invalidate
  // all cached data associated with the output file, thus discarding
  // the bogus prematurely-cached signature.
  msync(buf, fileOff + getSize(), MS_INVALIDATE);
void CodeSignatureSection::writeTo(uint8_t *buf) const {
  // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
  // MachOWriter::writeSignatureData.
  uint32_t signatureSize = static_cast<uint32_t>(getSize());
  auto *superBlob = reinterpret_cast<CS_SuperBlob *>(buf);
  write32be(&superBlob->magic, CSMAGIC_EMBEDDED_SIGNATURE);
  write32be(&superBlob->length, signatureSize);
  write32be(&superBlob->count, 1);
  auto *blobIndex = reinterpret_cast<CS_BlobIndex *>(&superBlob[1]);
  write32be(&blobIndex->type, CSSLOT_CODEDIRECTORY);
  write32be(&blobIndex->offset, blobHeadersSize);
  auto *codeDirectory =
      reinterpret_cast<CS_CodeDirectory *>(buf + blobHeadersSize);
  write32be(&codeDirectory->magic, CSMAGIC_CODEDIRECTORY);
  write32be(&codeDirectory->length, signatureSize - blobHeadersSize);
  write32be(&codeDirectory->version, CS_SUPPORTSEXECSEG);
  write32be(&codeDirectory->flags, CS_ADHOC | CS_LINKER_SIGNED);
  write32be(&codeDirectory->hashOffset,
            sizeof(CS_CodeDirectory) + fileName.size() + fileNamePad);
  write32be(&codeDirectory->identOffset, sizeof(CS_CodeDirectory));
  codeDirectory->nSpecialSlots = 0;
  write32be(&codeDirectory->nCodeSlots, getBlockCount());
  write32be(&codeDirectory->codeLimit, fileOff);
  codeDirectory->hashSize = static_cast<uint8_t>(hashSize);
  codeDirectory->hashType = kSecCodeSignatureHashSHA256;
  codeDirectory->platform = 0;
  codeDirectory->pageSize = blockSizeShift;
  codeDirectory->spare2 = 0;
  codeDirectory->scatterOffset = 0;
  codeDirectory->teamOffset = 0;
  codeDirectory->spare3 = 0;
  codeDirectory->codeLimit64 = 0;
  OutputSegment *textSeg = getOrCreateOutputSegment(segment_names::text);
  write64be(&codeDirectory->execSegBase, textSeg->fileOff);
  write64be(&codeDirectory->execSegLimit, textSeg->fileSize);
  write64be(&codeDirectory->execSegFlags,
            config->outputType == MH_EXECUTE ? CS_EXECSEG_MAIN_BINARY : 0);
  auto *id = reinterpret_cast<char *>(&codeDirectory[1]);
  memcpy(id, fileName.begin(), fileName.size());
  memset(id + fileName.size(), 0, fileNamePad);
BitcodeBundleSection::BitcodeBundleSection()
    : SyntheticSection(segment_names::llvm, section_names::bitcodeBundle) {}

class ErrorCodeWrapper {
  explicit ErrorCodeWrapper(std::error_code ec) : errorCode(ec.value()) {}
  explicit ErrorCodeWrapper(int ec) : errorCode(ec) {}
  operator int() const { return errorCode; }

#define CHECK_EC(exp)                                                          \
    ErrorCodeWrapper ec(exp);                                                  \
      fatal(Twine("operation failed with error code ") + Twine(ec) + ": " +    \

void BitcodeBundleSection::finalize() {
#ifdef LLVM_HAVE_LIBXAR
  using namespace llvm::sys::fs;
  CHECK_EC(createTemporaryFile("bitcode-bundle", "xar", xarPath));

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
  xar_t xar(xar_open(xarPath.data(), O_RDWR));
#pragma clang diagnostic pop
    fatal("failed to open XAR temporary file at " + xarPath);
  CHECK_EC(xar_opt_set(xar, XAR_OPT_COMPRESSION, XAR_OPT_VAL_NONE));
  // FIXME: add more data to XAR
  CHECK_EC(xar_close(xar));

  file_size(xarPath, xarSize);
#endif // defined(LLVM_HAVE_LIBXAR)

void BitcodeBundleSection::writeTo(uint8_t *buf) const {
  using namespace llvm::sys::fs;
  CHECK(openNativeFile(xarPath, CD_OpenExisting, FA_Read, OF_None),
        "failed to open XAR file");

  mapped_file_region xarMap(handle, mapped_file_region::mapmode::readonly,
    fatal("failed to map XAR file");
  memcpy(buf, xarMap.const_data(), xarSize);
CStringSection::CStringSection()
    : SyntheticSection(segment_names::text, section_names::cString) {
  flags = S_CSTRING_LITERALS;

void CStringSection::addInput(CStringInputSection *isec) {
  isec->parent = this;
  inputs.push_back(isec);
  if (isec->align > align)
    align = isec->align;

void CStringSection::writeTo(uint8_t *buf) const {
  for (const CStringInputSection *isec : inputs) {
    for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) {
      if (!isec->pieces[i].live)
        continue;
      StringRef string = isec->getStringRef(i);
      memcpy(buf + isec->pieces[i].outSecOff, string.data(), string.size());
void CStringSection::finalizeContents() {
  uint64_t offset = 0;
  for (CStringInputSection *isec : inputs) {
    for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) {
      if (!isec->pieces[i].live)
        continue;
      // See comment above DeduplicatedCStringSection for how alignment is
      // handled.
      uint32_t pieceAlign =
          1 << countTrailingZeros(isec->align | isec->pieces[i].inSecOff);
      offset = alignTo(offset, pieceAlign);
      isec->pieces[i].outSecOff = offset;
      isec->isFinal = true;
      StringRef string = isec->getStringRef(i);
      offset += string.size();
// Mergeable cstring literals are found under the __TEXT,__cstring section. In
// contrast to ELF, which puts strings that need different alignments into
// different sections, clang's Mach-O backend puts them all in one section.
// Strings that need to be aligned have the .p2align directive emitted before
// them, which simply translates into zero padding in the object file. In other
// words, we have to infer the desired alignment of these cstrings from their
// addresses.
//
// We differ slightly from ld64 in how we've chosen to align these cstrings.
// Both LLD and ld64 preserve the number of trailing zeros in each cstring's
// address in the input object files. When deduplicating identical cstrings,
// both linkers pick the cstring whose address has more trailing zeros, and
// preserve the alignment of that address in the final binary. However, ld64
// goes a step further and also preserves the offset of the cstring from the
// last section-aligned address. I.e. if a cstring is at offset 18 in the
// input, with a section alignment of 16, then both LLD and ld64 will ensure the
// final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also
// ensure that the final address is of the form 16 * k + 2 for some k.
//
// Note that ld64's heuristic means that a dedup'ed cstring's final address is
// dependent on the order of the input object files. E.g. if in addition to the
// cstring at offset 18 above, we have a duplicate one in another file with a
// `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick
// the cstring from the object file earlier on the command line (since both have
// the same number of trailing zeros in their address). So the final cstring may
// either be at some address `16 * k + 2` or at some address `2 * k`.
//
// I've opted not to follow this behavior primarily for implementation
// simplicity, and secondarily to save a few more bytes. It's not clear to me
// that preserving the section alignment + offset is ever necessary, and there
// are many cases that are clearly redundant. In particular, if an x86_64 object
// file contains some strings that are accessed via SIMD instructions, then the
// .cstring section in the object file will be 16-byte-aligned (since SIMD
// requires its operand addresses to be 16-byte aligned). However, there will
// typically also be other cstrings in the same file that aren't used via SIMD
// and don't need this alignment. They will be emitted at some arbitrary address
// `A`, but ld64 will treat them as being 16-byte aligned with an offset of
// `16 * k + A % 16`.
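// For illustration of the trailing-zeros rule used below: a piece at inSecOff
// 18 in an input section with align 16 gives countTrailingZeros(16 | 18) == 1,
// so the string is placed at a 2-byte-aligned output offset -- matching the
// "offset 18, 2-byte aligned" example above.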
void DeduplicatedCStringSection::finalizeContents() {
  // Find the largest alignment required for each string.
  for (const CStringInputSection *isec : inputs) {
    for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) {
      const StringPiece &piece = isec->pieces[i];
      auto s = isec->getCachedHashStringRef(i);
      assert(isec->align != 0);
      uint8_t trailingZeros = countTrailingZeros(isec->align | piece.inSecOff);
      auto it = stringOffsetMap.insert(
          std::make_pair(s, StringOffset(trailingZeros)));
      if (!it.second && it.first->second.trailingZeros < trailingZeros)
        it.first->second.trailingZeros = trailingZeros;

  // Assign an offset for each string and save it to the corresponding
  // StringPieces for easy access.
  for (CStringInputSection *isec : inputs) {
    for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) {
      if (!isec->pieces[i].live)
        continue;
      auto s = isec->getCachedHashStringRef(i);
      auto it = stringOffsetMap.find(s);
      assert(it != stringOffsetMap.end());
      StringOffset &offsetInfo = it->second;
      if (offsetInfo.outSecOff == UINT64_MAX) {
        offsetInfo.outSecOff = alignTo(size, 1ULL << offsetInfo.trailingZeros);
        size = offsetInfo.outSecOff + s.size();
      isec->pieces[i].outSecOff = offsetInfo.outSecOff;
    isec->isFinal = true;

void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
  for (const auto &p : stringOffsetMap) {
    StringRef data = p.first.val();
    uint64_t off = p.second.outSecOff;
    memcpy(buf + off, data.data(), data.size());
// This section is actually emitted as __TEXT,__const by ld64, but clang may
// emit input sections of that name, and LLD doesn't currently support mixing
// synthetic and concat-type OutputSections. To work around this, I've given
// our merged-literals section a different name.
WordLiteralSection::WordLiteralSection()
    : SyntheticSection(segment_names::text, section_names::literals) {

void WordLiteralSection::addInput(WordLiteralInputSection *isec) {
  isec->parent = this;
  inputs.push_back(isec);

void WordLiteralSection::finalizeContents() {
  for (WordLiteralInputSection *isec : inputs) {
    // We do all processing of the InputSection here, so it will be effectively
    // finalized.
    isec->isFinal = true;
    const uint8_t *buf = isec->data.data();
    switch (sectionType(isec->getFlags())) {
    case S_4BYTE_LITERALS: {
      for (size_t off = 0, e = isec->data.size(); off < e; off += 4) {
        if (!isec->isLive(off))
          continue;
        uint32_t value = *reinterpret_cast<const uint32_t *>(buf + off);
        literal4Map.emplace(value, literal4Map.size());
    case S_8BYTE_LITERALS: {
      for (size_t off = 0, e = isec->data.size(); off < e; off += 8) {
        if (!isec->isLive(off))
          continue;
        uint64_t value = *reinterpret_cast<const uint64_t *>(buf + off);
        literal8Map.emplace(value, literal8Map.size());
    case S_16BYTE_LITERALS: {
      for (size_t off = 0, e = isec->data.size(); off < e; off += 16) {
        if (!isec->isLive(off))
          continue;
        UInt128 value = *reinterpret_cast<const UInt128 *>(buf + off);
        literal16Map.emplace(value, literal16Map.size());
      llvm_unreachable("invalid literal section type");
void WordLiteralSection::writeTo(uint8_t *buf) const {
  // Note that we don't attempt to do any endianness conversion in addInput(),
  // so we don't do it here either -- just write out the original value,
  // byte for byte.
  for (const auto &p : literal16Map)
    memcpy(buf + p.second * 16, &p.first, 16);
  buf += literal16Map.size() * 16;

  for (const auto &p : literal8Map)
    memcpy(buf + p.second * 8, &p.first, 8);
  buf += literal8Map.size() * 8;

  for (const auto &p : literal4Map)
    memcpy(buf + p.second * 4, &p.first, 4);

ObjCImageInfoSection::ObjCImageInfoSection()
    : SyntheticSection(segment_names::data, section_names::objCImageInfo) {}
ObjCImageInfoSection::ImageInfo
ObjCImageInfoSection::parseImageInfo(const InputFile *file) {
  ArrayRef<uint8_t> data = file->objCImageInfo;
  // The image info struct has the following layout:
  //   uint32_t version;
  //   uint32_t flags;
  if (data.size() < 8) {
    warn(toString(file) + ": invalid __objc_imageinfo size");

  auto *buf = reinterpret_cast<const uint32_t *>(data.data());
  if (read32le(buf) != 0) {
    warn(toString(file) + ": invalid __objc_imageinfo version");

  uint32_t flags = read32le(buf + 1);
  info.swiftVersion = (flags >> 8) & 0xff;
  info.hasCategoryClassProperties = flags & 0x40;

static std::string swiftVersionString(uint8_t version) {
  return ("0x" + Twine::utohexstr(version)).str();
// Validate each object file's __objc_imageinfo and use them to generate the
// image info for the output binary. Only two pieces of info are relevant:
// 1. The Swift version (should be identical across inputs)
// 2. `bool hasCategoryClassProperties` (true only if true for all inputs)
void ObjCImageInfoSection::finalizeContents() {
  assert(files.size() != 0); // should have already been checked via isNeeded()

  info.hasCategoryClassProperties = true;
  const InputFile *firstFile;
  for (auto file : files) {
    ImageInfo inputInfo = parseImageInfo(file);
    info.hasCategoryClassProperties &= inputInfo.hasCategoryClassProperties;

    // swiftVersion 0 means no Swift is present, so no version checking required
    if (inputInfo.swiftVersion == 0)
      continue;

    if (info.swiftVersion != 0 && info.swiftVersion != inputInfo.swiftVersion) {
      error("Swift version mismatch: " + toString(firstFile) + " has version " +
            swiftVersionString(info.swiftVersion) + " but " + toString(file) +
            " has version " + swiftVersionString(inputInfo.swiftVersion));
      info.swiftVersion = inputInfo.swiftVersion;

void ObjCImageInfoSection::writeTo(uint8_t *buf) const {
  uint32_t flags = info.hasCategoryClassProperties ? 0x40 : 0x0;
  flags |= info.swiftVersion << 8;
  write32le(buf + 4, flags);
void macho::createSyntheticSymbols() {
  auto addHeaderSymbol = [](const char *name) {
    symtab->addSynthetic(name, in.header->isec, /*value=*/0,
                         /*isPrivateExtern=*/true, /*includeInSymtab=*/false,
                         /*referencedDynamically=*/false);

  switch (config->outputType) {
    // FIXME: Assign the right address value for these symbols
    // (rather than 0). But we need to do that after assignAddresses().
    // If linking PIE, __mh_execute_header is a defined symbol in __TEXT,__text.
    // Otherwise, it's an absolute symbol.
      symtab->addSynthetic("__mh_execute_header", in.header->isec, /*value=*/0,
                           /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
                           /*referencedDynamically=*/true);
      symtab->addSynthetic("__mh_execute_header", /*isec=*/nullptr, /*value=*/0,
                           /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
                           /*referencedDynamically=*/true);
    // The following symbols are N_SECT symbols, even though the header is not
    // part of any section, and they are private to the bundle/dylib/object
    // they are part of.
    addHeaderSymbol("__mh_bundle_header");
    addHeaderSymbol("__mh_dylib_header");
    addHeaderSymbol("__mh_dylinker_header");
    addHeaderSymbol("__mh_object_header");
    llvm_unreachable("unexpected outputType");

  // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
  // which does e.g. cleanup of static global variables. The ABI document
  // says that the pointer can point to any address in one of the dylib's
  // segments, but in practice ld64 seems to set it to point to the header,
  // so that's what's implemented here.
  addHeaderSymbol("___dso_handle");

template SymtabSection *macho::makeSymtabSection<LP64>(StringTableSection &);
template SymtabSection *macho::makeSymtabSection<ILP32>(StringTableSection &);