1 //===- SyntheticSections.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "SyntheticSections.h"
10 #include "ConcatOutputSection.h"
12 #include "ExportTrie.h"
13 #include "InputFiles.h"
14 #include "MachOStructs.h"
15 #include "OutputSegment.h"
16 #include "SymbolTable.h"
19 #include "lld/Common/ErrorHandler.h"
20 #include "lld/Common/Memory.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/EndianStream.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/LEB128.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/SHA256.h"
29 #if defined(__APPLE__)
33 #ifdef LLVM_HAVE_LIBXAR
39 using namespace llvm::MachO
;
40 using namespace llvm::support
;
41 using namespace llvm::support::endian
;
43 using namespace lld::macho
;
46 std::vector
<SyntheticSection
*> macho::syntheticSections
;
48 SyntheticSection::SyntheticSection(const char *segname
, const char *name
)
49 : OutputSection(SyntheticKind
, name
) {
50 std::tie(this->segname
, this->name
) = maybeRenameSection({segname
, name
});
51 isec
= make
<ConcatInputSection
>(segname
, name
);
53 syntheticSections
.push_back(this);
56 // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
57 // from the beginning of the file (i.e. the header).
58 MachHeaderSection::MachHeaderSection()
59 : SyntheticSection(segment_names::text
, section_names::header
) {
60 // XXX: This is a hack. (See D97007)
61 // Setting the index to 1 to pretend that this section is the text
67 void MachHeaderSection::addLoadCommand(LoadCommand
*lc
) {
68 loadCommands
.push_back(lc
);
69 sizeOfCmds
+= lc
->getSize();
72 uint64_t MachHeaderSection::getSize() const {
73 uint64_t size
= target
->headerSize
+ sizeOfCmds
+ config
->headerPad
;
74 // If we are emitting an encryptable binary, our load commands must have a
75 // separate (non-encrypted) page to themselves.
76 if (config
->emitEncryptionInfo
)
77 size
= alignTo(size
, target
->getPageSize());
81 static uint32_t cpuSubtype() {
82 uint32_t subtype
= target
->cpuSubtype
;
84 if (config
->outputType
== MH_EXECUTE
&& !config
->staticLink
&&
85 target
->cpuSubtype
== CPU_SUBTYPE_X86_64_ALL
&&
86 config
->platform() == PlatformKind::macOS
&&
87 config
->platformInfo
.minimum
>= VersionTuple(10, 5))
88 subtype
|= CPU_SUBTYPE_LIB64
;
93 void MachHeaderSection::writeTo(uint8_t *buf
) const {
94 auto *hdr
= reinterpret_cast<mach_header
*>(buf
);
95 hdr
->magic
= target
->magic
;
96 hdr
->cputype
= target
->cpuType
;
97 hdr
->cpusubtype
= cpuSubtype();
98 hdr
->filetype
= config
->outputType
;
99 hdr
->ncmds
= loadCommands
.size();
100 hdr
->sizeofcmds
= sizeOfCmds
;
101 hdr
->flags
= MH_DYLDLINK
;
103 if (config
->namespaceKind
== NamespaceKind::twolevel
)
104 hdr
->flags
|= MH_NOUNDEFS
| MH_TWOLEVEL
;
106 if (config
->outputType
== MH_DYLIB
&& !config
->hasReexports
)
107 hdr
->flags
|= MH_NO_REEXPORTED_DYLIBS
;
109 if (config
->markDeadStrippableDylib
)
110 hdr
->flags
|= MH_DEAD_STRIPPABLE_DYLIB
;
112 if (config
->outputType
== MH_EXECUTE
&& config
->isPic
)
113 hdr
->flags
|= MH_PIE
;
115 if (config
->outputType
== MH_DYLIB
&& config
->applicationExtension
)
116 hdr
->flags
|= MH_APP_EXTENSION_SAFE
;
118 if (in
.exports
->hasWeakSymbol
|| in
.weakBinding
->hasNonWeakDefinition())
119 hdr
->flags
|= MH_WEAK_DEFINES
;
121 if (in
.exports
->hasWeakSymbol
|| in
.weakBinding
->hasEntry())
122 hdr
->flags
|= MH_BINDS_TO_WEAK
;
124 for (const OutputSegment
*seg
: outputSegments
) {
125 for (const OutputSection
*osec
: seg
->getSections()) {
126 if (isThreadLocalVariables(osec
->flags
)) {
127 hdr
->flags
|= MH_HAS_TLV_DESCRIPTORS
;
133 uint8_t *p
= reinterpret_cast<uint8_t *>(hdr
) + target
->headerSize
;
134 for (const LoadCommand
*lc
: loadCommands
) {
140 PageZeroSection::PageZeroSection()
141 : SyntheticSection(segment_names::pageZero
, section_names::pageZero
) {}
143 RebaseSection::RebaseSection()
144 : LinkEditSection(segment_names::linkEdit
, section_names::rebase
) {}
148 OutputSegment
*segment
= nullptr;
150 uint64_t consecutiveCount
= 0;
154 // Rebase opcodes allow us to describe a contiguous sequence of rebase location
155 // using a single DO_REBASE opcode. To take advantage of it, we delay emitting
156 // `DO_REBASE` until we have reached the end of a contiguous sequence.
157 static void encodeDoRebase(Rebase
&rebase
, raw_svector_ostream
&os
) {
158 assert(rebase
.consecutiveCount
!= 0);
159 if (rebase
.consecutiveCount
<= REBASE_IMMEDIATE_MASK
) {
160 os
<< static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES
|
161 rebase
.consecutiveCount
);
163 os
<< static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES
);
164 encodeULEB128(rebase
.consecutiveCount
, os
);
166 rebase
.consecutiveCount
= 0;
169 static void encodeRebase(const OutputSection
*osec
, uint64_t outSecOff
,
170 Rebase
&lastRebase
, raw_svector_ostream
&os
) {
171 OutputSegment
*seg
= osec
->parent
;
172 uint64_t offset
= osec
->getSegmentOffset() + outSecOff
;
173 if (lastRebase
.segment
!= seg
|| lastRebase
.offset
!= offset
) {
174 if (lastRebase
.consecutiveCount
!= 0)
175 encodeDoRebase(lastRebase
, os
);
177 if (lastRebase
.segment
!= seg
) {
178 os
<< static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
180 encodeULEB128(offset
, os
);
181 lastRebase
.segment
= seg
;
182 lastRebase
.offset
= offset
;
184 assert(lastRebase
.offset
!= offset
);
185 os
<< static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB
);
186 encodeULEB128(offset
- lastRebase
.offset
, os
);
187 lastRebase
.offset
= offset
;
190 ++lastRebase
.consecutiveCount
;
191 // DO_REBASE causes dyld to both perform the binding and increment the offset
192 lastRebase
.offset
+= target
->wordSize
;
195 void RebaseSection::finalizeContents() {
196 if (locations
.empty())
199 raw_svector_ostream os
{contents
};
202 os
<< static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM
| REBASE_TYPE_POINTER
);
204 llvm::sort(locations
, [](const Location
&a
, const Location
&b
) {
205 return a
.isec
->getVA(a
.offset
) < b
.isec
->getVA(b
.offset
);
207 for (const Location
&loc
: locations
)
208 encodeRebase(loc
.isec
->parent
, loc
.isec
->getOffset(loc
.offset
), lastRebase
,
210 if (lastRebase
.consecutiveCount
!= 0)
211 encodeDoRebase(lastRebase
, os
);
213 os
<< static_cast<uint8_t>(REBASE_OPCODE_DONE
);
216 void RebaseSection::writeTo(uint8_t *buf
) const {
217 memcpy(buf
, contents
.data(), contents
.size());
220 NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname
,
222 : SyntheticSection(segname
, name
) {
223 align
= target
->wordSize
;
226 void macho::addNonLazyBindingEntries(const Symbol
*sym
,
227 const InputSection
*isec
, uint64_t offset
,
229 if (const auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
230 in
.binding
->addEntry(dysym
, isec
, offset
, addend
);
231 if (dysym
->isWeakDef())
232 in
.weakBinding
->addEntry(sym
, isec
, offset
, addend
);
233 } else if (const auto *defined
= dyn_cast
<Defined
>(sym
)) {
234 in
.rebase
->addEntry(isec
, offset
);
235 if (defined
->isExternalWeakDef())
236 in
.weakBinding
->addEntry(sym
, isec
, offset
, addend
);
238 // Undefined symbols are filtered out in scanRelocations(); we should never
240 llvm_unreachable("cannot bind to an undefined symbol");
244 void NonLazyPointerSectionBase::addEntry(Symbol
*sym
) {
245 if (entries
.insert(sym
)) {
246 assert(!sym
->isInGot());
247 sym
->gotIndex
= entries
.size() - 1;
249 addNonLazyBindingEntries(sym
, isec
, sym
->gotIndex
* target
->wordSize
);
253 void NonLazyPointerSectionBase::writeTo(uint8_t *buf
) const {
254 for (size_t i
= 0, n
= entries
.size(); i
< n
; ++i
)
255 if (auto *defined
= dyn_cast
<Defined
>(entries
[i
]))
256 write64le(&buf
[i
* target
->wordSize
], defined
->getVA());
259 GotSection::GotSection()
260 : NonLazyPointerSectionBase(segment_names::dataConst
, section_names::got
) {
261 flags
= S_NON_LAZY_SYMBOL_POINTERS
;
264 TlvPointerSection::TlvPointerSection()
265 : NonLazyPointerSectionBase(segment_names::data
,
266 section_names::threadPtrs
) {
267 flags
= S_THREAD_LOCAL_VARIABLE_POINTERS
;
270 BindingSection::BindingSection()
271 : LinkEditSection(segment_names::linkEdit
, section_names::binding
) {}
275 OutputSegment
*segment
= nullptr;
280 // Default value of 0xF0 is not valid opcode and should make the program
281 // scream instead of accidentally writing "valid" values.
282 uint8_t opcode
= 0xF0;
284 uint64_t consecutiveCount
= 0;
288 // Encode a sequence of opcodes that tell dyld to write the address of symbol +
289 // addend at osec->addr + outSecOff.
291 // The bind opcode "interpreter" remembers the values of each binding field, so
292 // we only need to encode the differences between bindings. Hence the use of
294 static void encodeBinding(const OutputSection
*osec
, uint64_t outSecOff
,
295 int64_t addend
, Binding
&lastBinding
,
296 std::vector
<BindIR
> &opcodes
) {
297 OutputSegment
*seg
= osec
->parent
;
298 uint64_t offset
= osec
->getSegmentOffset() + outSecOff
;
299 if (lastBinding
.segment
!= seg
) {
301 {static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
304 lastBinding
.segment
= seg
;
305 lastBinding
.offset
= offset
;
306 } else if (lastBinding
.offset
!= offset
) {
307 opcodes
.push_back({BIND_OPCODE_ADD_ADDR_ULEB
, offset
- lastBinding
.offset
});
308 lastBinding
.offset
= offset
;
311 if (lastBinding
.addend
!= addend
) {
313 {BIND_OPCODE_SET_ADDEND_SLEB
, static_cast<uint64_t>(addend
)});
314 lastBinding
.addend
= addend
;
317 opcodes
.push_back({BIND_OPCODE_DO_BIND
, 0});
318 // DO_BIND causes dyld to both perform the binding and increment the offset
319 lastBinding
.offset
+= target
->wordSize
;
322 static void optimizeOpcodes(std::vector
<BindIR
> &opcodes
) {
323 // Pass 1: Combine bind/add pairs
326 for (i
= 1; i
< opcodes
.size(); ++i
, ++pWrite
) {
327 if ((opcodes
[i
].opcode
== BIND_OPCODE_ADD_ADDR_ULEB
) &&
328 (opcodes
[i
- 1].opcode
== BIND_OPCODE_DO_BIND
)) {
329 opcodes
[pWrite
].opcode
= BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
;
330 opcodes
[pWrite
].data
= opcodes
[i
].data
;
333 opcodes
[pWrite
] = opcodes
[i
- 1];
336 if (i
== opcodes
.size())
337 opcodes
[pWrite
] = opcodes
[i
- 1];
338 opcodes
.resize(pWrite
+ 1);
340 // Pass 2: Compress two or more bind_add opcodes
342 for (i
= 1; i
< opcodes
.size(); ++i
, ++pWrite
) {
343 if ((opcodes
[i
].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
344 (opcodes
[i
- 1].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
345 (opcodes
[i
].data
== opcodes
[i
- 1].data
)) {
346 opcodes
[pWrite
].opcode
= BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
;
347 opcodes
[pWrite
].consecutiveCount
= 2;
348 opcodes
[pWrite
].data
= opcodes
[i
].data
;
350 while (i
< opcodes
.size() &&
351 (opcodes
[i
].opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
352 (opcodes
[i
].data
== opcodes
[i
- 1].data
)) {
353 opcodes
[pWrite
].consecutiveCount
++;
357 opcodes
[pWrite
] = opcodes
[i
- 1];
360 if (i
== opcodes
.size())
361 opcodes
[pWrite
] = opcodes
[i
- 1];
362 opcodes
.resize(pWrite
+ 1);
364 // Pass 3: Use immediate encodings
365 // Every binding is the size of one pointer. If the next binding is a
366 // multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the
367 // opcode can be scaled by wordSize into a single byte and dyld will
368 // expand it to the correct address.
369 for (auto &p
: opcodes
) {
370 // It's unclear why the check needs to be less than BIND_IMMEDIATE_MASK,
371 // but ld64 currently does this. This could be a potential bug, but
372 // for now, perform the same behavior to prevent mysterious bugs.
373 if ((p
.opcode
== BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
) &&
374 ((p
.data
/ target
->wordSize
) < BIND_IMMEDIATE_MASK
) &&
375 ((p
.data
% target
->wordSize
) == 0)) {
376 p
.opcode
= BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
;
377 p
.data
/= target
->wordSize
;
382 static void flushOpcodes(const BindIR
&op
, raw_svector_ostream
&os
) {
383 uint8_t opcode
= op
.opcode
& BIND_OPCODE_MASK
;
385 case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
:
386 case BIND_OPCODE_ADD_ADDR_ULEB
:
387 case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
:
389 encodeULEB128(op
.data
, os
);
391 case BIND_OPCODE_SET_ADDEND_SLEB
:
393 encodeSLEB128(static_cast<int64_t>(op
.data
), os
);
395 case BIND_OPCODE_DO_BIND
:
398 case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
:
400 encodeULEB128(op
.consecutiveCount
, os
);
401 encodeULEB128(op
.data
, os
);
403 case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
:
404 os
<< static_cast<uint8_t>(op
.opcode
| op
.data
);
407 llvm_unreachable("cannot bind to an unrecognized symbol");
411 // Non-weak bindings need to have their dylib ordinal encoded as well.
412 static int16_t ordinalForDylibSymbol(const DylibSymbol
&dysym
) {
413 if (config
->namespaceKind
== NamespaceKind::flat
|| dysym
.isDynamicLookup())
414 return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP
);
415 assert(dysym
.getFile()->isReferenced());
416 return dysym
.getFile()->ordinal
;
419 static void encodeDylibOrdinal(int16_t ordinal
, raw_svector_ostream
&os
) {
421 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM
|
422 (ordinal
& BIND_IMMEDIATE_MASK
));
423 } else if (ordinal
<= BIND_IMMEDIATE_MASK
) {
424 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM
| ordinal
);
426 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB
);
427 encodeULEB128(ordinal
, os
);
431 static void encodeWeakOverride(const Defined
*defined
,
432 raw_svector_ostream
&os
) {
433 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
|
434 BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION
)
435 << defined
->getName() << '\0';
438 // Organize the bindings so we can encoded them with fewer opcodes.
440 // First, all bindings for a given symbol should be grouped together.
441 // BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it
442 // has an associated symbol string), so we only want to emit it once per symbol.
444 // Within each group, we sort the bindings by address. Since bindings are
445 // delta-encoded, sorting them allows for a more compact result. Note that
446 // sorting by address alone ensures that bindings for the same segment / section
447 // are located together, minimizing the number of times we have to emit
448 // BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB.
450 // Finally, we sort the symbols by the address of their first binding, again
451 // to facilitate the delta-encoding process.
453 std::vector
<std::pair
<const Sym
*, std::vector
<BindingEntry
>>>
454 sortBindings(const BindingsMap
<const Sym
*> &bindingsMap
) {
455 std::vector
<std::pair
<const Sym
*, std::vector
<BindingEntry
>>> bindingsVec(
456 bindingsMap
.begin(), bindingsMap
.end());
457 for (auto &p
: bindingsVec
) {
458 std::vector
<BindingEntry
> &bindings
= p
.second
;
459 llvm::sort(bindings
, [](const BindingEntry
&a
, const BindingEntry
&b
) {
460 return a
.target
.getVA() < b
.target
.getVA();
463 llvm::sort(bindingsVec
, [](const auto &a
, const auto &b
) {
464 return a
.second
[0].target
.getVA() < b
.second
[0].target
.getVA();
469 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
470 // interprets to update a record with the following fields:
471 // * segment index (of the segment to write the symbol addresses to, typically
472 // the __DATA_CONST segment which contains the GOT)
473 // * offset within the segment, indicating the next location to write a binding
475 // * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
478 // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
479 // a symbol in the GOT, and increments the segment offset to point to the next
480 // entry. It does *not* clear the record state after doing the bind, so
481 // subsequent opcodes only need to encode the differences between bindings.
482 void BindingSection::finalizeContents() {
483 raw_svector_ostream os
{contents
};
485 int16_t lastOrdinal
= 0;
487 for (auto &p
: sortBindings(bindingsMap
)) {
488 const DylibSymbol
*sym
= p
.first
;
489 std::vector
<BindingEntry
> &bindings
= p
.second
;
490 uint8_t flags
= BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
;
491 if (sym
->isWeakRef())
492 flags
|= BIND_SYMBOL_FLAGS_WEAK_IMPORT
;
493 os
<< flags
<< sym
->getName() << '\0'
494 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM
| BIND_TYPE_POINTER
);
495 int16_t ordinal
= ordinalForDylibSymbol(*sym
);
496 if (ordinal
!= lastOrdinal
) {
497 encodeDylibOrdinal(ordinal
, os
);
498 lastOrdinal
= ordinal
;
500 std::vector
<BindIR
> opcodes
;
501 for (const BindingEntry
&b
: bindings
)
502 encodeBinding(b
.target
.isec
->parent
,
503 b
.target
.isec
->getOffset(b
.target
.offset
), b
.addend
,
504 lastBinding
, opcodes
);
505 if (config
->optimize
> 1)
506 optimizeOpcodes(opcodes
);
507 for (const auto &op
: opcodes
)
508 flushOpcodes(op
, os
);
510 if (!bindingsMap
.empty())
511 os
<< static_cast<uint8_t>(BIND_OPCODE_DONE
);
514 void BindingSection::writeTo(uint8_t *buf
) const {
515 memcpy(buf
, contents
.data(), contents
.size());
518 WeakBindingSection::WeakBindingSection()
519 : LinkEditSection(segment_names::linkEdit
, section_names::weakBinding
) {}
521 void WeakBindingSection::finalizeContents() {
522 raw_svector_ostream os
{contents
};
525 for (const Defined
*defined
: definitions
)
526 encodeWeakOverride(defined
, os
);
528 for (auto &p
: sortBindings(bindingsMap
)) {
529 const Symbol
*sym
= p
.first
;
530 std::vector
<BindingEntry
> &bindings
= p
.second
;
531 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
)
532 << sym
->getName() << '\0'
533 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM
| BIND_TYPE_POINTER
);
534 std::vector
<BindIR
> opcodes
;
535 for (const BindingEntry
&b
: bindings
)
536 encodeBinding(b
.target
.isec
->parent
,
537 b
.target
.isec
->getOffset(b
.target
.offset
), b
.addend
,
538 lastBinding
, opcodes
);
539 if (config
->optimize
> 1)
540 optimizeOpcodes(opcodes
);
541 for (const auto &op
: opcodes
)
542 flushOpcodes(op
, os
);
544 if (!bindingsMap
.empty() || !definitions
.empty())
545 os
<< static_cast<uint8_t>(BIND_OPCODE_DONE
);
548 void WeakBindingSection::writeTo(uint8_t *buf
) const {
549 memcpy(buf
, contents
.data(), contents
.size());
552 StubsSection::StubsSection()
553 : SyntheticSection(segment_names::text
, section_names::stubs
) {
554 flags
= S_SYMBOL_STUBS
| S_ATTR_SOME_INSTRUCTIONS
| S_ATTR_PURE_INSTRUCTIONS
;
555 // The stubs section comprises machine instructions, which are aligned to
556 // 4 bytes on the archs we care about.
558 reserved2
= target
->stubSize
;
561 uint64_t StubsSection::getSize() const {
562 return entries
.size() * target
->stubSize
;
565 void StubsSection::writeTo(uint8_t *buf
) const {
567 for (const Symbol
*sym
: entries
) {
568 target
->writeStub(buf
+ off
, *sym
);
569 off
+= target
->stubSize
;
573 void StubsSection::finalize() { isFinal
= true; }
575 bool StubsSection::addEntry(Symbol
*sym
) {
576 bool inserted
= entries
.insert(sym
);
578 sym
->stubsIndex
= entries
.size() - 1;
582 StubHelperSection::StubHelperSection()
583 : SyntheticSection(segment_names::text
, section_names::stubHelper
) {
584 flags
= S_ATTR_SOME_INSTRUCTIONS
| S_ATTR_PURE_INSTRUCTIONS
;
585 align
= 4; // This section comprises machine instructions
588 uint64_t StubHelperSection::getSize() const {
589 return target
->stubHelperHeaderSize
+
590 in
.lazyBinding
->getEntries().size() * target
->stubHelperEntrySize
;
593 bool StubHelperSection::isNeeded() const { return in
.lazyBinding
->isNeeded(); }
595 void StubHelperSection::writeTo(uint8_t *buf
) const {
596 target
->writeStubHelperHeader(buf
);
597 size_t off
= target
->stubHelperHeaderSize
;
598 for (const DylibSymbol
*sym
: in
.lazyBinding
->getEntries()) {
599 target
->writeStubHelperEntry(buf
+ off
, *sym
, addr
+ off
);
600 off
+= target
->stubHelperEntrySize
;
604 void StubHelperSection::setup() {
605 Symbol
*binder
= symtab
->addUndefined("dyld_stub_binder", /*file=*/nullptr,
606 /*isWeakRef=*/false);
607 if (auto *undefined
= dyn_cast
<Undefined
>(binder
))
608 treatUndefinedSymbol(*undefined
,
609 "lazy binding (normally in libSystem.dylib)");
611 // treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check.
612 stubBinder
= dyn_cast_or_null
<DylibSymbol
>(binder
);
613 if (stubBinder
== nullptr)
616 in
.got
->addEntry(stubBinder
);
618 in
.imageLoaderCache
->parent
=
619 ConcatOutputSection::getOrCreateForInput(in
.imageLoaderCache
);
620 inputSections
.push_back(in
.imageLoaderCache
);
621 // Since this isn't in the symbol table or in any input file, the noDeadStrip
622 // argument doesn't matter. It's kept alive by ImageLoaderCacheSection()
623 // setting `live` to true on the backing InputSection.
625 make
<Defined
>("__dyld_private", nullptr, in
.imageLoaderCache
, 0, 0,
627 /*isExternal=*/false, /*isPrivateExtern=*/false,
628 /*isThumb=*/false, /*isReferencedDynamically=*/false,
629 /*noDeadStrip=*/false);
632 LazyPointerSection::LazyPointerSection()
633 : SyntheticSection(segment_names::data
, section_names::lazySymbolPtr
) {
634 align
= target
->wordSize
;
635 flags
= S_LAZY_SYMBOL_POINTERS
;
638 uint64_t LazyPointerSection::getSize() const {
639 return in
.stubs
->getEntries().size() * target
->wordSize
;
642 bool LazyPointerSection::isNeeded() const {
643 return !in
.stubs
->getEntries().empty();
646 void LazyPointerSection::writeTo(uint8_t *buf
) const {
648 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
649 if (const auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
650 if (dysym
->hasStubsHelper()) {
651 uint64_t stubHelperOffset
=
652 target
->stubHelperHeaderSize
+
653 dysym
->stubsHelperIndex
* target
->stubHelperEntrySize
;
654 write64le(buf
+ off
, in
.stubHelper
->addr
+ stubHelperOffset
);
657 write64le(buf
+ off
, sym
->getVA());
659 off
+= target
->wordSize
;
663 LazyBindingSection::LazyBindingSection()
664 : LinkEditSection(segment_names::linkEdit
, section_names::lazyBinding
) {}
666 void LazyBindingSection::finalizeContents() {
667 // TODO: Just precompute output size here instead of writing to a temporary
669 for (DylibSymbol
*sym
: entries
)
670 sym
->lazyBindOffset
= encode(*sym
);
673 void LazyBindingSection::writeTo(uint8_t *buf
) const {
674 memcpy(buf
, contents
.data(), contents
.size());
677 void LazyBindingSection::addEntry(DylibSymbol
*dysym
) {
678 if (entries
.insert(dysym
)) {
679 dysym
->stubsHelperIndex
= entries
.size() - 1;
680 in
.rebase
->addEntry(in
.lazyPointers
->isec
,
681 dysym
->stubsIndex
* target
->wordSize
);
685 // Unlike the non-lazy binding section, the bind opcodes in this section aren't
686 // interpreted all at once. Rather, dyld will start interpreting opcodes at a
687 // given offset, typically only binding a single symbol before it finds a
688 // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
689 // we cannot encode just the differences between symbols; we have to emit the
690 // complete bind information for each symbol.
691 uint32_t LazyBindingSection::encode(const DylibSymbol
&sym
) {
692 uint32_t opstreamOffset
= contents
.size();
693 OutputSegment
*dataSeg
= in
.lazyPointers
->parent
;
694 os
<< static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
|
696 uint64_t offset
= in
.lazyPointers
->addr
- dataSeg
->addr
+
697 sym
.stubsIndex
* target
->wordSize
;
698 encodeULEB128(offset
, os
);
699 encodeDylibOrdinal(ordinalForDylibSymbol(sym
), os
);
701 uint8_t flags
= BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
;
703 flags
|= BIND_SYMBOL_FLAGS_WEAK_IMPORT
;
705 os
<< flags
<< sym
.getName() << '\0'
706 << static_cast<uint8_t>(BIND_OPCODE_DO_BIND
)
707 << static_cast<uint8_t>(BIND_OPCODE_DONE
);
708 return opstreamOffset
;
711 ExportSection::ExportSection()
712 : LinkEditSection(segment_names::linkEdit
, section_names::export_
) {}
714 void ExportSection::finalizeContents() {
715 trieBuilder
.setImageBase(in
.header
->addr
);
716 for (const Symbol
*sym
: symtab
->getSymbols()) {
717 if (const auto *defined
= dyn_cast
<Defined
>(sym
)) {
718 if (defined
->privateExtern
|| !defined
->isLive())
720 trieBuilder
.addSymbol(*defined
);
721 hasWeakSymbol
= hasWeakSymbol
|| sym
->isWeakDef();
724 size
= trieBuilder
.build();
727 void ExportSection::writeTo(uint8_t *buf
) const { trieBuilder
.writeTo(buf
); }
729 DataInCodeSection::DataInCodeSection()
730 : LinkEditSection(segment_names::linkEdit
, section_names::dataInCode
) {}
733 static std::vector
<MachO::data_in_code_entry
> collectDataInCodeEntries() {
734 using SegmentCommand
= typename
LP::segment_command
;
735 using Section
= typename
LP::section
;
737 std::vector
<MachO::data_in_code_entry
> dataInCodeEntries
;
738 for (const InputFile
*inputFile
: inputFiles
) {
739 if (!isa
<ObjFile
>(inputFile
))
741 const ObjFile
*objFile
= cast
<ObjFile
>(inputFile
);
742 const auto *c
= reinterpret_cast<const SegmentCommand
*>(
743 findCommand(objFile
->mb
.getBufferStart(), LP::segmentLCType
));
746 ArrayRef
<Section
> sections
{reinterpret_cast<const Section
*>(c
+ 1),
749 ArrayRef
<MachO::data_in_code_entry
> entries
= objFile
->dataInCodeEntries
;
752 // For each code subsection find 'data in code' entries residing in it.
753 // Compute the new offset values as
754 // <offset within subsection> + <subsection address> - <__TEXT address>.
755 for (size_t i
= 0, n
= sections
.size(); i
< n
; ++i
) {
756 const SubsectionMap
&subsecMap
= objFile
->subsections
[i
];
757 for (const SubsectionEntry
&subsecEntry
: subsecMap
) {
758 const InputSection
*isec
= subsecEntry
.isec
;
759 if (!isCodeSection(isec
))
761 if (cast
<ConcatInputSection
>(isec
)->shouldOmitFromOutput())
763 const uint64_t beginAddr
= sections
[i
].addr
+ subsecEntry
.offset
;
764 auto it
= llvm::lower_bound(
766 [](const MachO::data_in_code_entry
&entry
, uint64_t addr
) {
767 return entry
.offset
< addr
;
769 const uint64_t endAddr
= beginAddr
+ isec
->getFileSize();
770 for (const auto end
= entries
.end();
771 it
!= end
&& it
->offset
+ it
->length
<= endAddr
; ++it
)
772 dataInCodeEntries
.push_back(
773 {static_cast<uint32_t>(isec
->getVA(it
->offset
- beginAddr
) -
775 it
->length
, it
->kind
});
779 return dataInCodeEntries
;
782 void DataInCodeSection::finalizeContents() {
783 entries
= target
->wordSize
== 8 ? collectDataInCodeEntries
<LP64
>()
784 : collectDataInCodeEntries
<ILP32
>();
787 void DataInCodeSection::writeTo(uint8_t *buf
) const {
788 if (!entries
.empty())
789 memcpy(buf
, entries
.data(), getRawSize());
792 FunctionStartsSection::FunctionStartsSection()
793 : LinkEditSection(segment_names::linkEdit
, section_names::functionStarts
) {}
795 void FunctionStartsSection::finalizeContents() {
796 raw_svector_ostream os
{contents
};
797 std::vector
<uint64_t> addrs
;
798 for (const Symbol
*sym
: symtab
->getSymbols()) {
799 if (const auto *defined
= dyn_cast
<Defined
>(sym
)) {
800 if (!defined
->isec
|| !isCodeSection(defined
->isec
) || !defined
->isLive())
802 if (const auto *concatIsec
= dyn_cast
<ConcatInputSection
>(defined
->isec
))
803 if (concatIsec
->shouldOmitFromOutput())
805 // TODO: Add support for thumbs, in that case
806 // the lowest bit of nextAddr needs to be set to 1.
807 addrs
.push_back(defined
->getVA());
811 uint64_t addr
= in
.header
->addr
;
812 for (uint64_t nextAddr
: addrs
) {
813 uint64_t delta
= nextAddr
- addr
;
816 encodeULEB128(delta
, os
);
822 void FunctionStartsSection::writeTo(uint8_t *buf
) const {
823 memcpy(buf
, contents
.data(), contents
.size());
826 SymtabSection::SymtabSection(StringTableSection
&stringTableSection
)
827 : LinkEditSection(segment_names::linkEdit
, section_names::symbolTable
),
828 stringTableSection(stringTableSection
) {}
830 void SymtabSection::emitBeginSourceStab(DWARFUnit
*compileUnit
) {
831 StabsEntry
stab(N_SO
);
832 SmallString
<261> dir(compileUnit
->getCompilationDir());
833 StringRef sep
= sys::path::get_separator();
834 // We don't use `path::append` here because we want an empty `dir` to result
835 // in an absolute path. `append` would give us a relative path for that case.
836 if (!dir
.endswith(sep
))
838 stab
.strx
= stringTableSection
.addString(
839 saver
.save(dir
+ compileUnit
->getUnitDIE().getShortName()));
840 stabs
.emplace_back(std::move(stab
));
843 void SymtabSection::emitEndSourceStab() {
844 StabsEntry
stab(N_SO
);
846 stabs
.emplace_back(std::move(stab
));
849 void SymtabSection::emitObjectFileStab(ObjFile
*file
) {
850 StabsEntry
stab(N_OSO
);
851 stab
.sect
= target
->cpuSubtype
;
852 SmallString
<261> path(!file
->archiveName
.empty() ? file
->archiveName
854 std::error_code ec
= sys::fs::make_absolute(path
);
856 fatal("failed to get absolute path for " + path
);
858 if (!file
->archiveName
.empty())
859 path
.append({"(", file
->getName(), ")"});
861 stab
.strx
= stringTableSection
.addString(saver
.save(path
.str()));
863 stab
.value
= file
->modTime
;
864 stabs
.emplace_back(std::move(stab
));
867 void SymtabSection::emitEndFunStab(Defined
*defined
) {
868 StabsEntry
stab(N_FUN
);
869 stab
.value
= defined
->size
;
870 stabs
.emplace_back(std::move(stab
));
873 void SymtabSection::emitStabs() {
874 for (const std::string
&s
: config
->astPaths
) {
875 StabsEntry
astStab(N_AST
);
876 astStab
.strx
= stringTableSection
.addString(s
);
877 stabs
.emplace_back(std::move(astStab
));
880 std::vector
<Defined
*> symbolsNeedingStabs
;
881 for (const SymtabEntry
&entry
:
882 concat
<SymtabEntry
>(localSymbols
, externalSymbols
)) {
883 Symbol
*sym
= entry
.sym
;
884 assert(sym
->isLive() &&
885 "dead symbols should not be in localSymbols, externalSymbols");
886 if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
887 if (defined
->isAbsolute())
889 InputSection
*isec
= defined
->isec
;
890 ObjFile
*file
= dyn_cast_or_null
<ObjFile
>(isec
->getFile());
891 if (!file
|| !file
->compileUnit
)
893 symbolsNeedingStabs
.push_back(defined
);
897 llvm::stable_sort(symbolsNeedingStabs
, [&](Defined
*a
, Defined
*b
) {
898 return a
->isec
->getFile()->id
< b
->isec
->getFile()->id
;
901 // Emit STABS symbols so that dsymutil and/or the debugger can map address
902 // regions in the final binary to the source and object files from which they
904 InputFile
*lastFile
= nullptr;
905 for (Defined
*defined
: symbolsNeedingStabs
) {
906 InputSection
*isec
= defined
->isec
;
907 ObjFile
*file
= cast
<ObjFile
>(isec
->getFile());
909 if (lastFile
== nullptr || lastFile
!= file
) {
910 if (lastFile
!= nullptr)
914 emitBeginSourceStab(file
->compileUnit
);
915 emitObjectFileStab(file
);
919 symStab
.sect
= defined
->isec
->canonical()->parent
->index
;
920 symStab
.strx
= stringTableSection
.addString(defined
->getName());
921 symStab
.value
= defined
->getVA();
923 if (isCodeSection(isec
)) {
924 symStab
.type
= N_FUN
;
925 stabs
.emplace_back(std::move(symStab
));
926 emitEndFunStab(defined
);
928 symStab
.type
= defined
->isExternal() ? N_GSYM
: N_STSYM
;
929 stabs
.emplace_back(std::move(symStab
));
937 void SymtabSection::finalizeContents() {
938 auto addSymbol
= [&](std::vector
<SymtabEntry
> &symbols
, Symbol
*sym
) {
939 uint32_t strx
= stringTableSection
.addString(sym
->getName());
940 symbols
.push_back({sym
, strx
});
943 // Local symbols aren't in the SymbolTable, so we walk the list of object
944 // files to gather them.
945 for (const InputFile
*file
: inputFiles
) {
946 if (auto *objFile
= dyn_cast
<ObjFile
>(file
)) {
947 for (Symbol
*sym
: objFile
->symbols
) {
948 if (auto *defined
= dyn_cast_or_null
<Defined
>(sym
)) {
949 if (!defined
->isExternal() && defined
->isLive()) {
950 StringRef name
= defined
->getName();
951 if (!name
.startswith("l") && !name
.startswith("L"))
952 addSymbol(localSymbols
, sym
);
959 // __dyld_private is a local symbol too. It's linker-created and doesn't
960 // exist in any object file.
961 if (Defined
*dyldPrivate
= in
.stubHelper
->dyldPrivate
)
962 addSymbol(localSymbols
, dyldPrivate
);
964 for (Symbol
*sym
: symtab
->getSymbols()) {
967 if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
968 if (!defined
->includeInSymtab
)
970 assert(defined
->isExternal());
971 if (defined
->privateExtern
)
972 addSymbol(localSymbols
, defined
);
974 addSymbol(externalSymbols
, defined
);
975 } else if (auto *dysym
= dyn_cast
<DylibSymbol
>(sym
)) {
976 if (dysym
->isReferenced())
977 addSymbol(undefinedSymbols
, sym
);
982 uint32_t symtabIndex
= stabs
.size();
983 for (const SymtabEntry
&entry
:
984 concat
<SymtabEntry
>(localSymbols
, externalSymbols
, undefinedSymbols
)) {
985 entry
.sym
->symtabIndex
= symtabIndex
++;
989 uint32_t SymtabSection::getNumSymbols() const {
990 return stabs
.size() + localSymbols
.size() + externalSymbols
.size() +
991 undefinedSymbols
.size();
994 // This serves to hide (type-erase) the template parameter from SymtabSection.
995 template <class LP
> class SymtabSectionImpl final
: public SymtabSection
{
997 SymtabSectionImpl(StringTableSection
&stringTableSection
)
998 : SymtabSection(stringTableSection
) {}
999 uint64_t getRawSize() const override
;
1000 void writeTo(uint8_t *buf
) const override
;
1003 template <class LP
> uint64_t SymtabSectionImpl
<LP
>::getRawSize() const {
1004 return getNumSymbols() * sizeof(typename
LP::nlist
);
1007 template <class LP
> void SymtabSectionImpl
<LP
>::writeTo(uint8_t *buf
) const {
1008 auto *nList
= reinterpret_cast<typename
LP::nlist
*>(buf
);
1009 // Emit the stabs entries before the "real" symbols. We cannot emit them
1010 // after as that would render Symbol::symtabIndex inaccurate.
1011 for (const StabsEntry
&entry
: stabs
) {
1012 nList
->n_strx
= entry
.strx
;
1013 nList
->n_type
= entry
.type
;
1014 nList
->n_sect
= entry
.sect
;
1015 nList
->n_desc
= entry
.desc
;
1016 nList
->n_value
= entry
.value
;
1020 for (const SymtabEntry
&entry
: concat
<const SymtabEntry
>(
1021 localSymbols
, externalSymbols
, undefinedSymbols
)) {
1022 nList
->n_strx
= entry
.strx
;
1023 // TODO populate n_desc with more flags
1024 if (auto *defined
= dyn_cast
<Defined
>(entry
.sym
)) {
1026 if (defined
->privateExtern
) {
1027 // Private external -- dylib scoped symbol.
1028 // Promote to non-external at link time.
1030 } else if (defined
->isExternal()) {
1031 // Normal global symbol.
1034 // TU-local symbol from localSymbols.
1038 if (defined
->isAbsolute()) {
1039 nList
->n_type
= scope
| N_ABS
;
1040 nList
->n_sect
= NO_SECT
;
1041 nList
->n_value
= defined
->value
;
1043 nList
->n_type
= scope
| N_SECT
;
1044 nList
->n_sect
= defined
->isec
->canonical()->parent
->index
;
1045 // For the N_SECT symbol type, n_value is the address of the symbol
1046 nList
->n_value
= defined
->getVA();
1048 nList
->n_desc
|= defined
->thumb
? N_ARM_THUMB_DEF
: 0;
1049 nList
->n_desc
|= defined
->isExternalWeakDef() ? N_WEAK_DEF
: 0;
1051 defined
->referencedDynamically
? REFERENCED_DYNAMICALLY
: 0;
1052 } else if (auto *dysym
= dyn_cast
<DylibSymbol
>(entry
.sym
)) {
1053 uint16_t n_desc
= nList
->n_desc
;
1054 int16_t ordinal
= ordinalForDylibSymbol(*dysym
);
1055 if (ordinal
== BIND_SPECIAL_DYLIB_FLAT_LOOKUP
)
1056 SET_LIBRARY_ORDINAL(n_desc
, DYNAMIC_LOOKUP_ORDINAL
);
1057 else if (ordinal
== BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE
)
1058 SET_LIBRARY_ORDINAL(n_desc
, EXECUTABLE_ORDINAL
);
1060 assert(ordinal
> 0);
1061 SET_LIBRARY_ORDINAL(n_desc
, static_cast<uint8_t>(ordinal
));
1064 nList
->n_type
= N_EXT
;
1065 n_desc
|= dysym
->isWeakDef() ? N_WEAK_DEF
: 0;
1066 n_desc
|= dysym
->isWeakRef() ? N_WEAK_REF
: 0;
1067 nList
->n_desc
= n_desc
;
1075 macho::makeSymtabSection(StringTableSection
&stringTableSection
) {
1076 return make
<SymtabSectionImpl
<LP
>>(stringTableSection
);
1079 IndirectSymtabSection::IndirectSymtabSection()
1080 : LinkEditSection(segment_names::linkEdit
,
1081 section_names::indirectSymbolTable
) {}
1083 uint32_t IndirectSymtabSection::getNumSymbols() const {
1084 return in
.got
->getEntries().size() + in
.tlvPointers
->getEntries().size() +
1085 2 * in
.stubs
->getEntries().size();
1088 bool IndirectSymtabSection::isNeeded() const {
1089 return in
.got
->isNeeded() || in
.tlvPointers
->isNeeded() ||
1090 in
.stubs
->isNeeded();
1093 void IndirectSymtabSection::finalizeContents() {
1095 in
.got
->reserved1
= off
;
1096 off
+= in
.got
->getEntries().size();
1097 in
.tlvPointers
->reserved1
= off
;
1098 off
+= in
.tlvPointers
->getEntries().size();
1099 in
.stubs
->reserved1
= off
;
1100 off
+= in
.stubs
->getEntries().size();
1101 in
.lazyPointers
->reserved1
= off
;
1104 static uint32_t indirectValue(const Symbol
*sym
) {
1105 return sym
->symtabIndex
!= UINT32_MAX
? sym
->symtabIndex
1106 : INDIRECT_SYMBOL_LOCAL
;
1109 void IndirectSymtabSection::writeTo(uint8_t *buf
) const {
1111 for (const Symbol
*sym
: in
.got
->getEntries()) {
1112 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1115 for (const Symbol
*sym
: in
.tlvPointers
->getEntries()) {
1116 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1119 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
1120 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1123 // There is a 1:1 correspondence between stubs and LazyPointerSection
1124 // entries. But giving __stubs and __la_symbol_ptr the same reserved1
1125 // (the offset into the indirect symbol table) so that they both refer
1126 // to the same range of offsets confuses `strip`, so write the stubs
1127 // symbol table offsets a second time.
1128 for (const Symbol
*sym
: in
.stubs
->getEntries()) {
1129 write32le(buf
+ off
* sizeof(uint32_t), indirectValue(sym
));
1134 StringTableSection::StringTableSection()
1135 : LinkEditSection(segment_names::linkEdit
, section_names::stringTable
) {}
1137 uint32_t StringTableSection::addString(StringRef str
) {
1138 uint32_t strx
= size
;
1139 strings
.push_back(str
); // TODO: consider deduplicating strings
1140 size
+= str
.size() + 1; // account for null terminator
1144 void StringTableSection::writeTo(uint8_t *buf
) const {
1146 for (StringRef str
: strings
) {
1147 memcpy(buf
+ off
, str
.data(), str
.size());
1148 off
+= str
.size() + 1; // account for null terminator
1152 static_assert((CodeSignatureSection::blobHeadersSize
% 8) == 0, "");
1153 static_assert((CodeSignatureSection::fixedHeadersSize
% 8) == 0, "");
1155 CodeSignatureSection::CodeSignatureSection()
1156 : LinkEditSection(segment_names::linkEdit
, section_names::codeSignature
) {
1157 align
= 16; // required by libstuff
1158 // FIXME: Consider using finalOutput instead of outputFile.
1159 fileName
= config
->outputFile
;
1160 size_t slashIndex
= fileName
.rfind("/");
1161 if (slashIndex
!= std::string::npos
)
1162 fileName
= fileName
.drop_front(slashIndex
+ 1);
1163 allHeadersSize
= alignTo
<16>(fixedHeadersSize
+ fileName
.size() + 1);
1164 fileNamePad
= allHeadersSize
- fixedHeadersSize
- fileName
.size();
1167 uint32_t CodeSignatureSection::getBlockCount() const {
1168 return (fileOff
+ blockSize
- 1) / blockSize
;
1171 uint64_t CodeSignatureSection::getRawSize() const {
1172 return allHeadersSize
+ getBlockCount() * hashSize
;
1175 void CodeSignatureSection::writeHashes(uint8_t *buf
) const {
1176 uint8_t *code
= buf
;
1177 uint8_t *codeEnd
= buf
+ fileOff
;
1178 uint8_t *hashes
= codeEnd
+ allHeadersSize
;
1179 while (code
< codeEnd
) {
1180 StringRef
block(reinterpret_cast<char *>(code
),
1181 std::min(codeEnd
- code
, static_cast<ssize_t
>(blockSize
)));
1183 hasher
.update(block
);
1184 StringRef hash
= hasher
.final();
1185 assert(hash
.size() == hashSize
);
1186 memcpy(hashes
, hash
.data(), hashSize
);
1190 #if defined(__APPLE__)
1191 // This is macOS-specific work-around and makes no sense for any
1192 // other host OS. See https://openradar.appspot.com/FB8914231
1194 // The macOS kernel maintains a signature-verification cache to
1195 // quickly validate applications at time of execve(2). The trouble
1196 // is that for the kernel creates the cache entry at the time of the
1197 // mmap(2) call, before we have a chance to write either the code to
1198 // sign or the signature header+hashes. The fix is to invalidate
1199 // all cached data associated with the output file, thus discarding
1200 // the bogus prematurely-cached signature.
1201 msync(buf
, fileOff
+ getSize(), MS_INVALIDATE
);
1205 void CodeSignatureSection::writeTo(uint8_t *buf
) const {
1206 uint32_t signatureSize
= static_cast<uint32_t>(getSize());
1207 auto *superBlob
= reinterpret_cast<CS_SuperBlob
*>(buf
);
1208 write32be(&superBlob
->magic
, CSMAGIC_EMBEDDED_SIGNATURE
);
1209 write32be(&superBlob
->length
, signatureSize
);
1210 write32be(&superBlob
->count
, 1);
1211 auto *blobIndex
= reinterpret_cast<CS_BlobIndex
*>(&superBlob
[1]);
1212 write32be(&blobIndex
->type
, CSSLOT_CODEDIRECTORY
);
1213 write32be(&blobIndex
->offset
, blobHeadersSize
);
1214 auto *codeDirectory
=
1215 reinterpret_cast<CS_CodeDirectory
*>(buf
+ blobHeadersSize
);
1216 write32be(&codeDirectory
->magic
, CSMAGIC_CODEDIRECTORY
);
1217 write32be(&codeDirectory
->length
, signatureSize
- blobHeadersSize
);
1218 write32be(&codeDirectory
->version
, CS_SUPPORTSEXECSEG
);
1219 write32be(&codeDirectory
->flags
, CS_ADHOC
| CS_LINKER_SIGNED
);
1220 write32be(&codeDirectory
->hashOffset
,
1221 sizeof(CS_CodeDirectory
) + fileName
.size() + fileNamePad
);
1222 write32be(&codeDirectory
->identOffset
, sizeof(CS_CodeDirectory
));
1223 codeDirectory
->nSpecialSlots
= 0;
1224 write32be(&codeDirectory
->nCodeSlots
, getBlockCount());
1225 write32be(&codeDirectory
->codeLimit
, fileOff
);
1226 codeDirectory
->hashSize
= static_cast<uint8_t>(hashSize
);
1227 codeDirectory
->hashType
= kSecCodeSignatureHashSHA256
;
1228 codeDirectory
->platform
= 0;
1229 codeDirectory
->pageSize
= blockSizeShift
;
1230 codeDirectory
->spare2
= 0;
1231 codeDirectory
->scatterOffset
= 0;
1232 codeDirectory
->teamOffset
= 0;
1233 codeDirectory
->spare3
= 0;
1234 codeDirectory
->codeLimit64
= 0;
1235 OutputSegment
*textSeg
= getOrCreateOutputSegment(segment_names::text
);
1236 write64be(&codeDirectory
->execSegBase
, textSeg
->fileOff
);
1237 write64be(&codeDirectory
->execSegLimit
, textSeg
->fileSize
);
1238 write64be(&codeDirectory
->execSegFlags
,
1239 config
->outputType
== MH_EXECUTE
? CS_EXECSEG_MAIN_BINARY
: 0);
1240 auto *id
= reinterpret_cast<char *>(&codeDirectory
[1]);
1241 memcpy(id
, fileName
.begin(), fileName
.size());
1242 memset(id
+ fileName
.size(), 0, fileNamePad
);
1245 BitcodeBundleSection::BitcodeBundleSection()
1246 : SyntheticSection(segment_names::llvm
, section_names::bitcodeBundle
) {}
// Small adapter that lets CHECK_EC treat std::error_code results and plain
// int status codes uniformly: either one is stored as an int, and the wrapper
// converts back to that int (non-zero meaning failure).
class ErrorCodeWrapper {
public:
  explicit ErrorCodeWrapper(std::error_code ec) : errorCode(ec.value()) {}
  explicit ErrorCodeWrapper(int ec) : errorCode(ec) {}
  operator int() const { return errorCode; }

private:
  int errorCode;
};
// Evaluates exp once (it must yield a std::error_code or an int) and aborts
// the link with a message naming the failing expression if the result is
// non-zero. Expands to a single statement; callers supply the trailing ';'.
#define CHECK_EC(exp)                                                          \
  do {                                                                         \
    ErrorCodeWrapper ec(exp);                                                  \
    if (ec)                                                                    \
      fatal(Twine("operation failed with error code ") + Twine(ec) + ": " +    \
            #exp);                                                             \
  } while (0)
1266 void BitcodeBundleSection::finalize() {
1267 #ifdef LLVM_HAVE_LIBXAR
1268 using namespace llvm::sys::fs
;
1269 CHECK_EC(createTemporaryFile("bitcode-bundle", "xar", xarPath
));
1271 xar_t
xar(xar_open(xarPath
.data(), O_RDWR
));
1273 fatal("failed to open XAR temporary file at " + xarPath
);
1274 CHECK_EC(xar_opt_set(xar
, XAR_OPT_COMPRESSION
, XAR_OPT_VAL_NONE
));
1275 // FIXME: add more data to XAR
1276 CHECK_EC(xar_close(xar
));
1278 file_size(xarPath
, xarSize
);
1279 #endif // defined(LLVM_HAVE_LIBXAR)
1282 void BitcodeBundleSection::writeTo(uint8_t *buf
) const {
1283 using namespace llvm::sys::fs
;
1285 CHECK(openNativeFile(xarPath
, CD_OpenExisting
, FA_Read
, OF_None
),
1286 "failed to open XAR file");
1288 mapped_file_region
xarMap(handle
, mapped_file_region::mapmode::readonly
,
1291 fatal("failed to map XAR file");
1292 memcpy(buf
, xarMap
.const_data(), xarSize
);
1298 CStringSection::CStringSection()
1299 : SyntheticSection(segment_names::text
, section_names::cString
) {
1300 flags
= S_CSTRING_LITERALS
;
1303 void CStringSection::addInput(CStringInputSection
*isec
) {
1304 isec
->parent
= this;
1305 inputs
.push_back(isec
);
1306 if (isec
->align
> align
)
1307 align
= isec
->align
;
1310 void CStringSection::writeTo(uint8_t *buf
) const {
1311 for (const CStringInputSection
*isec
: inputs
) {
1312 for (size_t i
= 0, e
= isec
->pieces
.size(); i
!= e
; ++i
) {
1313 if (!isec
->pieces
[i
].live
)
1315 StringRef string
= isec
->getStringRef(i
);
1316 memcpy(buf
+ isec
->pieces
[i
].outSecOff
, string
.data(), string
.size());
1321 void CStringSection::finalizeContents() {
1322 uint64_t offset
= 0;
1323 for (CStringInputSection
*isec
: inputs
) {
1324 for (size_t i
= 0, e
= isec
->pieces
.size(); i
!= e
; ++i
) {
1325 if (!isec
->pieces
[i
].live
)
1327 uint32_t pieceAlign
= MinAlign(isec
->pieces
[i
].inSecOff
, align
);
1328 offset
= alignTo(offset
, pieceAlign
);
1329 isec
->pieces
[i
].outSecOff
= offset
;
1330 isec
->isFinal
= true;
1331 StringRef string
= isec
->getStringRef(i
);
1332 offset
+= string
.size();
1337 // Mergeable cstring literals are found under the __TEXT,__cstring section. In
1338 // contrast to ELF, which puts strings that need different alignments into
1339 // different sections, clang's Mach-O backend puts them all in one section.
1340 // Strings that need to be aligned have the .p2align directive emitted before
1341 // them, which simply translates into zero padding in the object file.
1343 // I *think* ld64 extracts the desired per-string alignment from this data by
1344 // preserving each string's offset from the last section-aligned address. I'm
1345 // not entirely certain since it doesn't seem consistent about doing this, and
1346 // in fact doesn't seem to be correct in general: we can in fact can induce ld64
1347 // to produce a crashing binary just by linking in an additional object file
1348 // that only contains a duplicate cstring at a different alignment. See PR50563
1351 // On x86_64, the cstrings we've seen so far that require special alignment are
1352 // all accessed by SIMD operations -- x86_64 requires SIMD accesses to be
1353 // 16-byte-aligned. arm64 also seems to require 16-byte-alignment in some cases
1354 // (PR50791), but I haven't tracked down the root cause. So for now, I'm just
1355 // aligning all strings to 16 bytes. This is indeed wasteful, but
1356 // implementation-wise it's simpler than preserving per-string
1357 // alignment+offsets. It also avoids the aforementioned crash after
1358 // deduplication of differently-aligned strings. Finally, the overhead is not
1359 // huge: using 16-byte alignment (vs no alignment) is only a 0.5% size overhead
1360 // when linking chromium_framework on x86_64.
1361 DeduplicatedCStringSection::DeduplicatedCStringSection()
1362 : builder(StringTableBuilder::RAW
, /*Alignment=*/16) {}
1364 void DeduplicatedCStringSection::finalizeContents() {
1365 // Add all string pieces to the string table builder to create section
1367 for (const CStringInputSection
*isec
: inputs
)
1368 for (size_t i
= 0, e
= isec
->pieces
.size(); i
!= e
; ++i
)
1369 if (isec
->pieces
[i
].live
)
1370 builder
.add(isec
->getCachedHashStringRef(i
));
1372 // Fix the string table content. After this, the contents will never change.
1373 builder
.finalizeInOrder();
1375 // finalize() fixed tail-optimized strings, so we can now get
1376 // offsets of strings. Get an offset for each string and save it
1377 // to a corresponding SectionPiece for easy access.
1378 for (CStringInputSection
*isec
: inputs
) {
1379 for (size_t i
= 0, e
= isec
->pieces
.size(); i
!= e
; ++i
) {
1380 if (!isec
->pieces
[i
].live
)
1382 isec
->pieces
[i
].outSecOff
=
1383 builder
.getOffset(isec
->getCachedHashStringRef(i
));
1384 isec
->isFinal
= true;
1389 // This section is actually emitted as __TEXT,__const by ld64, but clang may
1390 // emit input sections of that name, and LLD doesn't currently support mixing
1391 // synthetic and concat-type OutputSections. To work around this, I've given
1392 // our merged-literals section a different name.
1393 WordLiteralSection::WordLiteralSection()
1394 : SyntheticSection(segment_names::text
, section_names::literals
) {
1398 void WordLiteralSection::addInput(WordLiteralInputSection
*isec
) {
1399 isec
->parent
= this;
1400 inputs
.push_back(isec
);
1403 void WordLiteralSection::finalizeContents() {
1404 for (WordLiteralInputSection
*isec
: inputs
) {
1405 // We do all processing of the InputSection here, so it will be effectively
1407 isec
->isFinal
= true;
1408 const uint8_t *buf
= isec
->data
.data();
1409 switch (sectionType(isec
->getFlags())) {
1410 case S_4BYTE_LITERALS
: {
1411 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 4) {
1412 if (!isec
->isLive(off
))
1414 uint32_t value
= *reinterpret_cast<const uint32_t *>(buf
+ off
);
1415 literal4Map
.emplace(value
, literal4Map
.size());
1419 case S_8BYTE_LITERALS
: {
1420 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 8) {
1421 if (!isec
->isLive(off
))
1423 uint64_t value
= *reinterpret_cast<const uint64_t *>(buf
+ off
);
1424 literal8Map
.emplace(value
, literal8Map
.size());
1428 case S_16BYTE_LITERALS
: {
1429 for (size_t off
= 0, e
= isec
->data
.size(); off
< e
; off
+= 16) {
1430 if (!isec
->isLive(off
))
1432 UInt128 value
= *reinterpret_cast<const UInt128
*>(buf
+ off
);
1433 literal16Map
.emplace(value
, literal16Map
.size());
1438 llvm_unreachable("invalid literal section type");
1443 void WordLiteralSection::writeTo(uint8_t *buf
) const {
1444 // Note that we don't attempt to do any endianness conversion in addInput(),
1445 // so we don't do it here either -- just write out the original value,
1447 for (const auto &p
: literal16Map
)
1448 memcpy(buf
+ p
.second
* 16, &p
.first
, 16);
1449 buf
+= literal16Map
.size() * 16;
1451 for (const auto &p
: literal8Map
)
1452 memcpy(buf
+ p
.second
* 8, &p
.first
, 8);
1453 buf
+= literal8Map
.size() * 8;
1455 for (const auto &p
: literal4Map
)
1456 memcpy(buf
+ p
.second
* 4, &p
.first
, 4);
1459 void macho::createSyntheticSymbols() {
1460 auto addHeaderSymbol
= [](const char *name
) {
1461 symtab
->addSynthetic(name
, in
.header
->isec
, /*value=*/0,
1462 /*privateExtern=*/true, /*includeInSymtab=*/false,
1463 /*referencedDynamically=*/false);
1466 switch (config
->outputType
) {
1467 // FIXME: Assign the right address value for these symbols
1468 // (rather than 0). But we need to do that after assignAddresses().
1470 // If linking PIE, __mh_execute_header is a defined symbol in
1472 // Otherwise, it's an absolute symbol.
1474 symtab
->addSynthetic("__mh_execute_header", in
.header
->isec
, /*value=*/0,
1475 /*privateExtern=*/false, /*includeInSymtab=*/true,
1476 /*referencedDynamically=*/true);
1478 symtab
->addSynthetic("__mh_execute_header", /*isec=*/nullptr, /*value=*/0,
1479 /*privateExtern=*/false, /*includeInSymtab=*/true,
1480 /*referencedDynamically=*/true);
1483 // The following symbols are N_SECT symbols, even though the header is not
1484 // part of any section and that they are private to the bundle/dylib/object
1485 // they are part of.
1487 addHeaderSymbol("__mh_bundle_header");
1490 addHeaderSymbol("__mh_dylib_header");
1493 addHeaderSymbol("__mh_dylinker_header");
1496 addHeaderSymbol("__mh_object_header");
1499 llvm_unreachable("unexpected outputType");
1503 // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
1504 // which does e.g. cleanup of static global variables. The ABI document
1505 // says that the pointer can point to any address in one of the dylib's
1506 // segments, but in practice ld64 seems to set it to point to the header,
1507 // so that's what's implemented here.
1508 addHeaderSymbol("___dso_handle");
1511 template SymtabSection
*macho::makeSymtabSection
<LP64
>(StringTableSection
&);
1512 template SymtabSection
*macho::makeSymtabSection
<ILP32
>(StringTableSection
&);