1 //===- SyntheticSections.h -------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H
10 #define LLD_MACHO_SYNTHETIC_SECTIONS_H
13 #include "ExportTrie.h"
14 #include "InputSection.h"
15 #include "OutputSection.h"
16 #include "OutputSegment.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/Hashing.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/MC/StringTableBuilder.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
27 #include <unordered_map>
40 class UnwindInfoSection
;
// Base class for the synthetic output sections declared in this header.
// classof() reports whether a given OutputSection's kind() is SyntheticKind.
// NOTE(review): this listing seems to have lost some lines (for example the
// closing braces of classof() and of the class) -- check the full header.
42 class SyntheticSection
: public OutputSection
{
// Takes the segment name and the section name as C strings.
44 SyntheticSection(const char *segname
, const char *name
);
45 virtual ~SyntheticSection() = default;
// True when `sec` identifies itself as a synthetic section via kind().
47 static bool classof(const OutputSection
*sec
) {
48 return sec
->kind() == SyntheticKind
;
52 // This fake InputSection makes it easier for us to write code that applies
53 // generically to both user inputs and synthetics.
57 // All sections in __LINKEDIT should inherit from this.
// The constructor sets `align` to the target's word size, and getSize() is
// getRawSize() rounded up to that alignment.
58 class LinkEditSection
: public SyntheticSection
{
60 LinkEditSection(const char *segname
, const char *name
)
61 : SyntheticSection(segname
, name
) {
62 align
= target
->wordSize
;
// Hook with an empty default body; several subclasses in this header
// override it.
65 virtual void finalizeContents() {}
67 // Sections in __LINKEDIT are special: their offsets are recorded in the
68 // load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section
// headers.
70 bool isHidden() const override final
{ return true; }
// Size of the section's contents before the alignment padding applied by
// getSize(); every subclass must provide it.
72 virtual uint64_t getRawSize() const = 0;
74 // codesign (or more specifically libstuff) checks that each section in
75 // __LINKEDIT ends where the next one starts -- no gaps are permitted. We
76 // therefore align every section's start and end points to WordSize.
78 // NOTE: This assumes that the extra bytes required for alignment can be
// zero-valued bytes.
80 uint64_t getSize() const override final
{
81 return llvm::alignTo(getRawSize(), align
);
85 // The header of the Mach-O file, which must have a file offset of zero.
// Holds a list of LoadCommand pointers together with a `sizeOfCmds` counter.
86 class MachHeaderSection final
: public SyntheticSection
{
// Always hidden.
89 bool isHidden() const override
{ return true; }
90 uint64_t getSize() const override
;
91 void writeTo(uint8_t *buf
) const override
;
// Declared here, defined out of line. Presumably appends to `loadCommands`
// and updates `sizeOfCmds` -- confirm against the implementation.
93 void addLoadCommand(LoadCommand
*);
96 std::vector
<LoadCommand
*> loadCommands
;
// Starts at zero. NOTE(review): looks like the combined byte size of the
// load commands -- confirm.
97 uint32_t sizeOfCmds
= 0;
100 // A hidden section that exists solely for the purpose of creating the
101 // __PAGEZERO segment, which is used to catch null pointer dereferences.
// It reports a size of target->pageZeroSize but a file size of zero, and its
// writeTo() does nothing.
102 class PageZeroSection final
: public SyntheticSection
{
105 bool isHidden() const override
{ return true; }
106 uint64_t getSize() const override
{ return target
->pageZeroSize
; }
107 uint64_t getFileSize() const override
{ return 0; }
// Intentionally empty: there is nothing to emit for this section.
108 void writeTo(uint8_t *buf
) const override
{}
111 // This is the base class for the GOT and TLVPointer sections, which are nearly
112 // functionally identical -- they will both be populated by dyld with addresses
113 // to non-lazily-loaded dylib symbols. The main difference is that the
114 // TLVPointerSection stores references to thread-local variables.
// Each entry takes one target word, so the section size is
// entries.size() * target->wordSize.
115 class NonLazyPointerSectionBase
: public SyntheticSection
{
117 NonLazyPointerSectionBase(const char *segname
, const char *name
);
// Read-only view of the recorded symbols.
118 const llvm::SetVector
<const Symbol
*> &getEntries() const { return entries
; }
// The section is needed only when at least one entry has been recorded.
119 bool isNeeded() const override
{ return !entries
.empty(); }
120 uint64_t getSize() const override
{
121 return entries
.size() * target
->wordSize
;
123 void writeTo(uint8_t *buf
) const override
;
124 void addEntry(Symbol
*sym
);
// Address of slot `gotIndex`: the section address plus gotIndex words.
125 uint64_t getVA(uint32_t gotIndex
) const {
126 return addr
+ gotIndex
* target
->wordSize
;
130 llvm::SetVector
<const Symbol
*> entries
;
// Final subclass of NonLazyPointerSectionBase. Only the class head is
// visible in this listing.
133 class GotSection final
: public NonLazyPointerSectionBase
{
// Final subclass of NonLazyPointerSectionBase. Only the class head is
// visible in this listing.
138 class TlvPointerSection final
: public NonLazyPointerSectionBase
{
// A position inside an input section: the section pointer plus an offset.
// NOTE(review): the enclosing declaration header and the `offset` member
// declaration are not visible in this listing.
144 const InputSection
*isec
;
147 Location(const InputSection
*isec
, uint64_t offset
)
148 : isec(isec
), offset(offset
) {}
// Forwards the stored offset to isec->getVA().
149 uint64_t getVA() const { return isec
->getVA(offset
); }
152 // Stores rebase opcodes, which tell dyld where absolute addresses have been
153 // encoded in the binary. If the binary is not loaded at its preferred address,
154 // dyld has to rebase these addresses by adding an offset to them.
155 class RebaseSection final
: public LinkEditSection
{
158 void finalizeContents() override
;
// The raw size is the byte count of `contents`.
159 uint64_t getRawSize() const override
{ return contents
.size(); }
// Needed only when at least one location has been recorded.
160 bool isNeeded() const override
{ return !locations
.empty(); }
161 void writeTo(uint8_t *buf
) const override
;
// Records an (input section, offset) pair in `locations`.
163 void addEntry(const InputSection
*isec
, uint64_t offset
) {
165 locations
.push_back({isec
, offset
});
169 std::vector
<Location
> locations
;
// Byte buffer backing getRawSize(). Presumably filled by finalizeContents()
// -- confirm against the implementation.
170 SmallVector
<char, 128> contents
;
// A single binding record: an addend together with the Location it targets.
// NOTE(review): the member declarations are not visible in this listing.
173 struct BindingEntry
{
// `target` is taken by value and moved into the member.
176 BindingEntry(int64_t addend
, Location target
)
177 : addend(addend
), target(std::move(target
)) {}
// Maps a key of type `Sym` to the list of BindingEntry records added for it.
// NOTE(review): `Sym` is presumably a template parameter declared on a line
// that is missing from this listing -- confirm.
181 using BindingsMap
= llvm::DenseMap
<Sym
, std::vector
<BindingEntry
>>;
183 // Stores bind opcodes for telling dyld which symbols to load non-lazily.
184 class BindingSection final
: public LinkEditSection
{
187 void finalizeContents() override
;
// The raw size is the byte count of `contents`.
188 uint64_t getRawSize() const override
{ return contents
.size(); }
// Needed only when at least one binding has been recorded.
189 bool isNeeded() const override
{ return !bindingsMap
.empty(); }
190 void writeTo(uint8_t *buf
) const override
;
// Appends a BindingEntry (addend + Location(isec, offset)) to the list kept
// for `dysym`; the list is created on first use by operator[].
192 void addEntry(const DylibSymbol
*dysym
, const InputSection
*isec
,
193 uint64_t offset
, int64_t addend
= 0) {
194 bindingsMap
[dysym
].emplace_back(addend
, Location(isec
, offset
));
198 BindingsMap
<const DylibSymbol
*> bindingsMap
;
// Byte buffer backing getRawSize(). Presumably filled by finalizeContents()
// -- confirm against the implementation.
199 SmallVector
<char, 128> contents
;
202 // Stores bind opcodes for telling dyld which weak symbols need coalescing.
203 // There are two types of entries in this section:
205 // 1) Non-weak definitions: This is a symbol definition that weak symbols in
206 // other dylibs should coalesce to.
208 // 2) Weak bindings: These tell dyld that a given symbol reference should
209 // coalesce to a non-weak definition if one is found. Note that unlike the
210 // entries in the BindingSection, the bindings here only refer to these
211 // symbols by name, but do not specify which dylib to load them from.
212 class WeakBindingSection final
: public LinkEditSection
{
214 WeakBindingSection();
215 void finalizeContents() override
;
// The raw size is the byte count of `contents`.
216 uint64_t getRawSize() const override
{ return contents
.size(); }
// Needed when either kind of entry (weak binding or non-weak definition)
// has been recorded.
217 bool isNeeded() const override
{
218 return !bindingsMap
.empty() || !definitions
.empty();
221 void writeTo(uint8_t *buf
) const override
;
// Appends a BindingEntry (addend + Location(isec, offset)) to the list kept
// for `symbol`.
223 void addEntry(const Symbol
*symbol
, const InputSection
*isec
, uint64_t offset
,
224 int64_t addend
= 0) {
225 bindingsMap
[symbol
].emplace_back(addend
, Location(isec
, offset
));
// True once at least one weak binding has been added.
228 bool hasEntry() const { return !bindingsMap
.empty(); }
// Records a definition in `definitions`.
230 void addNonWeakDefinition(const Defined
*defined
) {
231 definitions
.emplace_back(defined
);
// True once at least one non-weak definition has been added.
234 bool hasNonWeakDefinition() const { return !definitions
.empty(); }
237 BindingsMap
<const Symbol
*> bindingsMap
;
238 std::vector
<const Defined
*> definitions
;
// Byte buffer backing getRawSize().
239 SmallVector
<char, 128> contents
;
242 // The following sections implement lazy symbol binding -- very similar to the
243 // PLT mechanism in ELF.
245 // ELF's .plt section is broken up into two sections in Mach-O: StubsSection
246 // and StubHelperSection. Calls to functions in dylibs will end up calling into
247 // StubsSection, which contains indirect jumps to addresses stored in the
248 // LazyPointerSection (the counterpart to ELF's .plt.got).
250 // We will first describe how non-weak symbols are handled.
252 // At program start, the LazyPointerSection contains addresses that point into
253 // one of the entry points in the middle of the StubHelperSection. The code in
254 // StubHelperSection will push on the stack an offset into the
255 // LazyBindingSection. The push is followed by a jump to the beginning of the
256 // StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder.
257 // dyld_stub_binder is a non-lazily-bound symbol, so this call looks it up in
// the GOT.
260 // The stub binder will look up the bind opcodes in the LazyBindingSection at
261 // the given offset. The bind opcodes will tell the binder to update the
262 // address in the LazyPointerSection to point to the symbol, so that subsequent
263 // calls don't have to redo the symbol resolution. The binder will then jump to
264 // the resolved symbol.
266 // With weak symbols, the situation is slightly different. Since there is no
267 // "weak lazy" lookup, function calls to weak symbols are always non-lazily
268 // bound. We emit both regular non-lazy bindings as well as weak bindings, in
269 // order that the weak bindings may overwrite the non-lazy bindings if an
270 // appropriate symbol is found at runtime. However, the bound addresses will
271 // still be written (non-lazily) into the LazyPointerSection.
// Section holding one stub per recorded symbol. Stub addresses are only
// meaningful once `isFinal` is set; see getVA().
273 class StubsSection final
: public SyntheticSection
{
276 uint64_t getSize() const override
;
// Needed only when at least one stub entry exists.
277 bool isNeeded() const override
{ return !entries
.empty(); }
278 void finalize() override
;
279 void writeTo(uint8_t *buf
) const override
;
280 const llvm::SetVector
<Symbol
*> &getEntries() const { return entries
; }
281 // Returns whether the symbol was added. Note that every stubs entry will
282 // have a corresponding entry in the LazyPointerSection.
283 bool addEntry(Symbol
*);
// Address of stub number `stubsIndex`: addr + stubsIndex * target->stubSize
// when final, TargetInfo::outOfRangeVA otherwise. The assert permits the
// non-final case only for targets that use thunks.
284 uint64_t getVA(uint32_t stubsIndex
) const {
285 assert(isFinal
|| target
->usesThunks());
286 // ConcatOutputSection::finalize() can seek the address of a
287 // stub before its address is assigned. Before __stubs is
288 // finalized, return a contrived out-of-range address.
289 return isFinal
? addr
+ stubsIndex
* target
->stubSize
290 : TargetInfo::outOfRangeVA
;
293 bool isFinal
= false; // is address assigned?
296 llvm::SetVector
<Symbol
*> entries
;
// Synthetic section whose size, need and contents are all computed out of
// line. It carries two symbol pointers, both null until assigned.
299 class StubHelperSection final
: public SyntheticSection
{
302 uint64_t getSize() const override
;
303 bool isNeeded() const override
;
304 void writeTo(uint8_t *buf
) const override
;
// NOTE(review): presumably the dyld_stub_binder symbol mentioned in the
// lazy-binding overview comment in this header -- confirm where it is set.
308 DylibSymbol
*stubBinder
= nullptr;
309 Defined
*dyldPrivate
= nullptr;
312 // Note that this section may also be targeted by non-lazy bindings. In
313 // particular, this happens when branch relocations target weak symbols.
// All member functions are only declared here; their definitions live
// elsewhere.
314 class LazyPointerSection final
: public SyntheticSection
{
316 LazyPointerSection();
317 uint64_t getSize() const override
;
318 bool isNeeded() const override
;
319 void writeTo(uint8_t *buf
) const override
;
// __LINKEDIT section that keeps a set of DylibSymbol entries and a byte
// buffer (`contents`) whose size is reported as the raw section size.
322 class LazyBindingSection final
: public LinkEditSection
{
324 LazyBindingSection();
325 void finalizeContents() override
;
326 uint64_t getRawSize() const override
{ return contents
.size(); }
// Needed only when at least one entry has been added.
327 bool isNeeded() const override
{ return !entries
.empty(); }
328 void writeTo(uint8_t *buf
) const override
;
329 // Note that every entry here will be referenced by a corresponding entry in
330 // the StubHelperSection.
331 void addEntry(DylibSymbol
*dysym
);
332 const llvm::SetVector
<DylibSymbol
*> &getEntries() const { return entries
; }
// Declared here, defined out of line. NOTE(review): the uint32_t result is
// presumably an offset into `contents` -- confirm.
335 uint32_t encode(const DylibSymbol
&);
337 llvm::SetVector
<DylibSymbol
*> entries
;
338 SmallVector
<char, 128> contents
;
// Output stream constructed over `contents`.
339 llvm::raw_svector_ostream os
{contents
};
342 // Stores a trie that describes the set of exported symbols.
343 class ExportSection final
: public LinkEditSection
{
346 void finalizeContents() override
;
// Returns the `size` member. NOTE(review): its declaration is not visible
// in this listing; presumably it is set by finalizeContents().
347 uint64_t getRawSize() const override
{ return size
; }
348 void writeTo(uint8_t *buf
) const override
;
// Defaults to false; not modified anywhere in this header.
350 bool hasWeakSymbol
= false;
353 TrieBuilder trieBuilder
;
357 // Stores 'data in code' entries that describe the locations of
358 // data regions inside code sections.
359 class DataInCodeSection final
: public LinkEditSection
{
362 void finalizeContents() override
;
// One fixed-size data_in_code_entry record per element of `entries`.
363 uint64_t getRawSize() const override
{
364 return sizeof(llvm::MachO::data_in_code_entry
) * entries
.size();
366 void writeTo(uint8_t *buf
) const override
;
369 std::vector
<llvm::MachO::data_in_code_entry
> entries
;
372 // Stores ULEB128 delta encoded addresses of functions.
373 class FunctionStartsSection final
: public LinkEditSection
{
375 FunctionStartsSection();
376 void finalizeContents() override
;
// The raw size is the byte count of `contents`.
377 uint64_t getRawSize() const override
{ return contents
.size(); }
378 void writeTo(uint8_t *buf
) const override
;
// Byte buffer backing getRawSize(). Presumably filled by finalizeContents()
// -- confirm against the implementation.
381 SmallVector
<char, 128> contents
;
384 // Stores the strings referenced by the symbol table.
385 class StringTableSection final
: public LinkEditSection
{
387 StringTableSection();
388 // Returns the start offset of the added string.
389 uint32_t addString(StringRef
);
// Returns the `size` member. NOTE(review): its declaration is not visible
// in this listing.
390 uint64_t getRawSize() const override
{ return size
; }
391 void writeTo(uint8_t *buf
) const override
;
// Offset of the empty string; it is 1 rather than 0 because the table
// starts with a space (see the comment on `strings`).
393 static constexpr size_t emptyStringIndex
= 1;
396 // ld64 emits string tables which start with a space and a zero byte. We
397 // match its behavior here since some tools depend on it.
398 // Consequently, the empty string will be at index 1, not zero.
399 std::vector
<StringRef
> strings
{" "};
// NOTE(review): the declaration header and the other members of this entry
// type are not visible in this listing.
// `strx` defaults to the string table's empty-string index.
410 uint32_t strx
= StringTableSection::emptyStringIndex
;
415 StabsEntry() = default;
// Explicit so that a bare uint8_t does not convert implicitly to an entry.
416 explicit StabsEntry(uint8_t type
) : type(type
) {}
419 // Symbols of the same type must be laid out contiguously: we choose to emit
420 // all local symbols first, then external symbols, and finally undefined
421 // symbols. For each symbol type, the LC_DYSYMTAB load command will record the
422 // range (start index and total number) of those symbols in the symbol table.
423 class SymtabSection
: public LinkEditSection
{
425 void finalizeContents() override
;
426 uint32_t getNumSymbols() const;
// STABS entries are counted together with the local symbols.
427 uint32_t getNumLocalSymbols() const {
428 return stabs
.size() + localSymbols
.size();
430 uint32_t getNumExternalSymbols() const { return externalSymbols
.size(); }
431 uint32_t getNumUndefinedSymbols() const { return undefinedSymbols
.size(); }
// STABS emission helpers; declared here, defined out of line.
434 void emitBeginSourceStab(llvm::DWARFUnit
*compileUnit
);
435 void emitEndSourceStab();
436 void emitObjectFileStab(ObjFile
*);
437 void emitEndFunStab(Defined
*);
// Takes the string table by reference; the class keeps a reference member
// of the same type (`stringTableSection`).
441 SymtabSection(StringTableSection
&);
443 StringTableSection
&stringTableSection
;
444 // STABS symbols are always local symbols, but we represent them with special
445 // entries because they may use fields like n_sect and n_desc differently.
446 std::vector
<StabsEntry
> stabs
;
447 std::vector
<SymtabEntry
> localSymbols
;
448 std::vector
<SymtabEntry
> externalSymbols
;
449 std::vector
<SymtabEntry
> undefinedSymbols
;
// Factory template parameterized on `LP`: takes a StringTableSection by
// reference and returns a SymtabSection pointer. Only the declaration is
// visible here. NOTE(review): the meaning of `LP` is not established in this
// listing.
452 template <class LP
> SymtabSection
*makeSymtabSection(StringTableSection
&);
454 // The indirect symbol table is a list of 32-bit integers that serve as indices
455 // into the (actual) symbol table. The indirect symbol table is a
456 // concatenation of several sub-arrays of indices, each sub-array belonging to
457 // a separate section. The starting offset of each sub-array is stored in the
458 // reserved1 header field of the respective section.
460 // These sub-arrays provide symbol information for sections that store
461 // contiguous sequences of symbol references. These references can be pointers
462 // (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
// function stubs).
464 class IndirectSymtabSection final
: public LinkEditSection
{
466 IndirectSymtabSection();
467 void finalizeContents() override
;
468 uint32_t getNumSymbols() const;
// Each indirect symbol table entry is one 32-bit index.
469 uint64_t getRawSize() const override
{
470 return getNumSymbols() * sizeof(uint32_t);
472 bool isNeeded() const override
;
473 void writeTo(uint8_t *buf
) const override
;
476 // The code signature comes at the very end of the linked output file.
477 class CodeSignatureSection final
: public LinkEditSection
{
// Block size is expressed as a shift: 1 << 12 bytes.
479 static constexpr uint8_t blockSizeShift
= 12;
480 static constexpr size_t blockSize
= (1 << blockSizeShift
); // 4 KiB
// 256 bits, i.e. 32 bytes per hash. NOTE(review): presumably a SHA-256
// digest -- confirm against writeHashes().
481 static constexpr size_t hashSize
= 256 / 8;
// Combined size of one CS_SuperBlob and one CS_BlobIndex, rounded up to a
// multiple of 8.
482 static constexpr size_t blobHeadersSize
= llvm::alignTo
<8>(
483 sizeof(llvm::MachO::CS_SuperBlob
) + sizeof(llvm::MachO::CS_BlobIndex
));
// The blob headers followed by one CS_CodeDirectory.
484 static constexpr uint32_t fixedHeadersSize
=
485 blobHeadersSize
+ sizeof(llvm::MachO::CS_CodeDirectory
);
// Both start at zero; they are not assigned anywhere in this header.
487 uint32_t fileNamePad
= 0;
488 uint32_t allHeadersSize
= 0;
491 CodeSignatureSection();
492 uint64_t getRawSize() const override
;
// This section reports itself as needed unconditionally.
493 bool isNeeded() const override
{ return true; }
494 void writeTo(uint8_t *buf
) const override
;
495 uint32_t getBlockCount() const;
496 void writeHashes(uint8_t *buf
) const;
// Synthetic section whose size is the `xarSize` member and which keeps a
// path string in `xarPath`. NOTE(review): the declaration of `xarSize` is
// not visible in this listing; presumably finalize() sets it -- confirm.
499 class BitcodeBundleSection final
: public SyntheticSection
{
501 BitcodeBundleSection();
502 uint64_t getSize() const override
{ return xarSize
; }
503 void finalize() override
;
504 void writeTo(uint8_t *buf
) const override
;
507 llvm::SmallString
<261> xarPath
;
// Output section built from a list of CStringInputSection inputs. It is
// non-final: DeduplicatedCStringSection in this header derives from it and
// overrides getSize(), finalizeContents() and writeTo().
511 class CStringSection
: public SyntheticSection
{
514 void addInput(CStringInputSection
*);
// Returns the `size` member. NOTE(review): its declaration is not visible
// in this listing.
515 uint64_t getSize() const override
{ return size
; }
516 virtual void finalizeContents();
// Needed only when at least one input section has been added.
517 bool isNeeded() const override
{ return !inputs
.empty(); }
518 void writeTo(uint8_t *buf
) const override
;
520 std::vector
<CStringInputSection
*> inputs
;
// CStringSection variant that delegates both its size and its output to an
// llvm::StringTableBuilder.
526 class DeduplicatedCStringSection final
: public CStringSection
{
528 DeduplicatedCStringSection();
529 uint64_t getSize() const override
{ return builder
.getSize(); }
530 void finalizeContents() override
;
// The builder writes its table directly into `buf`.
531 void writeTo(uint8_t *buf
) const override
{ builder
.write(buf
); }
534 llvm::StringTableBuilder builder
;
538 * This section contains deduplicated literal values. The 16-byte values are
539 * laid out first, followed by the 8- and then the 4-byte ones.
// Each map below associates a literal value with a uint64_t that the
// getLiteral*Offset() accessors treat as an index within its size class.
541 class WordLiteralSection final
: public SyntheticSection
{
543 using UInt128
= std::pair
<uint64_t, uint64_t>;
544 // I don't think the standard guarantees the size of a pair, so let's make
545 // sure it's exact -- that way we can construct it via `mmap`.
546 static_assert(sizeof(UInt128
) == 16, "");
548 WordLiteralSection();
549 void addInput(WordLiteralInputSection
*);
550 void finalizeContents();
551 void writeTo(uint8_t *buf
) const override
;
// Total bytes: 16 per 16-byte literal, 8 per 8-byte literal, 4 per 4-byte
// literal.
553 uint64_t getSize() const override
{
554 return literal16Map
.size() * 16 + literal8Map
.size() * 8 +
555 literal4Map
.size() * 4;
// Needed when any of the three literal maps is non-empty.
558 bool isNeeded() const override
{
559 return !literal16Map
.empty() || !literal4Map
.empty() ||
560 !literal8Map
.empty();
// The three accessors below read a literal from `buf`, look it up with
// at() (so the literal must already be in the map), and turn the stored
// index into a byte offset within the section.
// NOTE(review): `buf` is read through a reinterpret_cast'd pointer, so this
// relies on `buf` being suitably aligned for the literal type -- confirm
// against callers.
563 uint64_t getLiteral16Offset(const uint8_t *buf
) const {
564 return literal16Map
.at(*reinterpret_cast<const UInt128
*>(buf
)) * 16;
// 8-byte literals start after all of the 16-byte literals.
567 uint64_t getLiteral8Offset(const uint8_t *buf
) const {
568 return literal16Map
.size() * 16 +
569 literal8Map
.at(*reinterpret_cast<const uint64_t *>(buf
)) * 8;
// 4-byte literals start after all of the 16- and 8-byte literals.
572 uint64_t getLiteral4Offset(const uint8_t *buf
) const {
573 return literal16Map
.size() * 16 + literal8Map
.size() * 8 +
574 literal4Map
.at(*reinterpret_cast<const uint32_t *>(buf
)) * 4;
578 std::vector
<WordLiteralInputSection
*> inputs
;
// Hash functor that forwards to llvm::hash_value; used for the UInt128 key
// of literal16Map.
580 template <class T
> struct Hasher
{
581 llvm::hash_code
operator()(T v
) const { return llvm::hash_value(v
); }
583 // We're using unordered_map instead of DenseMap here because we need to
584 // support all possible integer values -- there are no suitable tombstone
585 // values for DenseMap.
586 std::unordered_map
<UInt128
, uint64_t, Hasher
<UInt128
>> literal16Map
;
587 std::unordered_map
<uint64_t, uint64_t> literal8Map
;
588 std::unordered_map
<uint32_t, uint64_t> literal4Map
;
// Pointers to individual sections, all initialized to nullptr.
// NOTE(review): these look like members of an aggregate whose declaration
// header is not visible in this listing -- confirm.
592 MachHeaderSection
*header
= nullptr;
593 CStringSection
*cStringSection
= nullptr;
594 WordLiteralSection
*wordLiteralSection
= nullptr;
595 RebaseSection
*rebase
= nullptr;
596 BindingSection
*binding
= nullptr;
597 WeakBindingSection
*weakBinding
= nullptr;
598 LazyBindingSection
*lazyBinding
= nullptr;
599 ExportSection
*exports
= nullptr;
600 GotSection
*got
= nullptr;
601 TlvPointerSection
*tlvPointers
= nullptr;
602 LazyPointerSection
*lazyPointers
= nullptr;
603 StubsSection
*stubs
= nullptr;
604 StubHelperSection
*stubHelper
= nullptr;
// Only forward-declared in this header.
605 UnwindInfoSection
*unwindInfo
= nullptr;
// Unlike the other members, this one is an input section rather than a
// synthetic section type.
606 ConcatInputSection
*imageLoaderCache
= nullptr;
// Global list of SyntheticSection pointers; defined in another translation
// unit. NOTE(review): presumably every constructed synthetic section is
// registered here -- confirm against the SyntheticSection constructor.
610 extern std::vector
<SyntheticSection
*> syntheticSections
;
// Declared here, defined elsewhere.
612 void createSyntheticSymbols();