1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_MACHO_SYMBOLS_H
10 #define LLD_MACHO_SYMBOLS_H
13 #include "InputFiles.h"
16 #include "llvm/Object/Archive.h"
17 #include "llvm/Support/Compiler.h"
18 #include "llvm/Support/MathExtras.h"
23 class MachHeaderSection
;
37 // Enum that describes the type of Identical Code Folding (ICF) applied to a
38 // symbol. This information is crucial for accurately representing symbol
39 // sizes in the map file.
41 None
, // No folding is applied.
42 Body
, // The entire body (function or data) is folded.
43 Thunk
// The function body is folded into a single branch thunk.
48 Kind
kind() const { return symbolKind
; }
50 StringRef
getName() const { return {nameData
, nameSize
}; }
52 bool isLive() const { return used
; }
54 return symbolKind
== LazyArchiveKind
|| symbolKind
== LazyObjectKind
;
57 virtual uint64_t getVA() const { return 0; }
59 virtual bool isWeakDef() const { return false; }
61 // Only undefined or dylib symbols can be weak references. A weak reference
62 // need not be satisfied at runtime, e.g. due to the symbol not being
63 // available on a given target platform.
64 virtual bool isWeakRef() const { return false; }
66 virtual bool isTlv() const { return false; }
68 // Whether this symbol is in the GOT or TLVPointer sections.
69 bool isInGot() const { return gotIndex
!= UINT32_MAX
; }
71 // Whether this symbol is in the StubsSection.
72 bool isInStubs() const { return stubsIndex
!= UINT32_MAX
; }
74 uint64_t getStubVA() const;
75 uint64_t getLazyPtrVA() const;
76 uint64_t getGotVA() const;
77 uint64_t getTlvVA() const;
78 uint64_t resolveBranchVA() const {
79 assert(isa
<Defined
>(this) || isa
<DylibSymbol
>(this));
80 return isInStubs() ? getStubVA() : getVA();
82 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
83 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
// Position of this symbol in either the GOT or the TLVPointer section; which
// of the two depends on whether the symbol is a thread-local. No symbol is
// referenced by both sections simultaneously. Stays at UINT32_MAX while the
// symbol has no slot (see isInGot()).
uint32_t gotIndex = UINT32_MAX;
// Defaults to UINT32_MAX.
// NOTE(review): presumably an offset into the lazy-binding data -- confirm.
uint32_t lazyBindOffset = UINT32_MAX;
// Defaults to UINT32_MAX; DylibSymbol::hasStubsHelper() tests against that
// value.
uint32_t stubsHelperIndex = UINT32_MAX;
// Defaults to UINT32_MAX, meaning "not in the StubsSection" (see
// isInStubs()).
uint32_t stubsIndex = UINT32_MAX;
// Defaults to UINT32_MAX.
// NOTE(review): presumably the index in the output symbol table -- confirm.
uint32_t symtabIndex = UINT32_MAX;
94 InputFile
*getFile() const { return file
; }
97 Symbol(Kind k
, StringRef name
, InputFile
*file
)
98 : symbolKind(k
), nameData(name
.data()), file(file
), nameSize(name
.size()),
99 isUsedInRegularObj(!file
|| isa
<ObjFile
>(file
)),
100 used(!config
->deadStrip
) {}
103 const char *nameData
;
108 // True if this symbol was referenced by a regular (non-bitcode) object.
109 bool isUsedInRegularObj
: 1;
111 // True if this symbol is used from a live section.
115 class Defined
: public Symbol
{
117 Defined(StringRef name
, InputFile
*file
, InputSection
*isec
, uint64_t value
,
118 uint64_t size
, bool isWeakDef
, bool isExternal
, bool isPrivateExtern
,
119 bool includeInSymtab
, bool isReferencedDynamically
, bool noDeadStrip
,
120 bool canOverrideWeakDef
= false, bool isWeakDefCanBeHidden
= false,
121 bool interposable
= false);
123 bool isWeakDef() const override
{ return weakDef
; }
124 bool isExternalWeakDef() const {
125 return isWeakDef() && isExternal() && !privateExtern
;
127 bool isTlv() const override
;
129 bool isExternal() const { return external
; }
130 bool isAbsolute() const { return originalIsec
== nullptr; }
132 uint64_t getVA() const override
;
134 // Returns the object file that this symbol was defined in. This value differs
135 // from `getFile()` if the symbol originated from a bitcode file.
136 ObjFile
*getObjectFile() const;
138 std::string
getSourceLocation();
140 // Get the canonical InputSection of the symbol.
141 InputSection
*isec() const;
143 // Get the canonical unwind entry of the symbol.
144 ConcatInputSection
*unwindEntry() const;
146 static bool classof(const Symbol
*s
) { return s
->kind() == DefinedKind
; }
148 // Place the bitfields first so that they can get placed in the tail padding
149 // of the parent class, on platforms which support it.
150 bool overridesWeakDef
: 1;
151 // Whether this symbol should appear in the output binary's export trie.
152 bool privateExtern
: 1;
153 // Whether this symbol should appear in the output symbol table.
154 bool includeInSymtab
: 1;
155 // The ICF folding kind of this symbol: None / Body / Thunk.
156 LLVM_PREFERRED_TYPE(ICFFoldKind
)
157 uint8_t identicalCodeFoldingKind
: 2;
158 // Symbols marked referencedDynamically won't be removed from the output's
159 // symbol table by tools like strip. In theory, this could be set on arbitrary
160 // symbols in input object files. In practice, it's used solely for the
161 // synthetic __mh_execute_header symbol.
162 // This is information for the static linker, and it's also written to the
163 // output file's symbol table for tools running later (such as `strip`).
164 bool referencedDynamically
: 1;
165 // Set on symbols that should not be removed by dead code stripping.
166 // Set for example on `__attribute__((used))` globals, or on some Objective-C
167 // metadata. This is information only for the static linker and not written
169 bool noDeadStrip
: 1;
170 // Whether references to this symbol can be interposed at runtime to point to
171 // a different symbol definition (with the same name). For example, if both
172 // dylib A and B define an interposable symbol _foo, and we load A before B at
173 // runtime, then all references to _foo within dylib B will point to the
174 // definition in dylib A.
176 // Only extern symbols may be interposable.
177 bool interposable
: 1;
179 bool weakDefCanBeHidden
: 1;
182 const bool weakDef
: 1;
183 const bool external
: 1;
186 // The native InputSection of the symbol. The symbol may be moved to another
187 // InputSection in which case originalIsec->canonical() will point to the new
189 InputSection
*originalIsec
;
190 // Contains the offset from the containing subsection. Note that this is
191 // different from nlist::n_value, which is the absolute address of the symbol.
193 // size is only calculated for regular (non-bitcode) symbols.
195 // This can be a subsection of either __compact_unwind or __eh_frame.
196 ConcatInputSection
*originalUnwindEntry
= nullptr;
// Serves two purposes. On a symbol it records whether, and how strongly, a
// dylib symbol is referenced. On a DylibFile it summarizes the kind of
// referenced symbols the file contains; a file that has both weak and strong
// references is counted as strongly-referenced. The enumerators are ordered
// so that a stronger reference compares greater than a weaker one.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2
};
206 class Undefined
: public Symbol
{
208 Undefined(StringRef name
, InputFile
*file
, RefState refState
,
209 bool wasBitcodeSymbol
)
210 : Symbol(UndefinedKind
, name
, file
), refState(refState
),
211 wasBitcodeSymbol(wasBitcodeSymbol
) {
212 assert(refState
!= RefState::Unreferenced
);
215 bool isWeakRef() const override
{ return refState
== RefState::Weak
; }
217 static bool classof(const Symbol
*s
) { return s
->kind() == UndefinedKind
; }
219 RefState refState
: 2;
220 bool wasBitcodeSymbol
;
223 // On Unix, it is traditionally allowed to write variable definitions without
224 // initialization expressions (such as "int foo;") to header files. These are
225 // called tentative definitions.
227 // Using tentative definitions is usually considered a bad practice; you should
228 // write only declarations (such as "extern int foo;") to header files.
229 // Nevertheless, the linker and the compiler have to do something to support
230 // bad code by allowing duplicate definitions for this particular case.
232 // The compiler creates common symbols when it sees tentative definitions.
233 // (You can suppress this behavior and let the compiler create a regular
234 // defined symbol by passing -fno-common. -fno-common is the default in clang
235 // as of LLVM 11.0.) When linking the final binary, if there are remaining
236 // common symbols after name resolution is complete, the linker converts them
237 // to regular defined symbols in a __common section.
238 class CommonSymbol
: public Symbol
{
240 CommonSymbol(StringRef name
, InputFile
*file
, uint64_t size
, uint32_t align
,
241 bool isPrivateExtern
)
242 : Symbol(CommonKind
, name
, file
), size(size
),
243 align(align
!= 1 ? align
: llvm::PowerOf2Ceil(size
)),
244 privateExtern(isPrivateExtern
) {
245 // TODO: cap maximum alignment
248 static bool classof(const Symbol
*s
) { return s
->kind() == CommonKind
; }
251 const uint32_t align
;
252 const bool privateExtern
;
255 class DylibSymbol
: public Symbol
{
257 DylibSymbol(DylibFile
*file
, StringRef name
, bool isWeakDef
,
258 RefState refState
, bool isTlv
)
259 : Symbol(DylibKind
, name
, file
), shouldReexport(false),
260 refState(refState
), weakDef(isWeakDef
), tlv(isTlv
) {
261 if (file
&& refState
> RefState::Unreferenced
)
262 file
->numReferencedSymbols
++;
265 uint64_t getVA() const override
;
266 bool isWeakDef() const override
{ return weakDef
; }
268 // Symbols from weak libraries/frameworks are also weakly-referenced.
269 bool isWeakRef() const override
{
270 return refState
== RefState::Weak
||
271 (file
&& getFile()->umbrella
->forceWeakImport
);
273 bool isReferenced() const { return refState
!= RefState::Unreferenced
; }
274 bool isTlv() const override
{ return tlv
; }
275 bool isDynamicLookup() const { return file
== nullptr; }
276 bool hasStubsHelper() const { return stubsHelperIndex
!= UINT32_MAX
; }
278 DylibFile
*getFile() const {
279 assert(!isDynamicLookup());
280 return cast
<DylibFile
>(file
);
283 static bool classof(const Symbol
*s
) { return s
->kind() == DylibKind
; }
285 RefState
getRefState() const { return refState
; }
287 void reference(RefState newState
) {
288 assert(newState
> RefState::Unreferenced
);
289 if (refState
== RefState::Unreferenced
&& file
)
290 getFile()->numReferencedSymbols
++;
291 refState
= std::max(refState
, newState
);
295 // dynamic_lookup symbols have no file.
296 if (refState
> RefState::Unreferenced
&& file
) {
297 assert(getFile()->numReferencedSymbols
> 0);
298 getFile()->numReferencedSymbols
--;
302 bool shouldReexport
: 1;
305 RefState refState
: 2;
306 const bool weakDef
: 1;
310 class LazyArchive
: public Symbol
{
312 LazyArchive(ArchiveFile
*file
, const llvm::object::Archive::Symbol
&sym
)
313 : Symbol(LazyArchiveKind
, sym
.getName(), file
), sym(sym
) {}
315 ArchiveFile
*getFile() const { return cast
<ArchiveFile
>(file
); }
316 void fetchArchiveMember();
318 static bool classof(const Symbol
*s
) { return s
->kind() == LazyArchiveKind
; }
321 const llvm::object::Archive::Symbol sym
;
324 // A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and
326 class LazyObject
: public Symbol
{
328 LazyObject(InputFile
&file
, StringRef name
)
329 : Symbol(LazyObjectKind
, name
, &file
) {
330 isUsedInRegularObj
= false;
333 static bool classof(const Symbol
*s
) { return s
->kind() == LazyObjectKind
; }
336 // Represents N_INDR symbols. Note that if we are given valid, linkable inputs,
337 // then all AliasSymbol instances will be converted into one of the other Symbol
338 // types after `createAliases()` runs.
339 class AliasSymbol final
: public Symbol
{
341 AliasSymbol(InputFile
*file
, StringRef name
, StringRef aliasedName
,
342 bool isPrivateExtern
)
343 : Symbol(AliasKind
, name
, file
), privateExtern(isPrivateExtern
),
344 aliasedName(aliasedName
) {}
346 StringRef
getAliasedName() const { return aliasedName
; }
348 static bool classof(const Symbol
*s
) { return s
->kind() == AliasKind
; }
350 const bool privateExtern
;
353 StringRef aliasedName
;
357 alignas(Defined
) char a
[sizeof(Defined
)];
358 alignas(Undefined
) char b
[sizeof(Undefined
)];
359 alignas(CommonSymbol
) char c
[sizeof(CommonSymbol
)];
360 alignas(DylibSymbol
) char d
[sizeof(DylibSymbol
)];
361 alignas(LazyArchive
) char e
[sizeof(LazyArchive
)];
362 alignas(LazyObject
) char f
[sizeof(LazyObject
)];
363 alignas(AliasSymbol
) char g
[sizeof(AliasSymbol
)];
366 template <typename T
, typename
... ArgT
>
367 T
*replaceSymbol(Symbol
*s
, ArgT
&&...arg
) {
368 static_assert(sizeof(T
) <= sizeof(SymbolUnion
), "SymbolUnion too small");
369 static_assert(alignof(T
) <= alignof(SymbolUnion
),
370 "SymbolUnion not aligned enough");
371 assert(static_cast<Symbol
*>(static_cast<T
*>(nullptr)) == nullptr &&
374 bool isUsedInRegularObj
= s
->isUsedInRegularObj
;
376 T
*sym
= new (s
) T(std::forward
<ArgT
>(arg
)...);
377 sym
->isUsedInRegularObj
|= isUsedInRegularObj
;
382 // Can a symbol's address only be resolved at runtime?
383 inline bool needsBinding(const Symbol
*sym
) {
384 if (isa
<DylibSymbol
>(sym
))
386 if (const auto *defined
= dyn_cast
<Defined
>(sym
))
387 return defined
->isExternalWeakDef() || defined
->interposable
;
391 // Symbols with `l` or `L` as a prefix are linker-private and never appear in
393 inline bool isPrivateLabel(StringRef name
) {
394 return name
.starts_with("l") || name
.starts_with("L");
398 std::string
toString(const macho::Symbol
&);
399 std::string
toMachOString(const llvm::object::Archive::Symbol
&);