1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_MACHO_SYMBOLS_H
10 #define LLD_MACHO_SYMBOLS_H
12 #include "InputSection.h"
14 #include "lld/Common/ErrorHandler.h"
15 #include "lld/Common/Strings.h"
16 #include "llvm/Object/Archive.h"
17 #include "llvm/Support/MathExtras.h"
23 class MachHeaderSection
;
28 StringRefZ(const char *s
) : data(s
), size(-1) {}
29 StringRefZ(StringRef s
) : data(s
.data()), size(s
.size()) {}
48 Kind
kind() const { return static_cast<Kind
>(symbolKind
); }
50 StringRef
getName() const { return {name
.data
, name
.size
}; }
52 virtual uint64_t getVA() const { return 0; }
54 virtual uint64_t getFileOffset() const {
55 llvm_unreachable("attempt to get an offset from a non-defined symbol");
58 virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
60 // Only undefined or dylib symbols can be weak references. A weak reference
61 // need not be satisfied at runtime, e.g. due to the symbol not being
62 // available on a given target platform.
63 virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); }
65 virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
67 // Whether this symbol is in the GOT or TLVPointer sections.
68 bool isInGot() const { return gotIndex
!= UINT32_MAX
; }
70 // Whether this symbol is in the StubsSection.
71 bool isInStubs() const { return stubsIndex
!= UINT32_MAX
; }
73 // The index of this symbol in the GOT or the TLVPointer section, depending
74 // on whether it is a thread-local. A given symbol cannot be referenced by
75 // both these sections at once.
76 uint32_t gotIndex
= UINT32_MAX
;
78 uint32_t stubsIndex
= UINT32_MAX
;
80 uint32_t symtabIndex
= UINT32_MAX
;
83 Symbol(Kind k
, StringRefZ name
) : symbolKind(k
), name(name
) {}
89 class Defined
: public Symbol
{
91 Defined(StringRefZ name
, InputSection
*isec
, uint32_t value
, bool isWeakDef
,
93 : Symbol(DefinedKind
, name
), isec(isec
), value(value
),
94 overridesWeakDef(false), weakDef(isWeakDef
), external(isExternal
) {}
96 bool isWeakDef() const override
{ return weakDef
; }
97 bool isTlv() const override
{
98 return !isAbsolute() && isThreadLocalVariables(isec
->flags
);
101 bool isExternal() const { return external
; }
102 bool isAbsolute() const { return isec
== nullptr; }
104 uint64_t getVA() const override
;
105 uint64_t getFileOffset() const override
;
107 static bool classof(const Symbol
*s
) { return s
->kind() == DefinedKind
; }
112 bool overridesWeakDef
: 1;
115 const bool weakDef
: 1;
116 const bool external
: 1;
// RefState serves two roles. On a symbol it records whether, and how, a dylib
// symbol is referenced. On a DylibFile it records the kind of referenced
// symbols the file contains. A file that has both weak and strong references
// is counted as strongly-referenced.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
126 class Undefined
: public Symbol
{
128 Undefined(StringRefZ name
, RefState refState
)
129 : Symbol(UndefinedKind
, name
), refState(refState
) {
130 assert(refState
!= RefState::Unreferenced
);
133 bool isWeakRef() const override
{ return refState
== RefState::Weak
; }
135 static bool classof(const Symbol
*s
) { return s
->kind() == UndefinedKind
; }
137 RefState refState
: 2;
140 // On Unix, it is traditionally allowed to write variable definitions without
141 // initialization expressions (such as "int foo;") to header files. These are
142 // called tentative definitions.
144 // Using tentative definitions is usually considered a bad practice; you should
145 // write only declarations (such as "extern int foo;") to header files.
146 // Nevertheless, the linker and the compiler have to do something to support
147 // bad code by allowing duplicate definitions for this particular case.
149 // The compiler creates common symbols when it sees tentative definitions.
150 // (You can suppress this behavior and let the compiler create a regular
151 // defined symbol by passing -fno-common.) When linking the final binary, if
152 // there are remaining common symbols after name resolution is complete, the
153 // linker converts them to regular defined symbols in a __common section.
154 class CommonSymbol
: public Symbol
{
156 CommonSymbol(StringRefZ name
, InputFile
*file
, uint64_t size
, uint32_t align
)
157 : Symbol(CommonKind
, name
), file(file
), size(size
),
158 align(align
!= 1 ? align
: llvm::PowerOf2Ceil(size
)) {
159 // TODO: cap maximum alignment
162 static bool classof(const Symbol
*s
) { return s
->kind() == CommonKind
; }
164 InputFile
*const file
;
166 const uint32_t align
;
169 class DylibSymbol
: public Symbol
{
171 DylibSymbol(DylibFile
*file
, StringRefZ name
, bool isWeakDef
,
172 RefState refState
, bool isTlv
)
173 : Symbol(DylibKind
, name
), file(file
), refState(refState
),
174 weakDef(isWeakDef
), tlv(isTlv
) {}
176 bool isWeakDef() const override
{ return weakDef
; }
177 bool isWeakRef() const override
{ return refState
== RefState::Weak
; }
178 bool isReferenced() const { return refState
!= RefState::Unreferenced
; }
179 bool isTlv() const override
{ return tlv
; }
180 bool hasStubsHelper() const { return stubsHelperIndex
!= UINT32_MAX
; }
182 static bool classof(const Symbol
*s
) { return s
->kind() == DylibKind
; }
185 uint32_t stubsHelperIndex
= UINT32_MAX
;
186 uint32_t lazyBindOffset
= UINT32_MAX
;
188 RefState refState
: 2;
191 const bool weakDef
: 1;
195 class LazySymbol
: public Symbol
{
197 LazySymbol(ArchiveFile
*file
, const llvm::object::Archive::Symbol
&sym
)
198 : Symbol(LazyKind
, sym
.getName()), file(file
), sym(sym
) {}
200 static bool classof(const Symbol
*s
) { return s
->kind() == LazyKind
; }
202 void fetchArchiveMember();
206 const llvm::object::Archive::Symbol sym
;
209 // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which
210 // does e.g. cleanup of static global variables. The ABI document says that the
211 // pointer can point to any address in one of the dylib's segments, but in
212 // practice ld64 seems to set it to point to the header, so that's what's implemented here.
215 // The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet
216 // tested this on an ARM platform.
218 // DSOHandle effectively functions like a Defined symbol, but it doesn't belong
219 // to an InputSection.
220 class DSOHandle
: public Symbol
{
222 DSOHandle(const MachHeaderSection
*header
)
223 : Symbol(DSOHandleKind
, name
), header(header
) {}
225 const MachHeaderSection
*header
;
227 uint64_t getVA() const override
;
229 uint64_t getFileOffset() const override
;
231 bool isWeakDef() const override
{ return false; }
233 bool isTlv() const override
{ return false; }
235 static constexpr StringRef name
= "___dso_handle";
237 static bool classof(const Symbol
*s
) { return s
->kind() == DSOHandleKind
; }
241 alignas(Defined
) char a
[sizeof(Defined
)];
242 alignas(Undefined
) char b
[sizeof(Undefined
)];
243 alignas(CommonSymbol
) char c
[sizeof(CommonSymbol
)];
244 alignas(DylibSymbol
) char d
[sizeof(DylibSymbol
)];
245 alignas(LazySymbol
) char e
[sizeof(LazySymbol
)];
246 alignas(DSOHandle
) char f
[sizeof(DSOHandle
)];
249 template <typename T
, typename
... ArgT
>
250 T
*replaceSymbol(Symbol
*s
, ArgT
&&... arg
) {
251 static_assert(sizeof(T
) <= sizeof(SymbolUnion
), "SymbolUnion too small");
252 static_assert(alignof(T
) <= alignof(SymbolUnion
),
253 "SymbolUnion not aligned enough");
254 assert(static_cast<Symbol
*>(static_cast<T
*>(nullptr)) == nullptr &&
257 return new (s
) T(std::forward
<ArgT
>(arg
)...);
262 std::string
toString(const macho::Symbol
&);
263 std::string
toMachOString(const llvm::object::Archive::Symbol
&);