1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_WASM_SYMBOLS_H
10 #define LLD_WASM_SYMBOLS_H
13 #include "lld/Common/LLVM.h"
14 #include "llvm/Object/Archive.h"
15 #include "llvm/Object/Wasm.h"
21 // Shared string constants
23 // The default module name to use for symbol imports.
24 extern const char *defaultModule
;
26 // The name under which to import or export the wasm table.
27 extern const char *functionTableName
;
29 // The name under which to import or export the wasm memory.
30 extern const char *memoryName
;
32 using llvm::wasm::WasmSymbolType
;
44 #define INVALID_INDEX UINT32_MAX
46 // The base class for real symbol classes.
57 UndefinedFunctionKind
,
// Returns the symbolKind discriminator stored in this symbol.
67 Kind
kind() const { return symbolKind
; }
// A symbol counts as defined when it is neither lazy nor undefined.
69 bool isDefined() const { return !isLazy() && !isUndefined(); }
71 bool isUndefined() const {
72 return symbolKind
== UndefinedFunctionKind
||
73 symbolKind
== UndefinedDataKind
||
74 symbolKind
== UndefinedGlobalKind
||
75 symbolKind
== UndefinedTableKind
|| symbolKind
== UndefinedTagKind
;
// True exactly when symbolKind is LazyKind.
78 bool isLazy() const { return symbolKind
== LazyKind
; }
79 bool isShared() const {
80 return symbolKind
== SharedFunctionKind
|| symbolKind
== SharedDataKind
;
85 bool isHidden() const;
88 // Returns true if this symbol exists in a discarded (due to COMDAT) section
89 bool isDiscarded() const;
91 // True if this is an undefined weak symbol. This only works once
92 // all input files have been added.
93 bool isUndefWeak() const {
94 // See comment on lazy symbols for details.
95 return isWeak() && (isUndefined() || isLazy());
98 // Returns the symbol name.
99 StringRef
getName() const { return name
; }
101 // Returns the file from which this symbol was created.
102 InputFile
*getFile() const { return file
; }
104 InputChunk
*getChunk() const;
106 // Indicates that the section or import for this symbol will be included in
110 // Marks the symbol's InputChunk as Live, so that it will be included in the
114 void setHidden(bool isHidden
);
116 // Get/set the index in the output symbol table. This is only used for
117 // relocatable output.
118 uint32_t getOutputSymbolIndex() const;
119 void setOutputSymbolIndex(uint32_t index
);
121 WasmSymbolType
getWasmType() const;
122 bool isImported() const;
123 bool isExported() const;
124 bool isExportedExplicit() const;
126 // Indicates that the symbol is used in an __attribute__((used)) directive
128 bool isNoStrip() const;
130 const WasmSignature
* getSignature() const;
132 uint32_t getGOTIndex() const {
133 assert(gotIndex
!= INVALID_INDEX
);
137 void setGOTIndex(uint32_t index
);
// True once gotIndex holds something other than the INVALID_INDEX sentinel
// (which is also the field's initial value).
138 bool hasGOTIndex() const { return gotIndex
!= INVALID_INDEX
; }
141 Symbol(StringRef name
, Kind k
, uint32_t flags
, InputFile
*f
)
142 : name(name
), file(f
), symbolKind(k
), referenced(!config
->gcSections
),
143 requiresGOT(false), isUsedInRegularObj(false), forceExport(false),
144 forceImport(false), canInline(false), traced(false), isStub(false),
149 uint32_t outputSymbolIndex
= INVALID_INDEX
;
150 uint32_t gotIndex
= INVALID_INDEX
;
156 // True for data symbols that need a dummy GOT entry. Used for static
157 // linking of GOT accesses.
158 bool requiresGOT
: 1;
160 // True if the symbol was used for linking and thus needs to be added to the
161 // output file's symbol table. This is true for all symbols except for
162 // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
163 // are unreferenced except by other bitcode objects.
164 bool isUsedInRegularObj
: 1;
166 // True if this symbol is explicitly marked for export (i.e. via the
167 // -e/--export command line flag)
168 bool forceExport
: 1;
170 bool forceImport
: 1;
172 // False if LTO shouldn't inline whatever this symbol points to. If a symbol
173 // is overwritten after LTO, LTO shouldn't inline the symbol because it
174 // doesn't know the final contents of the symbol.
177 // True if this symbol is specified by --trace-symbol option.
180 // True if this symbol is a linker-synthesized stub function (traps when
181 // called) and should otherwise be treated as missing/undefined. See
182 // SymbolTable::replaceWithUndefined.
183 // These stubs never appear in the table and any table index relocations
184 // against them will produce address 0 (The table index representing
185 // the null function pointer).
190 std::optional
<StringRef
> importName
;
191 std::optional
<StringRef
> importModule
;
194 class FunctionSymbol
: public Symbol
{
196 static bool classof(const Symbol
*s
) {
197 return s
->kind() == DefinedFunctionKind
||
198 s
->kind() == SharedFunctionKind
||
199 s
->kind() == UndefinedFunctionKind
;
202 // Get/set the table index
203 void setTableIndex(uint32_t index
);
204 uint32_t getTableIndex() const;
205 bool hasTableIndex() const;
207 // Get/set the function index
208 uint32_t getFunctionIndex() const;
209 void setFunctionIndex(uint32_t index
);
210 bool hasFunctionIndex() const;
212 const WasmSignature
*signature
;
// Forwards name, kind, flags and file to the Symbol base constructor and
// stores the signature pointer `sig` as given (no null check is made here).
215 FunctionSymbol(StringRef name
, Kind k
, uint32_t flags
, InputFile
*f
,
216 const WasmSignature
*sig
)
217 : Symbol(name
, k
, flags
, f
), signature(sig
) {}
219 uint32_t tableIndex
= INVALID_INDEX
;
220 uint32_t functionIndex
= INVALID_INDEX
;
223 class DefinedFunction
: public FunctionSymbol
{
225 DefinedFunction(StringRef name
, uint32_t flags
, InputFile
*f
,
226 InputFunction
*function
);
228 static bool classof(const Symbol
*s
) {
229 return s
->kind() == DefinedFunctionKind
;
232 // Get the function index to be used when exporting. This only applies to
233 // defined functions and can differ from the regular function index for
234 // weakly defined functions (that are imported and used via one index but
235 // defined and exported via another).
236 uint32_t getExportedFunctionIndex() const;
238 InputFunction
*function
;
241 class UndefinedFunction
: public FunctionSymbol
{
243 UndefinedFunction(StringRef name
, std::optional
<StringRef
> importName
,
244 std::optional
<StringRef
> importModule
, uint32_t flags
,
245 InputFile
*file
= nullptr,
246 const WasmSignature
*type
= nullptr,
247 bool isCalledDirectly
= true)
248 : FunctionSymbol(name
, UndefinedFunctionKind
, flags
, file
, type
),
249 isCalledDirectly(isCalledDirectly
) {
250 this->importName
= importName
;
251 this->importModule
= importModule
;
254 static bool classof(const Symbol
*s
) {
255 return s
->kind() == UndefinedFunctionKind
;
258 DefinedFunction
*stubFunction
= nullptr;
259 bool isCalledDirectly
;
262 // Section symbols for output sections are different from those for input
263 // sections. These are generated by the linker and point to the OutputSection
264 // rather than an InputSection.
265 class OutputSectionSymbol
: public Symbol
{
267 OutputSectionSymbol(const OutputSection
*s
)
268 : Symbol("", OutputSectionKind
, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL
,
272 static bool classof(const Symbol
*s
) {
273 return s
->kind() == OutputSectionKind
;
276 const OutputSection
*section
;
279 class SectionSymbol
: public Symbol
{
// Builds a symbol of SectionKind with an empty name. `s` is stored as the
// section pointer; the file `f` is optional and defaults to nullptr.
281 SectionSymbol(uint32_t flags
, const InputChunk
*s
, InputFile
*f
= nullptr)
282 : Symbol("", SectionKind
, flags
, f
), section(s
) {}
// Kind test: true exactly when `s` reports SectionKind.
284 static bool classof(const Symbol
*s
) { return s
->kind() == SectionKind
; }
286 const OutputSectionSymbol
*getOutputSectionSymbol() const;
288 const InputChunk
*section
;
291 class DataSymbol
: public Symbol
{
293 static bool classof(const Symbol
*s
) {
294 return s
->kind() == DefinedDataKind
|| s
->kind() == UndefinedDataKind
||
295 s
->kind() == SharedDataKind
;
299 DataSymbol(StringRef name
, Kind k
, uint32_t flags
, InputFile
*f
)
300 : Symbol(name
, k
, flags
, f
) {}
303 class DefinedData
: public DataSymbol
{
305 // Constructor for regular data symbols originating from input files.
// The symbol is created with DefinedDataKind; `segment`, `value` and `size`
// are copied into the members of the same names.
306 DefinedData(StringRef name
, uint32_t flags
, InputFile
*f
, InputChunk
*segment
,
307 uint64_t value
, uint64_t size
)
308 : DataSymbol(name
, DefinedDataKind
, flags
, f
), segment(segment
),
309 value(value
), size(size
) {}
311 // Constructor for linker synthetic data symbols.
// No InputFile is attached (nullptr is passed to the base), and no segment
// is supplied, so `segment` keeps its nullptr default.
312 DefinedData(StringRef name
, uint32_t flags
)
313 : DataSymbol(name
, DefinedDataKind
, flags
, nullptr) {}
// Kind test: true exactly when `s` reports DefinedDataKind.
315 static bool classof(const Symbol
*s
) { return s
->kind() == DefinedDataKind
; }
317 // Returns the output virtual address of a defined data symbol.
318 uint64_t getVA() const;
319 void setVA(uint64_t va
);
321 // Returns the offset of a defined data symbol within its OutputSegment.
322 uint64_t getOutputSegmentOffset() const;
323 uint64_t getOutputSegmentIndex() const;
// Returns the stored `size` member unchanged.
324 uint64_t getSize() const { return size
; }
326 InputChunk
*segment
= nullptr;
333 class SharedData
: public DataSymbol
{
335 SharedData(StringRef name
, uint32_t flags
, InputFile
*f
)
336 : DataSymbol(name
, SharedDataKind
, flags
, f
) {}
339 class UndefinedData
: public DataSymbol
{
341 UndefinedData(StringRef name
, uint32_t flags
, InputFile
*file
= nullptr)
342 : DataSymbol(name
, UndefinedDataKind
, flags
, file
) {}
343 static bool classof(const Symbol
*s
) {
344 return s
->kind() == UndefinedDataKind
;
348 class GlobalSymbol
: public Symbol
{
350 static bool classof(const Symbol
*s
) {
351 return s
->kind() == DefinedGlobalKind
|| s
->kind() == UndefinedGlobalKind
;
// Returns the globalType pointer supplied at construction; it may be null,
// since a derived constructor in this file defaults it to nullptr.
354 const WasmGlobalType
*getGlobalType() const { return globalType
; }
356 // Get/set the global index
357 uint32_t getGlobalIndex() const;
358 void setGlobalIndex(uint32_t index
);
359 bool hasGlobalIndex() const;
// Forwards name, kind, flags and file to the Symbol base and records the
// global type pointer. globalIndex is left at its INVALID_INDEX default.
362 GlobalSymbol(StringRef name
, Kind k
, uint32_t flags
, InputFile
*f
,
363 const WasmGlobalType
*globalType
)
364 : Symbol(name
, k
, flags
, f
), globalType(globalType
) {}
366 const WasmGlobalType
*globalType
;
367 uint32_t globalIndex
= INVALID_INDEX
;
370 class DefinedGlobal
: public GlobalSymbol
{
372 DefinedGlobal(StringRef name
, uint32_t flags
, InputFile
*file
,
373 InputGlobal
*global
);
375 static bool classof(const Symbol
*s
) {
376 return s
->kind() == DefinedGlobalKind
;
382 class UndefinedGlobal
: public GlobalSymbol
{
384 UndefinedGlobal(StringRef name
, std::optional
<StringRef
> importName
,
385 std::optional
<StringRef
> importModule
, uint32_t flags
,
386 InputFile
*file
= nullptr,
387 const WasmGlobalType
*type
= nullptr)
388 : GlobalSymbol(name
, UndefinedGlobalKind
, flags
, file
, type
) {
389 this->importName
= importName
;
390 this->importModule
= importModule
;
393 static bool classof(const Symbol
*s
) {
394 return s
->kind() == UndefinedGlobalKind
;
398 class TableSymbol
: public Symbol
{
400 static bool classof(const Symbol
*s
) {
401 return s
->kind() == DefinedTableKind
|| s
->kind() == UndefinedTableKind
;
// Returns the tableType pointer recorded by the constructor.
404 const WasmTableType
*getTableType() const { return tableType
; }
405 void setLimits(const WasmLimits
&limits
);
407 // Get/set the table number
408 uint32_t getTableNumber() const;
409 void setTableNumber(uint32_t number
);
410 bool hasTableNumber() const;
// Forwards name, kind, flags and file to the Symbol base and stores `type`
// as tableType. tableNumber is left at its INVALID_INDEX default.
413 TableSymbol(StringRef name
, Kind k
, uint32_t flags
, InputFile
*f
,
414 const WasmTableType
*type
)
415 : Symbol(name
, k
, flags
, f
), tableType(type
) {}
417 const WasmTableType
*tableType
;
418 uint32_t tableNumber
= INVALID_INDEX
;
421 class DefinedTable
: public TableSymbol
{
423 DefinedTable(StringRef name
, uint32_t flags
, InputFile
*file
,
426 static bool classof(const Symbol
*s
) { return s
->kind() == DefinedTableKind
; }
431 class UndefinedTable
: public TableSymbol
{
433 UndefinedTable(StringRef name
, std::optional
<StringRef
> importName
,
434 std::optional
<StringRef
> importModule
, uint32_t flags
,
435 InputFile
*file
, const WasmTableType
*type
)
436 : TableSymbol(name
, UndefinedTableKind
, flags
, file
, type
) {
437 this->importName
= importName
;
438 this->importModule
= importModule
;
441 static bool classof(const Symbol
*s
) {
442 return s
->kind() == UndefinedTableKind
;
446 // A tag is a general format to distinguish typed entities. Each tag has an
447 // attribute and a type. Currently the attribute can only specify that the tag
448 // is for an exception tag.
450 // In exception handling, tags are used to distinguish different kinds of
451 // exceptions. For example, they can be used to distinguish different languages'
452 // exceptions, e.g., all C++ exceptions have the same tag and Java exceptions
453 // would have a distinct tag. Wasm can filter the exceptions it catches based on
456 // A single TagSymbol object represents a single tag. The C++ exception symbol
457 // is a weak symbol generated in every object file in which exceptions are used,
458 // and is named '__cpp_exception' for linking.
459 class TagSymbol
: public Symbol
{
461 static bool classof(const Symbol
*s
) {
462 return s
->kind() == DefinedTagKind
|| s
->kind() == UndefinedTagKind
;
465 // Get/set the tag index
466 uint32_t getTagIndex() const;
467 void setTagIndex(uint32_t index
);
468 bool hasTagIndex() const;
470 const WasmSignature
*signature
;
// Forwards name, kind, flags and file to the Symbol base and stores `sig`
// as the tag's signature. tagIndex is left at its INVALID_INDEX default.
473 TagSymbol(StringRef name
, Kind k
, uint32_t flags
, InputFile
*f
,
474 const WasmSignature
*sig
)
475 : Symbol(name
, k
, flags
, f
), signature(sig
) {}
477 uint32_t tagIndex
= INVALID_INDEX
;
480 class DefinedTag
: public TagSymbol
{
482 DefinedTag(StringRef name
, uint32_t flags
, InputFile
*file
, InputTag
*tag
);
484 static bool classof(const Symbol
*s
) { return s
->kind() == DefinedTagKind
; }
489 class UndefinedTag
: public TagSymbol
{
491 UndefinedTag(StringRef name
, std::optional
<StringRef
> importName
,
492 std::optional
<StringRef
> importModule
, uint32_t flags
,
493 InputFile
*file
= nullptr, const WasmSignature
*sig
= nullptr)
494 : TagSymbol(name
, UndefinedTagKind
, flags
, file
, sig
) {
495 this->importName
= importName
;
496 this->importModule
= importModule
;
499 static bool classof(const Symbol
*s
) { return s
->kind() == UndefinedTagKind
; }
502 class SharedFunctionSymbol
: public FunctionSymbol
{
504 SharedFunctionSymbol(StringRef name
, uint32_t flags
, InputFile
*file
,
505 const WasmSignature
*sig
)
506 : FunctionSymbol(name
, SharedFunctionKind
, flags
, file
, sig
) {}
507 static bool classof(const Symbol
*s
) {
508 return s
->kind() == SharedFunctionKind
;
512 // LazySymbol symbols represent symbols in object files between --start-lib and
513 // --end-lib options. LLD also handles traditional archives as if all the files
514 // in the archive are surrounded by --start-lib and --end-lib.
516 // A special complication is the handling of weak undefined symbols. They should
517 // not load a file, but we have to remember we have seen both the weak undefined
518 // and the lazy. We represent that with a lazy symbol with a weak binding. This
519 // means that code looking for undefined symbols normally also has to take lazy
520 // symbols into consideration.
521 class LazySymbol
: public Symbol
{
// Creates a symbol of LazyKind for `name` in `file`. The signature member is
// not set here and keeps its nullptr default.
523 LazySymbol(StringRef name
, uint32_t flags
, InputFile
*file
)
524 : Symbol(name
, LazyKind
, flags
, file
) {}
// Kind test: true exactly when `s` reports LazyKind.
526 static bool classof(const Symbol
*s
) { return s
->kind() == LazyKind
; }
530 // Lazy symbols can have a signature because they can replace an
531 // UndefinedFunction in which case we need to be able to preserve the
533 // TODO(sbc): This repetition of the signature field is inelegant. Revisit
534 // the use of class hierarchy to represent symbol taxonomy.
535 const WasmSignature
*signature
= nullptr;
538 // linker-generated symbols
541 // Symbol marking the start of the global section.
542 static DefinedData
*globalBase
;
544 // __stack_pointer/__stack_low/__stack_high
545 // Global that holds current value of stack pointer and data symbols marking
546 // the start and end of the stack region. stackPointer is initialized to
547 // stackHigh and grows downwards towards stackLow
548 static GlobalSymbol
*stackPointer
;
549 static DefinedData
*stackLow
;
550 static DefinedData
*stackHigh
;
553 // Global that holds the address of the base of the current thread's
555 static GlobalSymbol
*tlsBase
;
558 // Symbol whose value is the size of the TLS block.
559 static GlobalSymbol
*tlsSize
;
562 // Symbol whose value is the alignment of the TLS block.
563 static GlobalSymbol
*tlsAlign
;
566 // Symbol marking the end of the data and bss.
567 static DefinedData
*dataEnd
;
569 // __heap_base/__heap_end
570 // Symbols marking the beginning and end of the "heap". It starts at the end
571 // of the data, bss and explicit stack, and extends to the end of the linear
572 // memory allocated by wasm-ld. This region of memory is not used by the
573 // linked code, so it may be used as a backing store for `sbrk` or `malloc`
575 static DefinedData
*heapBase
;
576 static DefinedData
*heapEnd
;
578 // __wasm_init_memory_flag
579 // Symbol whose contents are nonzero iff memory has already been initialized.
580 static DefinedData
*initMemoryFlag
;
582 // __wasm_init_memory
583 // Function that initializes passive data segments during instantiation.
584 static DefinedFunction
*initMemory
;
587 // Function that directly calls all ctors in priority order.
588 static DefinedFunction
*callCtors
;
591 // Function that calls the libc/etc. cleanup function.
592 static DefinedFunction
*callDtors
;
594 // __wasm_apply_data_relocs
595 // Function that applies relocations to data segment post-instantiation.
596 static DefinedFunction
*applyDataRelocs
;
598 // __wasm_apply_global_relocs
599 // Function that applies relocations to wasm globals post-instantiation.
600 // Unlike __wasm_apply_data_relocs this needs to run on every thread.
601 static DefinedFunction
*applyGlobalRelocs
;
603 // __wasm_apply_tls_relocs
604 // Like applyDataRelocs but for TLS section. These must be delayed until
606 static DefinedFunction
*applyTLSRelocs
;
608 // __wasm_apply_global_tls_relocs
609 // Like applyGlobalRelocs but for globals that hold TLS addresses. These
610 // must be delayed until __wasm_init_tls.
611 static DefinedFunction
*applyGlobalTLSRelocs
;
614 // Function that allocates thread-local storage and initializes it.
615 static DefinedFunction
*initTLS
;
617 // Pointer to the function that is to be used in the start section.
618 // (normally an alias of initMemory, or applyGlobalRelocs).
619 static DefinedFunction
*startFunction
;
622 // Symbol used in calls to __cxa_atexit to determine current DLL
623 static DefinedData
*dsoHandle
;
626 // Used in PIC code for offset of indirect function table
627 static UndefinedGlobal
*tableBase
;
628 static DefinedData
*definedTableBase
;
631 // Used in PIC code for offset of global data
632 static UndefinedGlobal
*memoryBase
;
633 static DefinedData
*definedMemoryBase
;
635 // __indirect_function_table
636 // Used as an address space for function pointers, with each function that is
637 // used as a function pointer being allocated a slot.
638 static TableSymbol
*indirectFunctionTable
;
641 // A buffer class that is large enough to hold any Symbol-derived
642 // object. We allocate memory using this class and instantiate a symbol
643 // using the placement new.
645 alignas(DefinedFunction
) char a
[sizeof(DefinedFunction
)];
646 alignas(DefinedData
) char b
[sizeof(DefinedData
)];
647 alignas(DefinedGlobal
) char c
[sizeof(DefinedGlobal
)];
648 alignas(DefinedTag
) char d
[sizeof(DefinedTag
)];
649 alignas(DefinedTable
) char e
[sizeof(DefinedTable
)];
650 alignas(LazySymbol
) char f
[sizeof(LazySymbol
)];
651 alignas(UndefinedFunction
) char g
[sizeof(UndefinedFunction
)];
652 alignas(UndefinedData
) char h
[sizeof(UndefinedData
)];
653 alignas(UndefinedGlobal
) char i
[sizeof(UndefinedGlobal
)];
654 alignas(UndefinedTable
) char j
[sizeof(UndefinedTable
)];
655 alignas(SectionSymbol
) char k
[sizeof(SectionSymbol
)];
656 alignas(SharedFunctionSymbol
) char l
[sizeof(SharedFunctionSymbol
)];
659 // It is important to keep the size of SymbolUnion small for performance and
660 // memory usage reasons. 96 bytes is a soft limit based on the size of
661 // UndefinedFunction on a 64-bit system.
// NOTE(review): the assertion below only enforces a ceiling of 120 bytes, so
// the 96-byte figure above is a target rather than a checked bound — confirm
// which number is intended.
662 static_assert(sizeof(SymbolUnion
) <= 120, "SymbolUnion too large");
664 void printTraceSymbol(Symbol
*sym
);
665 void printTraceSymbolUndefined(StringRef name
, const InputFile
* file
);
667 template <typename T
, typename
... ArgT
>
668 T
*replaceSymbol(Symbol
*s
, ArgT
&&... arg
) {
669 static_assert(std::is_trivially_destructible
<T
>(),
670 "Symbol types must be trivially destructible");
671 static_assert(sizeof(T
) <= sizeof(SymbolUnion
), "SymbolUnion too small");
672 static_assert(alignof(T
) <= alignof(SymbolUnion
),
673 "SymbolUnion not aligned enough");
674 assert(static_cast<Symbol
*>(static_cast<T
*>(nullptr)) == nullptr &&
679 T
*s2
= new (s
) T(std::forward
<ArgT
>(arg
)...);
680 s2
->isUsedInRegularObj
= symCopy
.isUsedInRegularObj
;
681 s2
->forceExport
= symCopy
.forceExport
;
682 s2
->forceImport
= symCopy
.forceImport
;
683 s2
->canInline
= symCopy
.canInline
;
684 s2
->traced
= symCopy
.traced
;
685 s2
->referenced
= symCopy
.referenced
;
687 // Print out a log message if --trace-symbol was specified.
688 // This is for debugging.
690 printTraceSymbol(s2
);
697 // Returns a symbol name for an error message.
698 std::string
toString(const wasm::Symbol
&sym
);
699 std::string
toString(wasm::Symbol::Kind kind
);
700 std::string
maybeDemangleSymbol(StringRef name
);