Revert "[libc] Use best-fit binary trie to make malloc logarithmic" (#117065)
[llvm-project.git] / lld / wasm / Symbols.h
blob80b658773bd20b212dadbe3331b361eb2b3ba5af
1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_WASM_SYMBOLS_H
10 #define LLD_WASM_SYMBOLS_H
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/Object/Archive.h"
15 #include "llvm/Object/Wasm.h"
16 #include <optional>
18 namespace lld {
19 namespace wasm {
21 // Shared string constants
// (declared here; their definitions are not part of this header).
23 // The default module name to use for symbol imports.
24 extern const char *defaultModule;
26 // The name under which to import or export the wasm table.
27 extern const char *functionTableName;
29 // The name under which to import or export the wasm memory.
30 extern const char *memoryName;
// Make llvm::wasm::WasmSymbolType usable unqualified inside lld::wasm.
32 using llvm::wasm::WasmSymbolType;
// Forward declarations of types defined elsewhere; where this header uses
// them, it is only through pointers.
34 class InputFile;
35 class InputChunk;
36 class InputSegment;
37 class InputFunction;
38 class InputGlobal;
39 class InputTag;
40 class InputSection;
41 class InputTable;
42 class OutputSection;
// Sentinel meaning "no index assigned yet". It is the initial value of the
// index fields declared below and is what hasGOTIndex() and the assertion in
// getGOTIndex() compare against.
44 #define INVALID_INDEX UINT32_MAX
46 // The base class for real symbol classes.
// Every symbol stores a Kind tag (symbolKind); the classof() helpers of the
// derived classes in this header test that tag through kind().
47 class Symbol {
48 public:
// Discriminator naming the concrete symbol class.
49 enum Kind : uint8_t {
50 DefinedFunctionKind,
51 DefinedDataKind,
52 DefinedGlobalKind,
53 DefinedTagKind,
54 DefinedTableKind,
55 SectionKind,
56 OutputSectionKind,
57 UndefinedFunctionKind,
58 UndefinedDataKind,
59 UndefinedGlobalKind,
60 UndefinedTableKind,
61 UndefinedTagKind,
62 LazyKind,
63 SharedFunctionKind,
64 SharedDataKind,
// Returns the Kind tag this symbol was constructed with.
67 Kind kind() const { return symbolKind; }
// A symbol counts as defined when it is neither lazy nor undefined.
69 bool isDefined() const { return !isLazy() && !isUndefined(); }
// True for any of the five Undefined*Kind tags.
71 bool isUndefined() const {
72 return symbolKind == UndefinedFunctionKind ||
73 symbolKind == UndefinedDataKind ||
74 symbolKind == UndefinedGlobalKind ||
75 symbolKind == UndefinedTableKind || symbolKind == UndefinedTagKind;
// True only for LazyKind.
78 bool isLazy() const { return symbolKind == LazyKind; }
// True for SharedFunctionKind and SharedDataKind.
79 bool isShared() const {
80 return symbolKind == SharedFunctionKind || symbolKind == SharedDataKind;
// The following predicates are only declared here; their definitions live
// outside this header.
83 bool isLocal() const;
84 bool isWeak() const;
85 bool isHidden() const;
86 bool isTLS() const;
88 // Returns true if this symbol exists in a discarded (due to COMDAT) section
89 bool isDiscarded() const;
91 // True if this is an undefined weak symbol. This only works once
92 // all input files have been added.
93 bool isUndefWeak() const {
94 // See comment on lazy symbols for details.
// A weak lazy symbol is treated the same as a weak undefined one.
95 return isWeak() && (isUndefined() || isLazy());
98 // Returns the symbol name.
99 StringRef getName() const { return name; }
101 // Returns the file from which this symbol was created.
// May be null: several constructors in this header default the file to nullptr.
102 InputFile *getFile() const { return file; }
104 InputChunk *getChunk() const;
106 // Indicates that the section or import for this symbol will be included in
107 // the final image.
108 bool isLive() const;
110 // Marks the symbol's InputChunk as Live, so that it will be included in the
111 // final image.
112 void markLive();
114 void setHidden(bool isHidden);
116 // Get/set the index in the output symbol table. This is only used for
117 // relocatable output.
118 uint32_t getOutputSymbolIndex() const;
119 void setOutputSymbolIndex(uint32_t index);
121 WasmSymbolType getWasmType() const;
122 bool isImported() const;
123 bool isExported() const;
124 bool isExportedExplicit() const;
126 // Indicates that the symbol is used in an __attribute__((used)) directive
127 // or similar.
128 bool isNoStrip() const;
130 const WasmSignature* getSignature() const;
// Returns the GOT index; asserts that one has been assigned. Callers that
// cannot guarantee that should check hasGOTIndex() first.
132 uint32_t getGOTIndex() const {
133 assert(gotIndex != INVALID_INDEX);
134 return gotIndex;
137 void setGOTIndex(uint32_t index);
138 bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; }
140 protected:
// Only derived classes may construct a Symbol. `referenced` starts out as the
// negation of config->gcSections; every other one-bit flag starts out false.
// outputSymbolIndex and gotIndex keep their INVALID_INDEX default initializers.
141 Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
142 : name(name), file(f), symbolKind(k), referenced(!config->gcSections),
143 requiresGOT(false), isUsedInRegularObj(false), forceExport(false),
144 forceImport(false), canInline(false), traced(false), isStub(false),
145 flags(flags) {}
147 StringRef name;
148 InputFile *file;
149 uint32_t outputSymbolIndex = INVALID_INDEX;
150 uint32_t gotIndex = INVALID_INDEX;
151 Kind symbolKind;
153 public:
// Initialised to !config->gcSections by the constructor.
// NOTE(review): presumably set once something refers to the symbol during
// garbage collection - confirm against the users of this flag.
154 bool referenced : 1;
156 // True for data symbols that need a dummy GOT entry. Used for static
157 // linking of GOT accesses.
158 bool requiresGOT : 1;
160 // True if the symbol was used for linking and thus needs to be added to the
161 // output file's symbol table. This is true for all symbols except for
162 // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
163 // are unreferenced except by other bitcode objects.
164 bool isUsedInRegularObj : 1;
166 // True if this symbol is explicitly marked for export (i.e. via the
167 // -e/--export command line flag)
168 bool forceExport : 1;
// NOTE(review): undocumented in the original; presumably the import-side
// counterpart of forceExport - confirm.
170 bool forceImport : 1;
172 // False if LTO shouldn't inline whatever this symbol points to. If a symbol
173 // is overwritten after LTO, LTO shouldn't inline the symbol because it
174 // doesn't know the final contents of the symbol.
175 bool canInline : 1;
177 // True if this symbol is specified by --trace-symbol option.
178 bool traced : 1;
180 // True if this symbol is a linker-synthesized stub function (traps when
181 // called) and should otherwise be treated as missing/undefined. See
182 // SymbolTable::replaceWithUndefined.
183 // These stubs never appear in the table and any table index relocations
184 // against them will produce address 0 (The table index representing
185 // the null function pointer).
186 bool isStub : 1;
// Raw flag bits exactly as passed to the constructor.
188 uint32_t flags;
// Import name and module. They hold no value until assigned; in this header
// they are assigned by the UndefinedFunction, UndefinedGlobal, UndefinedTable
// and UndefinedTag constructors.
190 std::optional<StringRef> importName;
191 std::optional<StringRef> importModule;
// Common base of the function symbol classes. classof() accepts the defined,
// shared and undefined function kinds.
194 class FunctionSymbol : public Symbol {
195 public:
196 static bool classof(const Symbol *s) {
197 return s->kind() == DefinedFunctionKind ||
198 s->kind() == SharedFunctionKind ||
199 s->kind() == UndefinedFunctionKind;
202 // Get/set the table index
203 void setTableIndex(uint32_t index);
204 uint32_t getTableIndex() const;
205 bool hasTableIndex() const;
207 // Get/set the function index
208 uint32_t getFunctionIndex() const;
209 void setFunctionIndex(uint32_t index);
210 bool hasFunctionIndex() const;
// Signature supplied at construction. May be null: UndefinedFunction defaults
// its signature argument to nullptr.
212 const WasmSignature *signature;
214 protected:
// Forwards name/kind/flags/file to Symbol and stores the signature pointer.
215 FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
216 const WasmSignature *sig)
217 : Symbol(name, k, flags, f), signature(sig) {}
// Both indices start out unassigned (INVALID_INDEX).
219 uint32_t tableIndex = INVALID_INDEX;
220 uint32_t functionIndex = INVALID_INDEX;
// A function symbol with a definition, backed by an InputFunction.
223 class DefinedFunction : public FunctionSymbol {
224 public:
// Declared here; the constructor body is defined outside this header.
225 DefinedFunction(StringRef name, uint32_t flags, InputFile *f,
226 InputFunction *function);
228 static bool classof(const Symbol *s) {
229 return s->kind() == DefinedFunctionKind;
232 // Get the function index to be used when exporting. This only applies to
233 // defined functions and can differ from the regular function index for
234 // weakly defined functions (that are imported and used via one index but
235 // defined and exported via another).
236 uint32_t getExportedFunctionIndex() const;
// The input function that provides this symbol's definition.
238 InputFunction *function;
// A function symbol without a definition.
241 class UndefinedFunction : public FunctionSymbol {
242 public:
// file and type default to nullptr, isCalledDirectly to true. The import name
// and module are stored in the corresponding members of the Symbol base.
243 UndefinedFunction(StringRef name, std::optional<StringRef> importName,
244 std::optional<StringRef> importModule, uint32_t flags,
245 InputFile *file = nullptr,
246 const WasmSignature *type = nullptr,
247 bool isCalledDirectly = true)
248 : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type),
249 isCalledDirectly(isCalledDirectly) {
250 this->importName = importName;
251 this->importModule = importModule;
254 static bool classof(const Symbol *s) {
255 return s->kind() == UndefinedFunctionKind;
// NOTE(review): presumably the synthesized stub that stands in for this
// function (see Symbol::isStub); null until one is assigned - confirm.
258 DefinedFunction *stubFunction = nullptr;
259 bool isCalledDirectly;
262 // Section symbols for output sections are different from those for input
263 // sections. These are generated by the linker and point to the OutputSection
264 // rather than an InputSection.
265 class OutputSectionSymbol : public Symbol {
266 public:
// Always constructed with an empty name, the WASM_SYMBOL_BINDING_LOCAL flag
// and no originating file.
267 OutputSectionSymbol(const OutputSection *s)
268 : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL,
269 nullptr),
270 section(s) {}
272 static bool classof(const Symbol *s) {
273 return s->kind() == OutputSectionKind;
// The output section this symbol refers to.
276 const OutputSection *section;
// Symbol referring to an input chunk; always constructed with an empty name.
279 class SectionSymbol : public Symbol {
280 public:
281 SectionSymbol(uint32_t flags, const InputChunk *s, InputFile *f = nullptr)
282 : Symbol("", SectionKind, flags, f), section(s) {}
284 static bool classof(const Symbol *s) { return s->kind() == SectionKind; }
// Declared here; defined outside this header.
286 const OutputSectionSymbol *getOutputSectionSymbol() const;
// The input chunk passed to the constructor.
288 const InputChunk *section;
// Common base of the data symbol classes. classof() accepts the defined,
// undefined and shared data kinds.
291 class DataSymbol : public Symbol {
292 public:
293 static bool classof(const Symbol *s) {
294 return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind ||
295 s->kind() == SharedDataKind;
298 protected:
// Adds no state of its own; simply forwards to Symbol.
299 DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
300 : Symbol(name, k, flags, f) {}
// A data symbol with a definition.
303 class DefinedData : public DataSymbol {
304 public:
305 // Constructor for regular data symbols originating from input files.
306 DefinedData(StringRef name, uint32_t flags, InputFile *f, InputChunk *segment,
307 uint64_t value, uint64_t size)
308 : DataSymbol(name, DefinedDataKind, flags, f), segment(segment),
309 value(value), size(size) {}
311 // Constructor for linker synthetic data symbols.
// Leaves segment null and value/size at zero (their default initializers),
// and records no originating file.
312 DefinedData(StringRef name, uint32_t flags)
313 : DataSymbol(name, DefinedDataKind, flags, nullptr) {}
315 static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; }
317 // Returns the output virtual address of a defined data symbol.
318 // For TLS symbols, by default (unless absolute is set), this returns an
319 // address relative to the `__tls_base`.
320 uint64_t getVA(bool absolute = false) const;
321 void setVA(uint64_t va);
323 // Returns the offset of a defined data symbol within its OutputSegment.
324 uint64_t getOutputSegmentOffset() const;
325 uint64_t getOutputSegmentIndex() const;
326 uint64_t getSize() const { return size; }
// Null for symbols created through the synthetic constructor.
328 InputChunk *segment = nullptr;
329 uint64_t value = 0;
331 protected:
// Read through getSize().
332 uint64_t size = 0;
// Data symbol tagged SharedDataKind.
// NOTE(review): unlike UndefinedData and SharedFunctionSymbol, this class
// declares no classof() of its own, so it inherits DataSymbol::classof, which
// also accepts DefinedDataKind and UndefinedDataKind. Confirm that no caller
// relies on isa<>/dyn_cast<> to SharedData being exact.
335 class SharedData : public DataSymbol {
336 public:
337 SharedData(StringRef name, uint32_t flags, InputFile *f)
338 : DataSymbol(name, SharedDataKind, flags, f) {}
// A data symbol without a definition; file defaults to nullptr.
341 class UndefinedData : public DataSymbol {
342 public:
343 UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr)
344 : DataSymbol(name, UndefinedDataKind, flags, file) {}
345 static bool classof(const Symbol *s) {
346 return s->kind() == UndefinedDataKind;
// Common base of the wasm global symbol classes. classof() accepts the
// defined and undefined global kinds.
350 class GlobalSymbol : public Symbol {
351 public:
352 static bool classof(const Symbol *s) {
353 return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind;
// May return null: UndefinedGlobal defaults its type argument to nullptr.
356 const WasmGlobalType *getGlobalType() const { return globalType; }
358 // Get/set the global index
359 uint32_t getGlobalIndex() const;
360 void setGlobalIndex(uint32_t index);
361 bool hasGlobalIndex() const;
363 protected:
// Forwards name/kind/flags/file to Symbol and stores the global type pointer.
364 GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
365 const WasmGlobalType *globalType)
366 : Symbol(name, k, flags, f), globalType(globalType) {}
368 const WasmGlobalType *globalType;
// Starts out unassigned (INVALID_INDEX).
369 uint32_t globalIndex = INVALID_INDEX;
// A global symbol with a definition, backed by an InputGlobal.
372 class DefinedGlobal : public GlobalSymbol {
373 public:
// Declared here; the constructor body is defined outside this header.
374 DefinedGlobal(StringRef name, uint32_t flags, InputFile *file,
375 InputGlobal *global);
377 static bool classof(const Symbol *s) {
378 return s->kind() == DefinedGlobalKind;
// The input global that provides this symbol's definition.
381 InputGlobal *global;
// A global symbol without a definition.
384 class UndefinedGlobal : public GlobalSymbol {
385 public:
// file and type default to nullptr. The import name and module are stored in
// the corresponding members of the Symbol base.
386 UndefinedGlobal(StringRef name, std::optional<StringRef> importName,
387 std::optional<StringRef> importModule, uint32_t flags,
388 InputFile *file = nullptr,
389 const WasmGlobalType *type = nullptr)
390 : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type) {
391 this->importName = importName;
392 this->importModule = importModule;
395 static bool classof(const Symbol *s) {
396 return s->kind() == UndefinedGlobalKind;
// Common base of the wasm table symbol classes. classof() accepts the defined
// and undefined table kinds.
400 class TableSymbol : public Symbol {
401 public:
402 static bool classof(const Symbol *s) {
403 return s->kind() == DefinedTableKind || s->kind() == UndefinedTableKind;
406 const WasmTableType *getTableType() const { return tableType; }
// Declared here; defined outside this header.
407 void setLimits(const WasmLimits &limits);
409 // Get/set the table number
410 uint32_t getTableNumber() const;
411 void setTableNumber(uint32_t number);
412 bool hasTableNumber() const;
414 protected:
// Forwards name/kind/flags/file to Symbol and stores the table type pointer.
415 TableSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
416 const WasmTableType *type)
417 : Symbol(name, k, flags, f), tableType(type) {}
419 const WasmTableType *tableType;
// Starts out unassigned (INVALID_INDEX).
420 uint32_t tableNumber = INVALID_INDEX;
// A table symbol with a definition, backed by an InputTable.
423 class DefinedTable : public TableSymbol {
424 public:
// Declared here; the constructor body is defined outside this header.
425 DefinedTable(StringRef name, uint32_t flags, InputFile *file,
426 InputTable *table);
428 static bool classof(const Symbol *s) { return s->kind() == DefinedTableKind; }
// The input table that provides this symbol's definition.
430 InputTable *table;
// A table symbol without a definition. Unlike the other Undefined* classes in
// this header, file and type carry no default arguments.
433 class UndefinedTable : public TableSymbol {
434 public:
// The import name and module are stored in the corresponding members of the
// Symbol base.
435 UndefinedTable(StringRef name, std::optional<StringRef> importName,
436 std::optional<StringRef> importModule, uint32_t flags,
437 InputFile *file, const WasmTableType *type)
438 : TableSymbol(name, UndefinedTableKind, flags, file, type) {
439 this->importName = importName;
440 this->importModule = importModule;
443 static bool classof(const Symbol *s) {
444 return s->kind() == UndefinedTableKind;
448 // A tag is a general format to distinguish typed entities. Each tag has an
449 // attribute and a type. Currently the attribute can only specify that the tag
450 // is for an exception tag.
//
452 // In exception handling, tags are used to distinguish different kinds of
453 // exceptions. For example, they can be used to distinguish different languages'
454 // exceptions, e.g., all C++ exceptions have the same tag and Java exceptions
455 // would have a distinct tag. Wasm can filter the exceptions it catches based on
456 // their tag.
//
458 // A single TagSymbol object represents a single tag. The C++ exception symbol
459 // is a weak symbol generated in every object file in which exceptions are used,
460 // and is named '__cpp_exception' for linking.
461 class TagSymbol : public Symbol {
462 public:
// Accepts the defined and undefined tag kinds.
463 static bool classof(const Symbol *s) {
464 return s->kind() == DefinedTagKind || s->kind() == UndefinedTagKind;
467 // Get/set the tag index
468 uint32_t getTagIndex() const;
469 void setTagIndex(uint32_t index);
470 bool hasTagIndex() const;
// Signature supplied at construction. May be null: UndefinedTag defaults its
// signature argument to nullptr.
472 const WasmSignature *signature;
474 protected:
// Forwards name/kind/flags/file to Symbol and stores the signature pointer.
475 TagSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
476 const WasmSignature *sig)
477 : Symbol(name, k, flags, f), signature(sig) {}
// Starts out unassigned (INVALID_INDEX).
479 uint32_t tagIndex = INVALID_INDEX;
// A tag symbol with a definition, backed by an InputTag.
482 class DefinedTag : public TagSymbol {
483 public:
// Declared here; the constructor body is defined outside this header.
484 DefinedTag(StringRef name, uint32_t flags, InputFile *file, InputTag *tag);
486 static bool classof(const Symbol *s) { return s->kind() == DefinedTagKind; }
// The input tag that provides this symbol's definition.
488 InputTag *tag;
// A tag symbol without a definition.
491 class UndefinedTag : public TagSymbol {
492 public:
// file and sig default to nullptr. The import name and module are stored in
// the corresponding members of the Symbol base.
493 UndefinedTag(StringRef name, std::optional<StringRef> importName,
494 std::optional<StringRef> importModule, uint32_t flags,
495 InputFile *file = nullptr, const WasmSignature *sig = nullptr)
496 : TagSymbol(name, UndefinedTagKind, flags, file, sig) {
497 this->importName = importName;
498 this->importModule = importModule;
501 static bool classof(const Symbol *s) { return s->kind() == UndefinedTagKind; }
// Function symbol tagged SharedFunctionKind; it adds no state beyond
// FunctionSymbol.
504 class SharedFunctionSymbol : public FunctionSymbol {
505 public:
506 SharedFunctionSymbol(StringRef name, uint32_t flags, InputFile *file,
507 const WasmSignature *sig)
508 : FunctionSymbol(name, SharedFunctionKind, flags, file, sig) {}
509 static bool classof(const Symbol *s) {
510 return s->kind() == SharedFunctionKind;
514 // LazySymbol symbols represent symbols in object files between --start-lib and
515 // --end-lib options. LLD also handles traditional archives as if all the files
516 // in the archive are surrounded by --start-lib and --end-lib.
//
518 // A special complication is the handling of weak undefined symbols. They should
519 // not load a file, but we have to remember we have seen both the weak undefined
520 // and the lazy. We represent that with a lazy symbol with a weak binding. This
521 // means that code looking for undefined symbols normally also has to take lazy
522 // symbols into consideration.
523 class LazySymbol : public Symbol {
524 public:
525 LazySymbol(StringRef name, uint32_t flags, InputFile *file)
526 : Symbol(name, LazyKind, flags, file) {}
528 static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
// Both are declared here and defined outside this header.
529 void extract();
530 void setWeak();
532 // Lazy symbols can have a signature because they can replace an
533 // UndefinedFunction in which case we need to be able to preserve the
534 // signature.
535 // TODO(sbc): This repetition of the signature field is inelegant. Revisit
536 // the use of class hierarchy to represent symbol taxonomy.
537 const WasmSignature *signature = nullptr;
540 // linker-generated symbols
// Every member is a static pointer that is only declared here; the definitions
// and the code that assigns them live outside this header.
541 struct WasmSym {
542 // __global_base
543 // Symbol marking the start of the global section.
544 static DefinedData *globalBase;
546 // __stack_pointer/__stack_low/__stack_high
547 // Global that holds current value of stack pointer and data symbols marking
548 // the start and end of the stack region. stackPointer is initialized to
549 // stackHigh and grows downwards towards stackLow
550 static GlobalSymbol *stackPointer;
551 static DefinedData *stackLow;
552 static DefinedData *stackHigh;
554 // __tls_base
555 // Global that holds the address of the base of the current thread's
556 // TLS block.
557 static GlobalSymbol *tlsBase;
559 // __tls_size
560 // Symbol whose value is the size of the TLS block.
561 static GlobalSymbol *tlsSize;
563 // __tls_align
564 // Symbol whose value is the alignment of the TLS block.
565 static GlobalSymbol *tlsAlign;
567 // __data_end
568 // Symbol marking the end of the data and bss.
569 static DefinedData *dataEnd;
571 // __heap_base/__heap_end
572 // Symbols marking the beginning and end of the "heap". It starts at the end
573 // of the data, bss and explicit stack, and extends to the end of the linear
574 // memory allocated by wasm-ld. This region of memory is not used by the
575 // linked code, so it may be used as a backing store for `sbrk` or `malloc`
576 // implementations.
577 static DefinedData *heapBase;
578 static DefinedData *heapEnd;
580 // __wasm_init_memory_flag
581 // Symbol whose contents are nonzero iff memory has already been initialized.
582 static DefinedData *initMemoryFlag;
584 // __wasm_init_memory
585 // Function that initializes passive data segments during instantiation.
586 static DefinedFunction *initMemory;
588 // __wasm_call_ctors
589 // Function that directly calls all ctors in priority order.
590 static DefinedFunction *callCtors;
592 // __wasm_call_dtors
593 // Function that calls the libc/etc. cleanup function.
594 static DefinedFunction *callDtors;
596 // __wasm_apply_global_relocs
597 // Function that applies relocations to wasm globals post-instantiation.
598 // Unlike __wasm_apply_data_relocs this needs to run on every thread.
599 static DefinedFunction *applyGlobalRelocs;
601 // __wasm_apply_tls_relocs
602 // Like __wasm_apply_data_relocs but for TLS section. These must be
603 // delayed until __wasm_init_tls.
604 static DefinedFunction *applyTLSRelocs;
606 // __wasm_apply_global_tls_relocs
607 // Like applyGlobalRelocs but for globals that hold TLS addresses. These
608 // must be delayed until __wasm_init_tls.
609 static DefinedFunction *applyGlobalTLSRelocs;
611 // __wasm_init_tls
612 // Function that allocates thread-local storage and initializes it.
613 static DefinedFunction *initTLS;
615 // Pointer to the function that is to be used in the start section.
616 // (normally an alias of initMemory, or applyGlobalRelocs).
617 static DefinedFunction *startFunction;
619 // __dso_handle
620 // Symbol used in calls to __cxa_atexit to determine current DLL
621 static DefinedData *dsoHandle;
623 // __table_base
624 // Used in PIC code for offset of indirect function table
// NOTE(review): two variants are declared (an undefined global and a defined
// data symbol); which one is populated is decided outside this header.
625 static UndefinedGlobal *tableBase;
626 static DefinedData *definedTableBase;
628 // __memory_base
629 // Used in PIC code for offset of global data
// NOTE(review): same undefined-global / defined-data pairing as __table_base.
630 static UndefinedGlobal *memoryBase;
631 static DefinedData *definedMemoryBase;
633 // __indirect_function_table
634 // Used as an address space for function pointers, with each function that is
635 // used as a function pointer being allocated a slot.
636 static TableSymbol *indirectFunctionTable;
639 // A buffer class that is large enough to hold any Symbol-derived
640 // object. We allocate memory using this class and instantiate a symbol
641 // using the placement new.
// NOTE(review): UndefinedTag, SharedData and OutputSectionSymbol have no
// member here. replaceSymbol() static_asserts that its target type fits this
// union's size and alignment, so an oversized type would fail to compile
// rather than overflow - confirm the omissions are intentional.
642 union SymbolUnion {
643 alignas(DefinedFunction) char a[sizeof(DefinedFunction)];
644 alignas(DefinedData) char b[sizeof(DefinedData)];
645 alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)];
646 alignas(DefinedTag) char d[sizeof(DefinedTag)];
647 alignas(DefinedTable) char e[sizeof(DefinedTable)];
648 alignas(LazySymbol) char f[sizeof(LazySymbol)];
649 alignas(UndefinedFunction) char g[sizeof(UndefinedFunction)];
650 alignas(UndefinedData) char h[sizeof(UndefinedData)];
651 alignas(UndefinedGlobal) char i[sizeof(UndefinedGlobal)];
652 alignas(UndefinedTable) char j[sizeof(UndefinedTable)];
653 alignas(SectionSymbol) char k[sizeof(SectionSymbol)];
654 alignas(SharedFunctionSymbol) char l[sizeof(SharedFunctionSymbol)];
657 // It is important to keep the size of SymbolUnion small for performance and
658 // memory usage reasons. 96 bytes is a soft limit based on the size of
659 // UndefinedFunction on a 64-bit system.
// NOTE(review): the assertion below enforces 120 bytes, not the 96 bytes
// mentioned above - confirm which figure is intended and update the other.
660 static_assert(sizeof(SymbolUnion) <= 120, "SymbolUnion too large");
// Tracing helpers, declared here and defined outside this header.
// replaceSymbol() calls printTraceSymbol() for symbols whose `traced` flag is
// set.
662 void printTraceSymbol(Symbol *sym);
663 void printTraceSymbolUndefined(StringRef name, const InputFile* file);
// Replaces the symbol at `s` with a newly constructed T, in place.
//
// A T is constructed with placement new directly over the storage `s` points
// to, forwarding `arg...` to T's constructor. A fixed set of flags is then
// copied over from the previous symbol: isUsedInRegularObj, forceExport,
// forceImport, canInline, traced and referenced. Everything else (for example
// requiresGOT, isStub, gotIndex and outputSymbolIndex) takes whatever value
// T's constructor gives it.
//
// Returns a pointer to the new symbol, which has the same address as `s`.
665 template <typename T, typename... ArgT>
666 T *replaceSymbol(Symbol *s, ArgT &&... arg) {
// No destructor is run on the old object, so symbol types must not need one.
667 static_assert(std::is_trivially_destructible<T>(),
668 "Symbol types must be trivially destructible");
// The storage behind `s` is assumed to be a SymbolUnion; T must fit in it.
669 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
670 static_assert(alignof(T) <= alignof(SymbolUnion),
671 "SymbolUnion not aligned enough");
// The static_cast only compiles when T* converts to Symbol*, so this line
// also rejects non-Symbol types at compile time.
672 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
673 "Not a Symbol");
// Copy the base-class part of the old symbol before its storage is overwritten.
675 Symbol symCopy = *s;
677 T *s2 = new (s) T(std::forward<ArgT>(arg)...);
678 s2->isUsedInRegularObj = symCopy.isUsedInRegularObj;
679 s2->forceExport = symCopy.forceExport;
680 s2->forceImport = symCopy.forceImport;
681 s2->canInline = symCopy.canInline;
682 s2->traced = symCopy.traced;
683 s2->referenced = symCopy.referenced;
685 // Print out a log message if --trace-symbol was specified.
686 // This is for debugging.
687 if (s2->traced)
688 printTraceSymbol(s2);
690 return s2;
693 } // namespace wasm
// The helpers below are declared in namespace lld (the wasm namespace is
// closed just above) and are defined outside this header.
695 // Returns a symbol name for an error message.
696 std::string toString(const wasm::Symbol &sym);
// Overload taking a symbol kind instead of a symbol.
697 std::string toString(wasm::Symbol::Kind kind);
// NOTE(review): presumably returns `name` demangled when demangling applies
// and unchanged otherwise - confirm against the definition.
698 std::string maybeDemangleSymbol(StringRef name);
700 } // namespace lld
702 #endif