Revert rGe6ccb57bb3f6b761f2310e97fd6ca99eff42f73e "[SLP] Add cost model for `llvm...
[llvm-project.git] / lld / wasm / Symbols.h
blobc17b720a90fae72b06d1845a24f1b6548fd8514f
1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_WASM_SYMBOLS_H
10 #define LLD_WASM_SYMBOLS_H
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/Object/Archive.h"
16 #include "llvm/Object/Wasm.h"
18 namespace lld {
19 namespace wasm {
21 // Shared string constants
23 // The default module name to use for symbol imports.
24 extern const char *defaultModule;
26 // The name under which to import or export the wasm table.
27 extern const char *functionTableName;
29 using llvm::wasm::WasmSymbolType;
31 class InputFile;
32 class InputChunk;
33 class InputSegment;
34 class InputFunction;
35 class InputGlobal;
36 class InputTag;
37 class InputSection;
38 class InputTable;
39 class OutputSection;
41 #define INVALID_INDEX UINT32_MAX
43 // The base class for real symbol classes.
44 class Symbol {
45 public:
46 enum Kind : uint8_t {
47 DefinedFunctionKind,
48 DefinedDataKind,
49 DefinedGlobalKind,
50 DefinedTagKind,
51 DefinedTableKind,
52 SectionKind,
53 OutputSectionKind,
54 UndefinedFunctionKind,
55 UndefinedDataKind,
56 UndefinedGlobalKind,
57 UndefinedTableKind,
58 UndefinedTagKind,
59 LazyKind,
62 Kind kind() const { return symbolKind; }
64 bool isDefined() const { return !isLazy() && !isUndefined(); }
66 bool isUndefined() const {
67 return symbolKind == UndefinedFunctionKind ||
68 symbolKind == UndefinedDataKind ||
69 symbolKind == UndefinedGlobalKind ||
70 symbolKind == UndefinedTableKind || symbolKind == UndefinedTagKind;
73 bool isLazy() const { return symbolKind == LazyKind; }
75 bool isLocal() const;
76 bool isWeak() const;
77 bool isHidden() const;
78 bool isTLS() const;
80 // Returns true if this symbol exists in a discarded (due to COMDAT) section
81 bool isDiscarded() const;
83 // True if this is an undefined weak symbol. This only works once
84 // all input files have been added.
85 bool isUndefWeak() const {
86 // See comment on lazy symbols for details.
87 return isWeak() && (isUndefined() || isLazy());
90 // Returns the symbol name.
91 StringRef getName() const { return name; }
93 // Returns the file from which this symbol was created.
94 InputFile *getFile() const { return file; }
96 InputChunk *getChunk() const;
98 // Indicates that the section or import for this symbol will be included in
99 // the final image.
100 bool isLive() const;
102 // Marks the symbol's InputChunk as Live, so that it will be included in the
103 // final image.
104 void markLive();
106 void setHidden(bool isHidden);
108 // Get/set the index in the output symbol table. This is only used for
109 // relocatable output.
110 uint32_t getOutputSymbolIndex() const;
111 void setOutputSymbolIndex(uint32_t index);
113 WasmSymbolType getWasmType() const;
114 bool isExported() const;
115 bool isExportedExplicit() const;
117 // Indicates that the symbol is used in an __attribute__((used)) directive
118 // or similar.
119 bool isNoStrip() const;
121 const WasmSignature* getSignature() const;
123 uint32_t getGOTIndex() const {
124 assert(gotIndex != INVALID_INDEX);
125 return gotIndex;
128 void setGOTIndex(uint32_t index);
129 bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; }
131 protected:
132 Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
133 : name(name), file(f), symbolKind(k), referenced(!config->gcSections),
134 requiresGOT(false), isUsedInRegularObj(false), forceExport(false),
135 canInline(false), traced(false), isStub(false), flags(flags) {}
137 StringRef name;
138 InputFile *file;
139 uint32_t outputSymbolIndex = INVALID_INDEX;
140 uint32_t gotIndex = INVALID_INDEX;
141 Kind symbolKind;
143 public:
144 bool referenced : 1;
146 // True for data symbols that needs a dummy GOT entry. Used for static
147 // linking of GOT accesses.
148 bool requiresGOT : 1;
150 // True if the symbol was used for linking and thus need to be added to the
151 // output file's symbol table. This is true for all symbols except for
152 // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
153 // are unreferenced except by other bitcode objects.
154 bool isUsedInRegularObj : 1;
156 // True if this symbol is explicitly marked for export (i.e. via the
157 // -e/--export command line flag)
158 bool forceExport : 1;
160 // False if LTO shouldn't inline whatever this symbol points to. If a symbol
161 // is overwritten after LTO, LTO shouldn't inline the symbol because it
162 // doesn't know the final contents of the symbol.
163 bool canInline : 1;
165 // True if this symbol is specified by --trace-symbol option.
166 bool traced : 1;
168 // True if this symbol is a linker-synthesized stub function (traps when
169 // called) and should otherwise be treated as missing/undefined. See
170 // SymbolTable::replaceWithUndefined.
171 // These stubs never appear in the table and any table index relocations
172 // against them will produce address 0 (The table index representing
173 // the null function pointer).
174 bool isStub : 1;
176 uint32_t flags;
178 llvm::Optional<StringRef> importName;
179 llvm::Optional<StringRef> importModule;
182 class FunctionSymbol : public Symbol {
183 public:
184 static bool classof(const Symbol *s) {
185 return s->kind() == DefinedFunctionKind ||
186 s->kind() == UndefinedFunctionKind;
189 // Get/set the table index
190 void setTableIndex(uint32_t index);
191 uint32_t getTableIndex() const;
192 bool hasTableIndex() const;
194 // Get/set the function index
195 uint32_t getFunctionIndex() const;
196 void setFunctionIndex(uint32_t index);
197 bool hasFunctionIndex() const;
199 const WasmSignature *signature;
201 protected:
202 FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
203 const WasmSignature *sig)
204 : Symbol(name, k, flags, f), signature(sig) {}
206 uint32_t tableIndex = INVALID_INDEX;
207 uint32_t functionIndex = INVALID_INDEX;
210 class DefinedFunction : public FunctionSymbol {
211 public:
212 DefinedFunction(StringRef name, uint32_t flags, InputFile *f,
213 InputFunction *function);
215 static bool classof(const Symbol *s) {
216 return s->kind() == DefinedFunctionKind;
219 // Get the function index to be used when exporting. This only applies to
220 // defined functions and can be differ from the regular function index for
221 // weakly defined functions (that are imported and used via one index but
222 // defined and exported via another).
223 uint32_t getExportedFunctionIndex() const;
225 InputFunction *function;
228 class UndefinedFunction : public FunctionSymbol {
229 public:
230 UndefinedFunction(StringRef name, llvm::Optional<StringRef> importName,
231 llvm::Optional<StringRef> importModule, uint32_t flags,
232 InputFile *file = nullptr,
233 const WasmSignature *type = nullptr,
234 bool isCalledDirectly = true)
235 : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type),
236 isCalledDirectly(isCalledDirectly) {
237 this->importName = importName;
238 this->importModule = importModule;
241 static bool classof(const Symbol *s) {
242 return s->kind() == UndefinedFunctionKind;
245 DefinedFunction *stubFunction = nullptr;
246 bool isCalledDirectly;
249 // Section symbols for output sections are different from those for input
250 // section. These are generated by the linker and point the OutputSection
251 // rather than an InputSection.
252 class OutputSectionSymbol : public Symbol {
253 public:
254 OutputSectionSymbol(const OutputSection *s)
255 : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL,
256 nullptr),
257 section(s) {}
259 static bool classof(const Symbol *s) {
260 return s->kind() == OutputSectionKind;
263 const OutputSection *section;
266 class SectionSymbol : public Symbol {
267 public:
268 SectionSymbol(uint32_t flags, const InputChunk *s, InputFile *f = nullptr)
269 : Symbol("", SectionKind, flags, f), section(s) {}
271 static bool classof(const Symbol *s) { return s->kind() == SectionKind; }
273 const OutputSectionSymbol *getOutputSectionSymbol() const;
275 const InputChunk *section;
278 class DataSymbol : public Symbol {
279 public:
280 static bool classof(const Symbol *s) {
281 return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind;
284 protected:
285 DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
286 : Symbol(name, k, flags, f) {}
289 class DefinedData : public DataSymbol {
290 public:
291 // Constructor for regular data symbols originating from input files.
292 DefinedData(StringRef name, uint32_t flags, InputFile *f, InputChunk *segment,
293 uint64_t value, uint64_t size)
294 : DataSymbol(name, DefinedDataKind, flags, f), segment(segment),
295 value(value), size(size) {}
297 // Constructor for linker synthetic data symbols.
298 DefinedData(StringRef name, uint32_t flags)
299 : DataSymbol(name, DefinedDataKind, flags, nullptr) {}
301 static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; }
303 // Returns the output virtual address of a defined data symbol.
304 uint64_t getVA() const;
305 void setVA(uint64_t va);
307 // Returns the offset of a defined data symbol within its OutputSegment.
308 uint64_t getOutputSegmentOffset() const;
309 uint64_t getOutputSegmentIndex() const;
310 uint64_t getSize() const { return size; }
312 InputChunk *segment = nullptr;
313 uint64_t value = 0;
315 protected:
316 uint64_t size = 0;
319 class UndefinedData : public DataSymbol {
320 public:
321 UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr)
322 : DataSymbol(name, UndefinedDataKind, flags, file) {}
323 static bool classof(const Symbol *s) {
324 return s->kind() == UndefinedDataKind;
328 class GlobalSymbol : public Symbol {
329 public:
330 static bool classof(const Symbol *s) {
331 return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind;
334 const WasmGlobalType *getGlobalType() const { return globalType; }
336 // Get/set the global index
337 uint32_t getGlobalIndex() const;
338 void setGlobalIndex(uint32_t index);
339 bool hasGlobalIndex() const;
341 protected:
342 GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
343 const WasmGlobalType *globalType)
344 : Symbol(name, k, flags, f), globalType(globalType) {}
346 const WasmGlobalType *globalType;
347 uint32_t globalIndex = INVALID_INDEX;
350 class DefinedGlobal : public GlobalSymbol {
351 public:
352 DefinedGlobal(StringRef name, uint32_t flags, InputFile *file,
353 InputGlobal *global);
355 static bool classof(const Symbol *s) {
356 return s->kind() == DefinedGlobalKind;
359 InputGlobal *global;
362 class UndefinedGlobal : public GlobalSymbol {
363 public:
364 UndefinedGlobal(StringRef name, llvm::Optional<StringRef> importName,
365 llvm::Optional<StringRef> importModule, uint32_t flags,
366 InputFile *file = nullptr,
367 const WasmGlobalType *type = nullptr)
368 : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type) {
369 this->importName = importName;
370 this->importModule = importModule;
373 static bool classof(const Symbol *s) {
374 return s->kind() == UndefinedGlobalKind;
378 class TableSymbol : public Symbol {
379 public:
380 static bool classof(const Symbol *s) {
381 return s->kind() == DefinedTableKind || s->kind() == UndefinedTableKind;
384 const WasmTableType *getTableType() const { return tableType; }
385 void setLimits(const WasmLimits &limits);
387 // Get/set the table number
388 uint32_t getTableNumber() const;
389 void setTableNumber(uint32_t number);
390 bool hasTableNumber() const;
392 protected:
393 TableSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
394 const WasmTableType *type)
395 : Symbol(name, k, flags, f), tableType(type) {}
397 const WasmTableType *tableType;
398 uint32_t tableNumber = INVALID_INDEX;
401 class DefinedTable : public TableSymbol {
402 public:
403 DefinedTable(StringRef name, uint32_t flags, InputFile *file,
404 InputTable *table);
406 static bool classof(const Symbol *s) { return s->kind() == DefinedTableKind; }
408 InputTable *table;
411 class UndefinedTable : public TableSymbol {
412 public:
413 UndefinedTable(StringRef name, llvm::Optional<StringRef> importName,
414 llvm::Optional<StringRef> importModule, uint32_t flags,
415 InputFile *file, const WasmTableType *type)
416 : TableSymbol(name, UndefinedTableKind, flags, file, type) {
417 this->importName = importName;
418 this->importModule = importModule;
421 static bool classof(const Symbol *s) {
422 return s->kind() == UndefinedTableKind;
426 // A tag is a general format to distinguish typed entities. Each tag has an
427 // attribute and a type. Currently the attribute can only specify that the tag
428 // is for an exception tag.
430 // In exception handling, tags are used to distinguish different kinds of
431 // exceptions. For example, they can be used to distinguish different language's
432 // exceptions, e.g., all C++ exceptions have the same tag and Java exceptions
433 // would have a distinct tag. Wasm can filter the exceptions it catches based on
434 // their tag.
436 // A single TagSymbol object represents a single tag. The C++ exception symbol
437 // is a weak symbol generated in every object file in which exceptions are used,
438 // and is named '__cpp_exception' for linking.
439 class TagSymbol : public Symbol {
440 public:
441 static bool classof(const Symbol *s) {
442 return s->kind() == DefinedTagKind || s->kind() == UndefinedTagKind;
445 // Get/set the tag index
446 uint32_t getTagIndex() const;
447 void setTagIndex(uint32_t index);
448 bool hasTagIndex() const;
450 const WasmSignature *signature;
452 protected:
453 TagSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
454 const WasmSignature *sig)
455 : Symbol(name, k, flags, f), signature(sig) {}
457 uint32_t tagIndex = INVALID_INDEX;
460 class DefinedTag : public TagSymbol {
461 public:
462 DefinedTag(StringRef name, uint32_t flags, InputFile *file, InputTag *tag);
464 static bool classof(const Symbol *s) { return s->kind() == DefinedTagKind; }
466 InputTag *tag;
469 class UndefinedTag : public TagSymbol {
470 public:
471 UndefinedTag(StringRef name, llvm::Optional<StringRef> importName,
472 llvm::Optional<StringRef> importModule, uint32_t flags,
473 InputFile *file = nullptr, const WasmSignature *sig = nullptr)
474 : TagSymbol(name, UndefinedTagKind, flags, file, sig) {
475 this->importName = importName;
476 this->importModule = importModule;
479 static bool classof(const Symbol *s) { return s->kind() == UndefinedTagKind; }
482 // LazySymbol represents a symbol that is not yet in the link, but we know where
483 // to find it if needed. If the resolver finds both Undefined and Lazy for the
484 // same name, it will ask the Lazy to load a file.
486 // A special complication is the handling of weak undefined symbols. They should
487 // not load a file, but we have to remember we have seen both the weak undefined
488 // and the lazy. We represent that with a lazy symbol with a weak binding. This
489 // means that code looking for undefined symbols normally also has to take lazy
490 // symbols into consideration.
491 class LazySymbol : public Symbol {
492 public:
493 LazySymbol(StringRef name, uint32_t flags, InputFile *file,
494 const llvm::object::Archive::Symbol &sym)
495 : Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {}
497 static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
498 void fetch();
499 void setWeak();
500 MemoryBufferRef getMemberBuffer();
502 // Lazy symbols can have a signature because they can replace an
503 // UndefinedFunction which which case we need to be able to preserve the
504 // signature.
505 // TODO(sbc): This repetition of the signature field is inelegant. Revisit
506 // the use of class hierarchy to represent symbol taxonomy.
507 const WasmSignature *signature = nullptr;
509 private:
510 llvm::object::Archive::Symbol archiveSymbol;
513 // linker-generated symbols
514 struct WasmSym {
515 // __global_base
516 // Symbol marking the start of the global section.
517 static DefinedData *globalBase;
519 // __stack_pointer
520 // Global that holds the address of the top of the explicit value stack in
521 // linear memory.
522 static GlobalSymbol *stackPointer;
524 // __tls_base
525 // Global that holds the address of the base of the current thread's
526 // TLS block.
527 static GlobalSymbol *tlsBase;
529 // __tls_size
530 // Symbol whose value is the size of the TLS block.
531 static GlobalSymbol *tlsSize;
533 // __tls_size
534 // Symbol whose value is the alignment of the TLS block.
535 static GlobalSymbol *tlsAlign;
537 // __data_end
538 // Symbol marking the end of the data and bss.
539 static DefinedData *dataEnd;
541 // __heap_base
542 // Symbol marking the end of the data, bss and explicit stack. Any linear
543 // memory following this address is not used by the linked code and can
544 // therefore be used as a backing store for brk()/malloc() implementations.
545 static DefinedData *heapBase;
547 // __wasm_init_memory_flag
548 // Symbol whose contents are nonzero iff memory has already been initialized.
549 static DefinedData *initMemoryFlag;
551 // __wasm_init_memory
552 // Function that initializes passive data segments during instantiation.
553 static DefinedFunction *initMemory;
555 // __wasm_call_ctors
556 // Function that directly calls all ctors in priority order.
557 static DefinedFunction *callCtors;
559 // __wasm_call_dtors
560 // Function that calls the libc/etc. cleanup function.
561 static DefinedFunction *callDtors;
563 // __wasm_apply_data_relocs
564 // Function that applies relocations to data segment post-instantiation.
565 static DefinedFunction *applyDataRelocs;
567 // __wasm_apply_global_relocs
568 // Function that applies relocations to wasm globals post-instantiation.
569 // Unlike __wasm_apply_data_relocs this needs to run on every thread.
570 static DefinedFunction *applyGlobalRelocs;
572 // __wasm_apply_global_tls_relocs
573 // Like applyGlobalRelocs but for globals that hold TLS addresses. These
574 // must be delayed until __wasm_init_tls.
575 static DefinedFunction *applyGlobalTLSRelocs;
577 // __wasm_init_tls
578 // Function that allocates thread-local storage and initializes it.
579 static DefinedFunction *initTLS;
581 // Pointer to the function that is to be used in the start section.
582 // (normally an alias of initMemory, or applyGlobalRelocs).
583 static DefinedFunction *startFunction;
585 // __dso_handle
586 // Symbol used in calls to __cxa_atexit to determine current DLL
587 static DefinedData *dsoHandle;
589 // __table_base
590 // Used in PIC code for offset of indirect function table
591 static UndefinedGlobal *tableBase;
592 static DefinedData *definedTableBase;
593 // 32-bit copy in wasm64 to work around init expr limitations.
594 // These can potentially be removed again once we have
595 // https://github.com/WebAssembly/extended-const
596 static UndefinedGlobal *tableBase32;
597 static DefinedData *definedTableBase32;
599 // __memory_base
600 // Used in PIC code for offset of global data
601 static UndefinedGlobal *memoryBase;
602 static DefinedData *definedMemoryBase;
604 // __indirect_function_table
605 // Used as an address space for function pointers, with each function that is
606 // used as a function pointer being allocated a slot.
607 static TableSymbol *indirectFunctionTable;
610 // A buffer class that is large enough to hold any Symbol-derived
611 // object. We allocate memory using this class and instantiate a symbol
612 // using the placement new.
613 union SymbolUnion {
614 alignas(DefinedFunction) char a[sizeof(DefinedFunction)];
615 alignas(DefinedData) char b[sizeof(DefinedData)];
616 alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)];
617 alignas(DefinedTag) char d[sizeof(DefinedTag)];
618 alignas(DefinedTable) char e[sizeof(DefinedTable)];
619 alignas(LazySymbol) char f[sizeof(LazySymbol)];
620 alignas(UndefinedFunction) char g[sizeof(UndefinedFunction)];
621 alignas(UndefinedData) char h[sizeof(UndefinedData)];
622 alignas(UndefinedGlobal) char i[sizeof(UndefinedGlobal)];
623 alignas(UndefinedTable) char j[sizeof(UndefinedTable)];
624 alignas(SectionSymbol) char k[sizeof(SectionSymbol)];
627 // It is important to keep the size of SymbolUnion small for performance and
628 // memory usage reasons. 96 bytes is a soft limit based on the size of
629 // UndefinedFunction on a 64-bit system.
630 static_assert(sizeof(SymbolUnion) <= 120, "SymbolUnion too large");
632 void printTraceSymbol(Symbol *sym);
633 void printTraceSymbolUndefined(StringRef name, const InputFile* file);
635 template <typename T, typename... ArgT>
636 T *replaceSymbol(Symbol *s, ArgT &&... arg) {
637 static_assert(std::is_trivially_destructible<T>(),
638 "Symbol types must be trivially destructible");
639 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
640 static_assert(alignof(T) <= alignof(SymbolUnion),
641 "SymbolUnion not aligned enough");
642 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
643 "Not a Symbol");
645 Symbol symCopy = *s;
647 T *s2 = new (s) T(std::forward<ArgT>(arg)...);
648 s2->isUsedInRegularObj = symCopy.isUsedInRegularObj;
649 s2->forceExport = symCopy.forceExport;
650 s2->canInline = symCopy.canInline;
651 s2->traced = symCopy.traced;
653 // Print out a log message if --trace-symbol was specified.
654 // This is for debugging.
655 if (s2->traced)
656 printTraceSymbol(s2);
658 return s2;
661 } // namespace wasm
663 // Returns a symbol name for an error message.
664 std::string toString(const wasm::Symbol &sym);
665 std::string toString(wasm::Symbol::Kind kind);
666 std::string maybeDemangleSymbol(StringRef name);
668 } // namespace lld
670 #endif