Revert "[libc] Use best-fit binary trie to make malloc logarithmic" (#117065)
[llvm-project.git] / lld / wasm / SymbolTable.cpp
blob4cbf44b4d0398a4a8bbf2cb7d321dd8a493a7355
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "SymbolTable.h"
10 #include "Config.h"
11 #include "InputChunks.h"
12 #include "InputElement.h"
13 #include "WriterUtils.h"
14 #include "lld/Common/CommonLinkerContext.h"
15 #include <optional>
17 #define DEBUG_TYPE "lld"
19 using namespace llvm;
20 using namespace llvm::wasm;
21 using namespace llvm::object;
23 namespace lld::wasm {
24 SymbolTable *symtab;
26 void SymbolTable::addFile(InputFile *file, StringRef symName) {
27 log("Processing: " + toString(file));
29 // Lazy object file
30 if (file->lazy) {
31 if (auto *f = dyn_cast<BitcodeFile>(file)) {
32 ctx.lazyBitcodeFiles.push_back(f);
33 f->parseLazy();
34 } else {
35 cast<ObjFile>(file)->parseLazy();
37 return;
40 // .so file
41 if (auto *f = dyn_cast<SharedFile>(file)) {
42 // If we are not reporting undefined symbols that we don't actualy
43 // parse the shared library symbol table.
44 f->parse();
45 ctx.sharedFiles.push_back(f);
46 return;
49 // stub file
50 if (auto *f = dyn_cast<StubFile>(file)) {
51 f->parse();
52 ctx.stubFiles.push_back(f);
53 return;
56 if (config->trace)
57 message(toString(file));
59 // LLVM bitcode file
60 if (auto *f = dyn_cast<BitcodeFile>(file)) {
61 // This order, first adding to `bitcodeFiles` and then parsing is necessary.
62 // See https://github.com/llvm/llvm-project/pull/73095
63 ctx.bitcodeFiles.push_back(f);
64 f->parse(symName);
65 return;
68 // Regular object file
69 auto *f = cast<ObjFile>(file);
70 f->parse(false);
71 ctx.objectFiles.push_back(f);
74 // This function is where all the optimizations of link-time
75 // optimization happens. When LTO is in use, some input files are
76 // not in native object file format but in the LLVM bitcode format.
77 // This function compiles bitcode files into a few big native files
78 // using LLVM functions and replaces bitcode symbols with the results.
79 // Because all bitcode files that the program consists of are passed
80 // to the compiler at once, it can do whole-program optimization.
81 void SymbolTable::compileBitcodeFiles() {
82 // Prevent further LTO objects being included
83 BitcodeFile::doneLTO = true;
85 // Compile bitcode files and replace bitcode symbols.
86 lto.reset(new BitcodeCompiler);
87 for (BitcodeFile *f : ctx.bitcodeFiles)
88 lto->add(*f);
90 for (StringRef filename : lto->compile()) {
91 auto *obj = make<ObjFile>(MemoryBufferRef(filename, "lto.tmp"), "");
92 obj->parse(true);
93 ctx.objectFiles.push_back(obj);
97 Symbol *SymbolTable::find(StringRef name) {
98 auto it = symMap.find(CachedHashStringRef(name));
99 if (it == symMap.end() || it->second == -1)
100 return nullptr;
101 return symVector[it->second];
104 void SymbolTable::replace(StringRef name, Symbol* sym) {
105 auto it = symMap.find(CachedHashStringRef(name));
106 symVector[it->second] = sym;
109 std::pair<Symbol *, bool> SymbolTable::insertName(StringRef name) {
110 bool trace = false;
111 auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()});
112 int &symIndex = p.first->second;
113 bool isNew = p.second;
114 if (symIndex == -1) {
115 symIndex = symVector.size();
116 trace = true;
117 isNew = true;
120 if (!isNew)
121 return {symVector[symIndex], false};
123 Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
124 sym->isUsedInRegularObj = false;
125 sym->canInline = true;
126 sym->traced = trace;
127 sym->forceExport = false;
128 sym->referenced = !config->gcSections;
129 symVector.emplace_back(sym);
130 return {sym, true};
133 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
134 const InputFile *file) {
135 Symbol *s;
136 bool wasInserted;
137 std::tie(s, wasInserted) = insertName(name);
139 if (!file || file->kind() == InputFile::ObjectKind)
140 s->isUsedInRegularObj = true;
142 return {s, wasInserted};
145 static void reportTypeError(const Symbol *existing, const InputFile *file,
146 llvm::wasm::WasmSymbolType type) {
147 error("symbol type mismatch: " + toString(*existing) + "\n>>> defined as " +
148 toString(existing->getWasmType()) + " in " +
149 toString(existing->getFile()) + "\n>>> defined as " + toString(type) +
150 " in " + toString(file));
153 // Check the type of new symbol matches that of the symbol is replacing.
154 // Returns true if the function types match, false is there is a signature
155 // mismatch.
156 static bool signatureMatches(FunctionSymbol *existing,
157 const WasmSignature *newSig) {
158 const WasmSignature *oldSig = existing->signature;
160 // If either function is missing a signature (this happens for bitcode
161 // symbols) then assume they match. Any mismatch will be reported later
162 // when the LTO objects are added.
163 if (!newSig || !oldSig)
164 return true;
166 return *newSig == *oldSig;
169 static void checkGlobalType(const Symbol *existing, const InputFile *file,
170 const WasmGlobalType *newType) {
171 if (!isa<GlobalSymbol>(existing)) {
172 reportTypeError(existing, file, WASM_SYMBOL_TYPE_GLOBAL);
173 return;
176 const WasmGlobalType *oldType = cast<GlobalSymbol>(existing)->getGlobalType();
177 if (*newType != *oldType) {
178 error("Global type mismatch: " + existing->getName() + "\n>>> defined as " +
179 toString(*oldType) + " in " + toString(existing->getFile()) +
180 "\n>>> defined as " + toString(*newType) + " in " + toString(file));
184 static void checkTagType(const Symbol *existing, const InputFile *file,
185 const WasmSignature *newSig) {
186 const auto *existingTag = dyn_cast<TagSymbol>(existing);
187 if (!isa<TagSymbol>(existing)) {
188 reportTypeError(existing, file, WASM_SYMBOL_TYPE_TAG);
189 return;
192 const WasmSignature *oldSig = existingTag->signature;
193 if (*newSig != *oldSig)
194 warn("Tag signature mismatch: " + existing->getName() +
195 "\n>>> defined as " + toString(*oldSig) + " in " +
196 toString(existing->getFile()) + "\n>>> defined as " +
197 toString(*newSig) + " in " + toString(file));
200 static void checkTableType(const Symbol *existing, const InputFile *file,
201 const WasmTableType *newType) {
202 if (!isa<TableSymbol>(existing)) {
203 reportTypeError(existing, file, WASM_SYMBOL_TYPE_TABLE);
204 return;
207 const WasmTableType *oldType = cast<TableSymbol>(existing)->getTableType();
208 if (newType->ElemType != oldType->ElemType) {
209 error("Table type mismatch: " + existing->getName() + "\n>>> defined as " +
210 toString(*oldType) + " in " + toString(existing->getFile()) +
211 "\n>>> defined as " + toString(*newType) + " in " + toString(file));
213 // FIXME: No assertions currently on the limits.
216 static void checkDataType(const Symbol *existing, const InputFile *file) {
217 if (!isa<DataSymbol>(existing))
218 reportTypeError(existing, file, WASM_SYMBOL_TYPE_DATA);
221 DefinedFunction *SymbolTable::addSyntheticFunction(StringRef name,
222 uint32_t flags,
223 InputFunction *function) {
224 LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << name << "\n");
225 assert(!find(name));
226 ctx.syntheticFunctions.emplace_back(function);
227 return replaceSymbol<DefinedFunction>(insertName(name).first, name,
228 flags, nullptr, function);
231 // Adds an optional, linker generated, data symbol. The symbol will only be
232 // added if there is an undefine reference to it, or if it is explicitly
233 // exported via the --export flag. Otherwise we don't add the symbol and return
234 // nullptr.
235 DefinedData *SymbolTable::addOptionalDataSymbol(StringRef name,
236 uint64_t value) {
237 Symbol *s = find(name);
238 if (!s && (config->exportAll || config->exportedSymbols.count(name) != 0))
239 s = insertName(name).first;
240 else if (!s || s->isDefined())
241 return nullptr;
242 LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << name << "\n");
243 auto *rtn = replaceSymbol<DefinedData>(
244 s, name, WASM_SYMBOL_VISIBILITY_HIDDEN | WASM_SYMBOL_ABSOLUTE);
245 rtn->setVA(value);
246 rtn->referenced = true;
247 return rtn;
250 DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef name,
251 uint32_t flags) {
252 LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << name << "\n");
253 assert(!find(name));
254 return replaceSymbol<DefinedData>(insertName(name).first, name,
255 flags | WASM_SYMBOL_ABSOLUTE);
258 DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef name, uint32_t flags,
259 InputGlobal *global) {
260 LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << name << " -> " << global
261 << "\n");
262 assert(!find(name));
263 ctx.syntheticGlobals.emplace_back(global);
264 return replaceSymbol<DefinedGlobal>(insertName(name).first, name, flags,
265 nullptr, global);
268 DefinedGlobal *SymbolTable::addOptionalGlobalSymbol(StringRef name,
269 InputGlobal *global) {
270 Symbol *s = find(name);
271 if (!s || s->isDefined())
272 return nullptr;
273 LLVM_DEBUG(dbgs() << "addOptionalGlobalSymbol: " << name << " -> " << global
274 << "\n");
275 ctx.syntheticGlobals.emplace_back(global);
276 return replaceSymbol<DefinedGlobal>(s, name, WASM_SYMBOL_VISIBILITY_HIDDEN,
277 nullptr, global);
280 DefinedTable *SymbolTable::addSyntheticTable(StringRef name, uint32_t flags,
281 InputTable *table) {
282 LLVM_DEBUG(dbgs() << "addSyntheticTable: " << name << " -> " << table
283 << "\n");
284 Symbol *s = find(name);
285 assert(!s || s->isUndefined());
286 if (!s)
287 s = insertName(name).first;
288 ctx.syntheticTables.emplace_back(table);
289 return replaceSymbol<DefinedTable>(s, name, flags, nullptr, table);
292 static bool shouldReplace(const Symbol *existing, InputFile *newFile,
293 uint32_t newFlags) {
294 // If existing symbol is undefined, replace it.
295 if (!existing->isDefined()) {
296 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: "
297 << existing->getName() << "\n");
298 return true;
301 // Now we have two defined symbols. If the new one is weak, we can ignore it.
302 if ((newFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
303 LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n");
304 return false;
307 // If the existing symbol is weak, we should replace it.
308 if (existing->isWeak()) {
309 LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n");
310 return true;
313 // Similarly with shared symbols
314 if (existing->isShared()) {
315 LLVM_DEBUG(dbgs() << "replacing existing shared symbol\n");
316 return true;
319 // Neither symbol is week. They conflict.
320 if (config->allowMultipleDefinition)
321 return false;
323 errorOrWarn("duplicate symbol: " + toString(*existing) + "\n>>> defined in " +
324 toString(existing->getFile()) + "\n>>> defined in " +
325 toString(newFile));
326 return true;
329 static void reportFunctionSignatureMismatch(StringRef symName,
330 FunctionSymbol *sym,
331 const WasmSignature *signature,
332 InputFile *file,
333 bool isError = true) {
334 std::string msg =
335 ("function signature mismatch: " + symName + "\n>>> defined as " +
336 toString(*sym->signature) + " in " + toString(sym->getFile()) +
337 "\n>>> defined as " + toString(*signature) + " in " + toString(file))
338 .str();
339 if (isError)
340 error(msg);
341 else
342 warn(msg);
345 static void reportFunctionSignatureMismatch(StringRef symName,
346 FunctionSymbol *a,
347 FunctionSymbol *b,
348 bool isError = true) {
349 reportFunctionSignatureMismatch(symName, a, b->signature, b->getFile(),
350 isError);
353 Symbol *SymbolTable::addSharedFunction(StringRef name, uint32_t flags,
354 InputFile *file,
355 const WasmSignature *sig) {
356 LLVM_DEBUG(dbgs() << "addSharedFunction: " << name << " [" << toString(*sig)
357 << "]\n");
358 Symbol *s;
359 bool wasInserted;
360 std::tie(s, wasInserted) = insert(name, file);
362 auto replaceSym = [&](Symbol *sym) {
363 replaceSymbol<SharedFunctionSymbol>(sym, name, flags, file, sig);
366 if (wasInserted) {
367 replaceSym(s);
368 return s;
371 auto existingFunction = dyn_cast<FunctionSymbol>(s);
372 if (!existingFunction) {
373 reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
374 return s;
377 // Shared symbols should never replace locally-defined ones
378 if (s->isDefined()) {
379 return s;
382 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " << s->getName()
383 << "\n");
385 bool checkSig = true;
386 if (auto ud = dyn_cast<UndefinedFunction>(existingFunction))
387 checkSig = ud->isCalledDirectly;
389 if (checkSig && !signatureMatches(existingFunction, sig)) {
390 if (config->shlibSigCheck) {
391 reportFunctionSignatureMismatch(name, existingFunction, sig, file);
392 } else {
393 // With --no-shlib-sigcheck we ignore the signature of the function as
394 // defined by the shared library and instead use the signature as
395 // expected by the program being linked.
396 sig = existingFunction->signature;
400 replaceSym(s);
401 return s;
404 Symbol *SymbolTable::addSharedData(StringRef name, uint32_t flags,
405 InputFile *file) {
406 LLVM_DEBUG(dbgs() << "addSharedData: " << name << "\n");
407 Symbol *s;
408 bool wasInserted;
409 std::tie(s, wasInserted) = insert(name, file);
411 if (wasInserted || s->isUndefined()) {
412 replaceSymbol<SharedData>(s, name, flags, file);
415 return s;
418 Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags,
419 InputFile *file,
420 InputFunction *function) {
421 LLVM_DEBUG(dbgs() << "addDefinedFunction: " << name << " ["
422 << (function ? toString(function->signature) : "none")
423 << "]\n");
424 Symbol *s;
425 bool wasInserted;
426 std::tie(s, wasInserted) = insert(name, file);
428 auto replaceSym = [&](Symbol *sym) {
429 // If the new defined function doesn't have signature (i.e. bitcode
430 // functions) but the old symbol does, then preserve the old signature
431 const WasmSignature *oldSig = s->getSignature();
432 auto* newSym = replaceSymbol<DefinedFunction>(sym, name, flags, file, function);
433 if (!newSym->signature)
434 newSym->signature = oldSig;
437 if (wasInserted || s->isLazy()) {
438 replaceSym(s);
439 return s;
442 auto existingFunction = dyn_cast<FunctionSymbol>(s);
443 if (!existingFunction) {
444 reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
445 return s;
448 bool checkSig = true;
449 if (auto ud = dyn_cast<UndefinedFunction>(existingFunction))
450 checkSig = ud->isCalledDirectly;
452 if (checkSig && function && !signatureMatches(existingFunction, &function->signature)) {
453 Symbol* variant;
454 if (getFunctionVariant(s, &function->signature, file, &variant))
455 // New variant, always replace
456 replaceSym(variant);
457 else if (shouldReplace(s, file, flags))
458 // Variant already exists, replace it after checking shouldReplace
459 replaceSym(variant);
461 // This variant we found take the place in the symbol table as the primary
462 // variant.
463 replace(name, variant);
464 return variant;
467 // Existing function with matching signature.
468 if (shouldReplace(s, file, flags))
469 replaceSym(s);
471 return s;
474 Symbol *SymbolTable::addDefinedData(StringRef name, uint32_t flags,
475 InputFile *file, InputChunk *segment,
476 uint64_t address, uint64_t size) {
477 LLVM_DEBUG(dbgs() << "addDefinedData:" << name << " addr:" << address
478 << "\n");
479 Symbol *s;
480 bool wasInserted;
481 std::tie(s, wasInserted) = insert(name, file);
483 auto replaceSym = [&]() {
484 replaceSymbol<DefinedData>(s, name, flags, file, segment, address, size);
487 if (wasInserted || s->isLazy()) {
488 replaceSym();
489 return s;
492 checkDataType(s, file);
494 if (shouldReplace(s, file, flags))
495 replaceSym();
496 return s;
499 Symbol *SymbolTable::addDefinedGlobal(StringRef name, uint32_t flags,
500 InputFile *file, InputGlobal *global) {
501 LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << name << "\n");
503 Symbol *s;
504 bool wasInserted;
505 std::tie(s, wasInserted) = insert(name, file);
507 auto replaceSym = [&]() {
508 replaceSymbol<DefinedGlobal>(s, name, flags, file, global);
511 if (wasInserted || s->isLazy()) {
512 replaceSym();
513 return s;
516 checkGlobalType(s, file, &global->getType());
518 if (shouldReplace(s, file, flags))
519 replaceSym();
520 return s;
523 Symbol *SymbolTable::addDefinedTag(StringRef name, uint32_t flags,
524 InputFile *file, InputTag *tag) {
525 LLVM_DEBUG(dbgs() << "addDefinedTag:" << name << "\n");
527 Symbol *s;
528 bool wasInserted;
529 std::tie(s, wasInserted) = insert(name, file);
531 auto replaceSym = [&]() {
532 replaceSymbol<DefinedTag>(s, name, flags, file, tag);
535 if (wasInserted || s->isLazy()) {
536 replaceSym();
537 return s;
540 checkTagType(s, file, &tag->signature);
542 if (shouldReplace(s, file, flags))
543 replaceSym();
544 return s;
547 Symbol *SymbolTable::addDefinedTable(StringRef name, uint32_t flags,
548 InputFile *file, InputTable *table) {
549 LLVM_DEBUG(dbgs() << "addDefinedTable:" << name << "\n");
551 Symbol *s;
552 bool wasInserted;
553 std::tie(s, wasInserted) = insert(name, file);
555 auto replaceSym = [&]() {
556 replaceSymbol<DefinedTable>(s, name, flags, file, table);
559 if (wasInserted || s->isLazy()) {
560 replaceSym();
561 return s;
564 checkTableType(s, file, &table->getType());
566 if (shouldReplace(s, file, flags))
567 replaceSym();
568 return s;
571 // This function get called when an undefined symbol is added, and there is
572 // already an existing one in the symbols table. In this case we check that
573 // custom 'import-module' and 'import-field' symbol attributes agree.
574 // With LTO these attributes are not available when the bitcode is read and only
575 // become available when the LTO object is read. In this case we silently
576 // replace the empty attributes with the valid ones.
577 template <typename T>
578 static void setImportAttributes(T *existing,
579 std::optional<StringRef> importName,
580 std::optional<StringRef> importModule,
581 uint32_t flags, InputFile *file) {
582 if (importName) {
583 if (!existing->importName)
584 existing->importName = importName;
585 if (existing->importName != importName)
586 error("import name mismatch for symbol: " + toString(*existing) +
587 "\n>>> defined as " + *existing->importName + " in " +
588 toString(existing->getFile()) + "\n>>> defined as " + *importName +
589 " in " + toString(file));
592 if (importModule) {
593 if (!existing->importModule)
594 existing->importModule = importModule;
595 if (existing->importModule != importModule)
596 error("import module mismatch for symbol: " + toString(*existing) +
597 "\n>>> defined as " + *existing->importModule + " in " +
598 toString(existing->getFile()) + "\n>>> defined as " +
599 *importModule + " in " + toString(file));
602 // Update symbol binding, if the existing symbol is weak
603 uint32_t binding = flags & WASM_SYMBOL_BINDING_MASK;
604 if (existing->isWeak() && binding != WASM_SYMBOL_BINDING_WEAK) {
605 existing->flags = (existing->flags & ~WASM_SYMBOL_BINDING_MASK) | binding;
609 Symbol *SymbolTable::addUndefinedFunction(StringRef name,
610 std::optional<StringRef> importName,
611 std::optional<StringRef> importModule,
612 uint32_t flags, InputFile *file,
613 const WasmSignature *sig,
614 bool isCalledDirectly) {
615 LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << name << " ["
616 << (sig ? toString(*sig) : "none")
617 << "] IsCalledDirectly:" << isCalledDirectly << " flags=0x"
618 << utohexstr(flags) << "\n");
619 assert(flags & WASM_SYMBOL_UNDEFINED);
621 Symbol *s;
622 bool wasInserted;
623 std::tie(s, wasInserted) = insert(name, file);
624 if (s->traced)
625 printTraceSymbolUndefined(name, file);
627 auto replaceSym = [&]() {
628 replaceSymbol<UndefinedFunction>(s, name, importName, importModule, flags,
629 file, sig, isCalledDirectly);
632 if (wasInserted) {
633 replaceSym();
634 } else if (auto *lazy = dyn_cast<LazySymbol>(s)) {
635 if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
636 lazy->setWeak();
637 lazy->signature = sig;
638 } else {
639 lazy->extract();
640 if (!config->whyExtract.empty())
641 ctx.whyExtractRecords.emplace_back(toString(file), s->getFile(), *s);
643 } else {
644 auto existingFunction = dyn_cast<FunctionSymbol>(s);
645 if (!existingFunction) {
646 reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
647 return s;
649 if (!existingFunction->signature && sig)
650 existingFunction->signature = sig;
651 auto *existingUndefined = dyn_cast<UndefinedFunction>(existingFunction);
652 if (isCalledDirectly && !signatureMatches(existingFunction, sig)) {
653 if (existingFunction->isShared()) {
654 // Special handling for when the existing function is a shared symbol
655 if (config->shlibSigCheck) {
656 reportFunctionSignatureMismatch(name, existingFunction, sig, file);
657 } else {
658 existingFunction->signature = sig;
661 // If the existing undefined functions is not called directly then let
662 // this one take precedence. Otherwise the existing function is either
663 // directly called or defined, in which case we need a function variant.
664 else if (existingUndefined && !existingUndefined->isCalledDirectly)
665 replaceSym();
666 else if (getFunctionVariant(s, sig, file, &s))
667 replaceSym();
669 if (existingUndefined) {
670 setImportAttributes(existingUndefined, importName, importModule, flags,
671 file);
672 if (isCalledDirectly)
673 existingUndefined->isCalledDirectly = true;
674 if (s->isWeak())
675 s->flags = flags;
679 return s;
682 Symbol *SymbolTable::addUndefinedData(StringRef name, uint32_t flags,
683 InputFile *file) {
684 LLVM_DEBUG(dbgs() << "addUndefinedData: " << name << "\n");
685 assert(flags & WASM_SYMBOL_UNDEFINED);
687 Symbol *s;
688 bool wasInserted;
689 std::tie(s, wasInserted) = insert(name, file);
690 if (s->traced)
691 printTraceSymbolUndefined(name, file);
693 if (wasInserted) {
694 replaceSymbol<UndefinedData>(s, name, flags, file);
695 } else if (auto *lazy = dyn_cast<LazySymbol>(s)) {
696 if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK)
697 lazy->setWeak();
698 else
699 lazy->extract();
700 } else if (s->isDefined()) {
701 checkDataType(s, file);
702 } else if (s->isWeak()) {
703 s->flags = flags;
705 return s;
708 Symbol *SymbolTable::addUndefinedGlobal(StringRef name,
709 std::optional<StringRef> importName,
710 std::optional<StringRef> importModule,
711 uint32_t flags, InputFile *file,
712 const WasmGlobalType *type) {
713 LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << name << "\n");
714 assert(flags & WASM_SYMBOL_UNDEFINED);
716 Symbol *s;
717 bool wasInserted;
718 std::tie(s, wasInserted) = insert(name, file);
719 if (s->traced)
720 printTraceSymbolUndefined(name, file);
722 if (wasInserted)
723 replaceSymbol<UndefinedGlobal>(s, name, importName, importModule, flags,
724 file, type);
725 else if (auto *lazy = dyn_cast<LazySymbol>(s))
726 lazy->extract();
727 else if (s->isDefined())
728 checkGlobalType(s, file, type);
729 else if (s->isWeak())
730 s->flags = flags;
731 return s;
734 Symbol *SymbolTable::addUndefinedTable(StringRef name,
735 std::optional<StringRef> importName,
736 std::optional<StringRef> importModule,
737 uint32_t flags, InputFile *file,
738 const WasmTableType *type) {
739 LLVM_DEBUG(dbgs() << "addUndefinedTable: " << name << "\n");
740 assert(flags & WASM_SYMBOL_UNDEFINED);
742 Symbol *s;
743 bool wasInserted;
744 std::tie(s, wasInserted) = insert(name, file);
745 if (s->traced)
746 printTraceSymbolUndefined(name, file);
748 if (wasInserted)
749 replaceSymbol<UndefinedTable>(s, name, importName, importModule, flags,
750 file, type);
751 else if (auto *lazy = dyn_cast<LazySymbol>(s))
752 lazy->extract();
753 else if (s->isDefined())
754 checkTableType(s, file, type);
755 else if (s->isWeak())
756 s->flags = flags;
757 return s;
760 Symbol *SymbolTable::addUndefinedTag(StringRef name,
761 std::optional<StringRef> importName,
762 std::optional<StringRef> importModule,
763 uint32_t flags, InputFile *file,
764 const WasmSignature *sig) {
765 LLVM_DEBUG(dbgs() << "addUndefinedTag: " << name << "\n");
766 assert(flags & WASM_SYMBOL_UNDEFINED);
768 Symbol *s;
769 bool wasInserted;
770 std::tie(s, wasInserted) = insert(name, file);
771 if (s->traced)
772 printTraceSymbolUndefined(name, file);
774 if (wasInserted)
775 replaceSymbol<UndefinedTag>(s, name, importName, importModule, flags, file,
776 sig);
777 else if (auto *lazy = dyn_cast<LazySymbol>(s))
778 lazy->extract();
779 else if (s->isDefined())
780 checkTagType(s, file, sig);
781 else if (s->isWeak())
782 s->flags = flags;
783 return s;
786 TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) {
787 WasmLimits limits{0, 0, 0}; // Set by the writer.
788 WasmTableType *type = make<WasmTableType>();
789 type->ElemType = ValType::FUNCREF;
790 type->Limits = limits;
791 uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
792 flags |= WASM_SYMBOL_UNDEFINED;
793 Symbol *sym =
794 addUndefinedTable(name, name, defaultModule, flags, nullptr, type);
795 sym->markLive();
796 sym->forceExport = config->exportTable;
797 return cast<TableSymbol>(sym);
800 TableSymbol *SymbolTable::createDefinedIndirectFunctionTable(StringRef name) {
801 const uint32_t invalidIndex = -1;
802 WasmLimits limits{0, 0, 0}; // Set by the writer.
803 WasmTableType type{ValType::FUNCREF, limits};
804 WasmTable desc{invalidIndex, type, name};
805 InputTable *table = make<InputTable>(desc, nullptr);
806 uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
807 TableSymbol *sym = addSyntheticTable(name, flags, table);
808 sym->markLive();
809 sym->forceExport = config->exportTable;
810 return sym;
813 // Whether or not we need an indirect function table is usually a function of
814 // whether an input declares a need for it. However sometimes it's possible for
815 // no input to need the indirect function table, but then a late
816 // addInternalGOTEntry causes a function to be allocated an address. In that
817 // case address we synthesize a definition at the last minute.
818 TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
819 Symbol *existing = find(functionTableName);
820 if (existing) {
821 if (!isa<TableSymbol>(existing)) {
822 error(Twine("reserved symbol must be of type table: `") +
823 functionTableName + "`");
824 return nullptr;
826 if (existing->isDefined()) {
827 error(Twine("reserved symbol must not be defined in input files: `") +
828 functionTableName + "`");
829 return nullptr;
833 if (config->importTable) {
834 if (existing) {
835 existing->importModule = defaultModule;
836 existing->importName = functionTableName;
837 return cast<TableSymbol>(existing);
839 if (required)
840 return createUndefinedIndirectFunctionTable(functionTableName);
841 } else if ((existing && existing->isLive()) || config->exportTable ||
842 required) {
843 // A defined table is required. Either because the user request an exported
844 // table or because the table symbol is already live. The existing table is
845 // guaranteed to be undefined due to the check above.
846 return createDefinedIndirectFunctionTable(functionTableName);
849 // An indirect function table will only be present in the symbol table if
850 // needed by a reloc; if we get here, we don't need one.
851 return nullptr;
854 void SymbolTable::addLazy(StringRef name, InputFile *file) {
855 LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n");
857 Symbol *s;
858 bool wasInserted;
859 std::tie(s, wasInserted) = insertName(name);
861 if (wasInserted) {
862 replaceSymbol<LazySymbol>(s, name, 0, file);
863 return;
866 if (!s->isUndefined())
867 return;
869 // The existing symbol is undefined, load a new one from the archive,
870 // unless the existing symbol is weak in which case replace the undefined
871 // symbols with a LazySymbol.
872 if (s->isWeak()) {
873 const WasmSignature *oldSig = nullptr;
874 // In the case of an UndefinedFunction we need to preserve the expected
875 // signature.
876 if (auto *f = dyn_cast<UndefinedFunction>(s))
877 oldSig = f->signature;
878 LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
879 auto newSym =
880 replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK, file);
881 newSym->signature = oldSig;
882 return;
885 LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
886 const InputFile *oldFile = s->getFile();
887 LazySymbol(name, 0, file).extract();
888 if (!config->whyExtract.empty())
889 ctx.whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s);
892 bool SymbolTable::addComdat(StringRef name) {
893 return comdatGroups.insert(CachedHashStringRef(name)).second;
896 // The new signature doesn't match. Create a variant to the symbol with the
897 // signature encoded in the name and return that instead. These symbols are
898 // then unified later in handleSymbolVariants.
899 bool SymbolTable::getFunctionVariant(Symbol* sym, const WasmSignature *sig,
900 const InputFile *file, Symbol **out) {
901 LLVM_DEBUG(dbgs() << "getFunctionVariant: " << sym->getName() << " -> "
902 << " " << toString(*sig) << "\n");
903 Symbol *variant = nullptr;
905 // Linear search through symbol variants. Should never be more than two
906 // or three entries here.
907 auto &variants = symVariants[CachedHashStringRef(sym->getName())];
908 if (variants.empty())
909 variants.push_back(sym);
911 for (Symbol* v : variants) {
912 if (*v->getSignature() == *sig) {
913 variant = v;
914 break;
918 bool wasAdded = !variant;
919 if (wasAdded) {
920 // Create a new variant;
921 LLVM_DEBUG(dbgs() << "added new variant\n");
922 variant = reinterpret_cast<Symbol *>(make<SymbolUnion>());
923 variant->isUsedInRegularObj =
924 !file || file->kind() == InputFile::ObjectKind;
925 variant->canInline = true;
926 variant->traced = false;
927 variant->forceExport = false;
928 variants.push_back(variant);
929 } else {
930 LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*variant) << "\n");
931 assert(*variant->getSignature() == *sig);
934 *out = variant;
935 return wasAdded;
938 // Set a flag for --trace-symbol so that we can print out a log message
939 // if a new symbol with the same name is inserted into the symbol table.
940 void SymbolTable::trace(StringRef name) {
941 symMap.insert({CachedHashStringRef(name), -1});
944 void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
945 // Swap symbols as instructed by -wrap.
946 int &origIdx = symMap[CachedHashStringRef(sym->getName())];
947 int &realIdx= symMap[CachedHashStringRef(real->getName())];
948 int &wrapIdx = symMap[CachedHashStringRef(wrap->getName())];
949 LLVM_DEBUG(dbgs() << "wrap: " << sym->getName() << "\n");
951 // Anyone looking up __real symbols should get the original
952 realIdx = origIdx;
953 // Anyone looking up the original should get the __wrap symbol
954 origIdx = wrapIdx;
// Encoded body of a wasm function that consists of a single `unreachable`
// instruction: a length byte, a zero local count, the opcode, and `end`.
static const uint8_t unreachableFn[] = {
    0x03 /* ULEB length */, 0x00 /* ULEB num locals */,
    0x00 /* opcode unreachable */, 0x0b /* opcode end */
};
962 // Replace the given symbol body with an unreachable function.
963 // This is used by handleWeakUndefines in order to generate a callable
964 // equivalent of an undefined function and also handleSymbolVariants for
965 // undefined functions that don't match the signature of the definition.
966 InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym,
967 const WasmSignature &sig,
968 StringRef debugName) {
969 auto *func = make<SyntheticFunction>(sig, sym->getName(), debugName);
970 func->setBody(unreachableFn);
971 ctx.syntheticFunctions.emplace_back(func);
972 // Mark new symbols as local. For relocatable output we don't want them
973 // to be exported outside the object file.
974 replaceSymbol<DefinedFunction>(sym, debugName, WASM_SYMBOL_BINDING_LOCAL,
975 nullptr, func);
976 // Ensure the stub function doesn't get a table entry. Its address
977 // should always compare equal to the null pointer.
978 sym->isStub = true;
979 return func;
982 void SymbolTable::replaceWithUndefined(Symbol *sym) {
983 // Add a synthetic dummy for weak undefined functions. These dummies will
984 // be GC'd if not used as the target of any "call" instructions.
985 StringRef debugName = saver().save("undefined_weak:" + toString(*sym));
986 replaceWithUnreachable(sym, *sym->getSignature(), debugName);
987 // Hide our dummy to prevent export.
988 sym->setHidden(true);
991 // For weak undefined functions, there may be "call" instructions that reference
992 // the symbol. In this case, we need to synthesise a dummy/stub function that
993 // will abort at runtime, so that relocations can still provided an operand to
994 // the call instruction that passes Wasm validation.
995 void SymbolTable::handleWeakUndefines() {
996 for (Symbol *sym : symbols()) {
997 if (sym->isUndefWeak() && sym->isUsedInRegularObj) {
998 if (sym->getSignature()) {
999 replaceWithUndefined(sym);
1000 } else {
1001 // It is possible for undefined functions not to have a signature (eg.
1002 // if added via "--undefined"), but weak undefined ones do have a
1003 // signature. Lazy symbols may not be functions and therefore Sig can
1004 // still be null in some circumstance.
1005 assert(!isa<FunctionSymbol>(sym));
1011 DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) {
1012 if (stubFunctions.count(sig))
1013 return stubFunctions[sig];
1014 LLVM_DEBUG(dbgs() << "createUndefinedStub: " << toString(sig) << "\n");
1015 auto *sym = reinterpret_cast<DefinedFunction *>(make<SymbolUnion>());
1016 sym->isUsedInRegularObj = true;
1017 sym->canInline = true;
1018 sym->traced = false;
1019 sym->forceExport = false;
1020 sym->signature = &sig;
1021 replaceSymbol<DefinedFunction>(
1022 sym, "undefined_stub", WASM_SYMBOL_VISIBILITY_HIDDEN, nullptr, nullptr);
1023 replaceWithUnreachable(sym, sig, "undefined_stub");
1024 stubFunctions[sig] = sym;
1025 return sym;
1028 // Remove any variant symbols that were created due to function signature
1029 // mismatches.
1030 void SymbolTable::handleSymbolVariants() {
1031 for (auto pair : symVariants) {
1032 // Push the initial symbol onto the list of variants.
1033 StringRef symName = pair.first.val();
1034 std::vector<Symbol *> &variants = pair.second;
1036 #ifndef NDEBUG
1037 LLVM_DEBUG(dbgs() << "symbol with (" << variants.size()
1038 << ") variants: " << symName << "\n");
1039 for (auto *s: variants) {
1040 auto *f = cast<FunctionSymbol>(s);
1041 LLVM_DEBUG(dbgs() << " variant: " + f->getName() << " "
1042 << toString(*f->signature) << "\n");
1044 #endif
1046 // Find the one definition.
1047 DefinedFunction *defined = nullptr;
1048 for (auto *symbol : variants) {
1049 if (auto f = dyn_cast<DefinedFunction>(symbol)) {
1050 defined = f;
1051 break;
1055 // If there are no definitions, and the undefined symbols disagree on
1056 // the signature, there is not we can do since we don't know which one
1057 // to use as the signature on the import.
1058 if (!defined) {
1059 reportFunctionSignatureMismatch(symName,
1060 cast<FunctionSymbol>(variants[0]),
1061 cast<FunctionSymbol>(variants[1]));
1062 return;
1065 for (auto *symbol : variants) {
1066 if (symbol != defined) {
1067 auto *f = cast<FunctionSymbol>(symbol);
1068 reportFunctionSignatureMismatch(symName, f, defined, false);
1069 StringRef debugName =
1070 saver().save("signature_mismatch:" + toString(*f));
1071 replaceWithUnreachable(f, *f->signature, debugName);
} // namespace lld::wasm