Revert "[libc] Use best-fit binary trie to make malloc logarithmic" (#117065)
[llvm-project.git] / lld / COFF / SymbolTable.cpp
blobdf3c5a176b52e0731a1de66df84aed357bef40ec
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "SymbolTable.h"
10 #include "COFFLinkerContext.h"
11 #include "Config.h"
12 #include "Driver.h"
13 #include "LTO.h"
14 #include "PDB.h"
15 #include "Symbols.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "lld/Common/Timer.h"
19 #include "llvm/DebugInfo/DIContext.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/IR/Mangler.h"
22 #include "llvm/LTO/LTO.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Parallel.h"
25 #include "llvm/Support/TimeProfiler.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <utility>
29 using namespace llvm;
31 namespace lld::coff {
33 StringRef ltrim1(StringRef s, const char *chars) {
34 if (!s.empty() && strchr(chars, s[0]))
35 return s.substr(1);
36 return s;
39 static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) {
40 if (mt == IMAGE_FILE_MACHINE_UNKNOWN)
41 return true;
42 switch (ctx.config.machine) {
43 case ARM64:
44 return mt == ARM64 || mt == ARM64X;
45 case ARM64EC:
46 return COFF::isArm64EC(mt) || mt == AMD64;
47 case ARM64X:
48 return COFF::isAnyArm64(mt) || mt == AMD64;
49 default:
50 return ctx.config.machine == mt;
54 void SymbolTable::addFile(InputFile *file) {
55 log("Reading " + toString(file));
56 if (file->lazy) {
57 if (auto *f = dyn_cast<BitcodeFile>(file))
58 f->parseLazy();
59 else
60 cast<ObjFile>(file)->parseLazy();
61 } else {
62 file->parse();
63 if (auto *f = dyn_cast<ObjFile>(file)) {
64 ctx.objFileInstances.push_back(f);
65 } else if (auto *f = dyn_cast<BitcodeFile>(file)) {
66 if (ltoCompilationDone) {
67 error("LTO object file " + toString(file) + " linked in after "
68 "doing LTO compilation.");
70 ctx.bitcodeFileInstances.push_back(f);
71 } else if (auto *f = dyn_cast<ImportFile>(file)) {
72 ctx.importFileInstances.push_back(f);
76 MachineTypes mt = file->getMachineType();
77 if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN) {
78 ctx.config.machine = mt;
79 ctx.driver.addWinSysRootLibSearchPaths();
80 } else if (!compatibleMachineType(ctx, mt)) {
81 error(toString(file) + ": machine type " + machineToStr(mt) +
82 " conflicts with " + machineToStr(ctx.config.machine));
83 return;
86 ctx.driver.parseDirectives(file);
89 static void errorOrWarn(const Twine &s, bool forceUnresolved) {
90 if (forceUnresolved)
91 warn(s);
92 else
93 error(s);
96 // Causes the file associated with a lazy symbol to be linked in.
97 static void forceLazy(Symbol *s) {
98 s->pendingArchiveLoad = true;
99 switch (s->kind()) {
100 case Symbol::Kind::LazyArchiveKind: {
101 auto *l = cast<LazyArchive>(s);
102 l->file->addMember(l->sym);
103 break;
105 case Symbol::Kind::LazyObjectKind: {
106 InputFile *file = cast<LazyObject>(s)->file;
107 file->ctx.symtab.addFile(file);
108 break;
110 case Symbol::Kind::LazyDLLSymbolKind: {
111 auto *l = cast<LazyDLLSymbol>(s);
112 l->file->makeImport(l->sym);
113 break;
115 default:
116 llvm_unreachable(
117 "symbol passed to forceLazy is not a LazyArchive or LazyObject");
121 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
122 // This is generally the global variable or function whose definition contains
123 // Addr.
124 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
125 DefinedRegular *candidate = nullptr;
127 for (Symbol *s : sc->file->getSymbols()) {
128 auto *d = dyn_cast_or_null<DefinedRegular>(s);
129 if (!d || !d->data || d->file != sc->file || d->getChunk() != sc ||
130 d->getValue() > addr ||
131 (candidate && d->getValue() < candidate->getValue()))
132 continue;
134 candidate = d;
137 return candidate;
140 static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
141 std::string res("\n>>> referenced by ");
142 StringRef source = file->obj->getSourceFileName();
143 if (!source.empty())
144 res += source.str() + "\n>>> ";
145 res += toString(file);
146 return {res};
149 static std::optional<std::pair<StringRef, uint32_t>>
150 getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
151 std::optional<DILineInfo> optionalLineInfo =
152 c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
153 if (!optionalLineInfo)
154 return std::nullopt;
155 const DILineInfo &lineInfo = *optionalLineInfo;
156 if (lineInfo.FileName == DILineInfo::BadString)
157 return std::nullopt;
158 return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
161 static std::optional<std::pair<StringRef, uint32_t>>
162 getFileLine(const SectionChunk *c, uint32_t addr) {
163 // MinGW can optionally use codeview, even if the default is dwarf.
164 std::optional<std::pair<StringRef, uint32_t>> fileLine =
165 getFileLineCodeView(c, addr);
166 // If codeview didn't yield any result, check dwarf in MinGW mode.
167 if (!fileLine && c->file->ctx.config.mingw)
168 fileLine = getFileLineDwarf(c, addr);
169 return fileLine;
172 // Given a file and the index of a symbol in that file, returns a description
173 // of all references to that symbol from that file. If no debug information is
174 // available, returns just the name of the file, else one string per actual
175 // reference as described in the debug info.
176 // Returns up to maxStrings string descriptions, along with the total number of
177 // locations found.
178 static std::pair<std::vector<std::string>, size_t>
179 getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
180 struct Location {
181 Symbol *sym;
182 std::pair<StringRef, uint32_t> fileLine;
184 std::vector<Location> locations;
185 size_t numLocations = 0;
187 for (Chunk *c : file->getChunks()) {
188 auto *sc = dyn_cast<SectionChunk>(c);
189 if (!sc)
190 continue;
191 for (const coff_relocation &r : sc->getRelocs()) {
192 if (r.SymbolTableIndex != symIndex)
193 continue;
194 numLocations++;
195 if (locations.size() >= maxStrings)
196 continue;
198 std::optional<std::pair<StringRef, uint32_t>> fileLine =
199 getFileLine(sc, r.VirtualAddress);
200 Symbol *sym = getSymbol(sc, r.VirtualAddress);
201 if (fileLine)
202 locations.push_back({sym, *fileLine});
203 else if (sym)
204 locations.push_back({sym, {"", 0}});
208 if (maxStrings == 0)
209 return std::make_pair(std::vector<std::string>(), numLocations);
211 if (numLocations == 0)
212 return std::make_pair(
213 std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
215 std::vector<std::string> symbolLocations(locations.size());
216 size_t i = 0;
217 for (Location loc : locations) {
218 llvm::raw_string_ostream os(symbolLocations[i++]);
219 os << "\n>>> referenced by ";
220 if (!loc.fileLine.first.empty())
221 os << loc.fileLine.first << ":" << loc.fileLine.second
222 << "\n>>> ";
223 os << toString(file);
224 if (loc.sym)
225 os << ":(" << toString(file->ctx, *loc.sym) << ')';
227 return std::make_pair(symbolLocations, numLocations);
230 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
231 return getSymbolLocations(file, symIndex, SIZE_MAX).first;
234 static std::pair<std::vector<std::string>, size_t>
235 getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
236 if (auto *o = dyn_cast<ObjFile>(file))
237 return getSymbolLocations(o, symIndex, maxStrings);
238 if (auto *b = dyn_cast<BitcodeFile>(file)) {
239 std::vector<std::string> symbolLocations = getSymbolLocations(b);
240 size_t numLocations = symbolLocations.size();
241 if (symbolLocations.size() > maxStrings)
242 symbolLocations.resize(maxStrings);
243 return std::make_pair(symbolLocations, numLocations);
245 llvm_unreachable("unsupported file type passed to getSymbolLocations");
246 return std::make_pair(std::vector<std::string>(), (size_t)0);
249 // For an undefined symbol, stores all files referencing it and the index of
250 // the undefined symbol in each file.
251 struct UndefinedDiag {
252 Symbol *sym;
253 struct File {
254 InputFile *file;
255 uint32_t symIndex;
257 std::vector<File> files;
260 static void reportUndefinedSymbol(const COFFLinkerContext &ctx,
261 const UndefinedDiag &undefDiag) {
262 std::string out;
263 llvm::raw_string_ostream os(out);
264 os << "undefined symbol: " << toString(ctx, *undefDiag.sym);
266 const size_t maxUndefReferences = 3;
267 size_t numDisplayedRefs = 0, numRefs = 0;
268 for (const UndefinedDiag::File &ref : undefDiag.files) {
269 auto [symbolLocations, totalLocations] = getSymbolLocations(
270 ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
272 numRefs += totalLocations;
273 numDisplayedRefs += symbolLocations.size();
274 for (const std::string &s : symbolLocations) {
275 os << s;
278 if (numDisplayedRefs < numRefs)
279 os << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
280 errorOrWarn(out, ctx.config.forceUnresolved);
283 void SymbolTable::loadMinGWSymbols() {
284 for (auto &i : symMap) {
285 Symbol *sym = i.second;
286 auto *undef = dyn_cast<Undefined>(sym);
287 if (!undef)
288 continue;
289 if (undef->getWeakAlias())
290 continue;
292 StringRef name = undef->getName();
294 if (ctx.config.machine == I386 && ctx.config.stdcallFixup) {
295 // Check if we can resolve an undefined decorated symbol by finding
296 // the intended target as an undecorated symbol (only with a leading
297 // underscore).
298 StringRef origName = name;
299 StringRef baseName = name;
300 // Trim down stdcall/fastcall/vectorcall symbols to the base name.
301 baseName = ltrim1(baseName, "_@");
302 baseName = baseName.substr(0, baseName.find('@'));
303 // Add a leading underscore, as it would be in cdecl form.
304 std::string newName = ("_" + baseName).str();
305 Symbol *l;
306 if (newName != origName && (l = find(newName)) != nullptr) {
307 // If we found a symbol and it is lazy; load it.
308 if (l->isLazy() && !l->pendingArchiveLoad) {
309 log("Loading lazy " + l->getName() + " from " +
310 l->getFile()->getName() + " for stdcall fixup");
311 forceLazy(l);
313 // If it's lazy or already defined, hook it up as weak alias.
314 if (l->isLazy() || isa<Defined>(l)) {
315 if (ctx.config.warnStdcallFixup)
316 warn("Resolving " + origName + " by linking to " + newName);
317 else
318 log("Resolving " + origName + " by linking to " + newName);
319 undef->setWeakAlias(l);
320 continue;
325 if (ctx.config.autoImport) {
326 if (name.starts_with("__imp_"))
327 continue;
328 // If we have an undefined symbol, but we have a lazy symbol we could
329 // load, load it.
330 Symbol *l = find(("__imp_" + name).str());
331 if (!l || l->pendingArchiveLoad || !l->isLazy())
332 continue;
334 log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() +
335 " for automatic import");
336 forceLazy(l);
341 Defined *SymbolTable::impSymbol(StringRef name) {
342 if (name.starts_with("__imp_"))
343 return nullptr;
344 return dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
347 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
348 Defined *imp = impSymbol(name);
349 if (!imp)
350 return false;
352 // Replace the reference directly to a variable with a reference
353 // to the import address table instead. This obviously isn't right,
354 // but we mark the symbol as isRuntimePseudoReloc, and a later pass
355 // will add runtime pseudo relocations for every relocation against
356 // this Symbol. The runtime pseudo relocation framework expects the
357 // reference itself to point at the IAT entry.
358 size_t impSize = 0;
359 if (isa<DefinedImportData>(imp)) {
360 log("Automatically importing " + name + " from " +
361 cast<DefinedImportData>(imp)->getDLLName());
362 impSize = sizeof(DefinedImportData);
363 } else if (isa<DefinedRegular>(imp)) {
364 log("Automatically importing " + name + " from " +
365 toString(cast<DefinedRegular>(imp)->file));
366 impSize = sizeof(DefinedRegular);
367 } else {
368 warn("unable to automatically import " + name + " from " + imp->getName() +
369 " from " + toString(cast<DefinedRegular>(imp)->file) +
370 "; unexpected symbol type");
371 return false;
373 sym->replaceKeepingName(imp, impSize);
374 sym->isRuntimePseudoReloc = true;
376 // There may exist symbols named .refptr.<name> which only consist
377 // of a single pointer to <name>. If it turns out <name> is
378 // automatically imported, we don't need to keep the .refptr.<name>
379 // pointer at all, but redirect all accesses to it to the IAT entry
380 // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
381 DefinedRegular *refptr =
382 dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
383 if (refptr && refptr->getChunk()->getSize() == ctx.config.wordsize) {
384 SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
385 if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
386 log("Replacing .refptr." + name + " with " + imp->getName());
387 refptr->getChunk()->live = false;
388 refptr->replaceKeepingName(imp, impSize);
391 return true;
394 /// Helper function for reportUnresolvable and resolveRemainingUndefines.
395 /// This function emits an "undefined symbol" diagnostic for each symbol in
396 /// undefs. If localImports is not nullptr, it also emits a "locally
397 /// defined symbol imported" diagnostic for symbols in localImports.
398 /// objFiles and bitcodeFiles (if not nullptr) are used to report where
399 /// undefined symbols are referenced.
400 static void reportProblemSymbols(
401 const COFFLinkerContext &ctx, const SmallPtrSetImpl<Symbol *> &undefs,
402 const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
403 // Return early if there is nothing to report (which should be
404 // the common case).
405 if (undefs.empty() && (!localImports || localImports->empty()))
406 return;
408 for (Symbol *b : ctx.config.gcroot) {
409 if (undefs.count(b))
410 errorOrWarn("<root>: undefined symbol: " + toString(ctx, *b),
411 ctx.config.forceUnresolved);
412 if (localImports)
413 if (Symbol *imp = localImports->lookup(b))
414 warn("<root>: locally defined symbol imported: " + toString(ctx, *imp) +
415 " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
418 std::vector<UndefinedDiag> undefDiags;
419 DenseMap<Symbol *, int> firstDiag;
421 auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
422 uint32_t symIndex = (uint32_t)-1;
423 for (Symbol *sym : symbols) {
424 ++symIndex;
425 if (!sym)
426 continue;
427 if (undefs.count(sym)) {
428 auto [it, inserted] = firstDiag.try_emplace(sym, undefDiags.size());
429 if (inserted)
430 undefDiags.push_back({sym, {{file, symIndex}}});
431 else
432 undefDiags[it->second].files.push_back({file, symIndex});
434 if (localImports)
435 if (Symbol *imp = localImports->lookup(sym))
436 warn(toString(file) +
437 ": locally defined symbol imported: " + toString(ctx, *imp) +
438 " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
442 for (ObjFile *file : ctx.objFileInstances)
443 processFile(file, file->getSymbols());
445 if (needBitcodeFiles)
446 for (BitcodeFile *file : ctx.bitcodeFileInstances)
447 processFile(file, file->getSymbols());
449 for (const UndefinedDiag &undefDiag : undefDiags)
450 reportUndefinedSymbol(ctx, undefDiag);
453 void SymbolTable::reportUnresolvable() {
454 SmallPtrSet<Symbol *, 8> undefs;
455 for (auto &i : symMap) {
456 Symbol *sym = i.second;
457 auto *undef = dyn_cast<Undefined>(sym);
458 if (!undef || sym->deferUndefined)
459 continue;
460 if (undef->getWeakAlias())
461 continue;
462 StringRef name = undef->getName();
463 if (name.starts_with("__imp_")) {
464 Symbol *imp = find(name.substr(strlen("__imp_")));
465 if (Defined *def = dyn_cast_or_null<Defined>(imp)) {
466 def->isUsedInRegularObj = true;
467 continue;
470 if (name.contains("_PchSym_"))
471 continue;
472 if (ctx.config.autoImport && impSymbol(name))
473 continue;
474 undefs.insert(sym);
477 reportProblemSymbols(ctx, undefs,
478 /* localImports */ nullptr, true);
481 bool SymbolTable::resolveRemainingUndefines() {
482 llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols");
483 SmallPtrSet<Symbol *, 8> undefs;
484 DenseMap<Symbol *, Symbol *> localImports;
485 bool foundLazy = false;
487 for (auto &i : symMap) {
488 Symbol *sym = i.second;
489 auto *undef = dyn_cast<Undefined>(sym);
490 if (!undef)
491 continue;
492 if (!sym->isUsedInRegularObj)
493 continue;
495 StringRef name = undef->getName();
497 // A weak alias may have been resolved, so check for that.
498 if (undef->resolveWeakAlias())
499 continue;
501 // If we can resolve a symbol by removing __imp_ prefix, do that.
502 // This odd rule is for compatibility with MSVC linker.
503 if (name.starts_with("__imp_")) {
504 auto findLocalSym = [&](StringRef n) {
505 Symbol *sym = find(n);
506 if (auto undef = dyn_cast_or_null<Undefined>(sym)) {
507 // The unprefixed symbol might come later in symMap, so handle it now
508 // if needed.
509 if (!undef->resolveWeakAlias())
510 sym = nullptr;
512 return sym;
515 StringRef impName = name.substr(strlen("__imp_"));
516 Symbol *imp = findLocalSym(impName);
517 if (!imp && isArm64EC(ctx.config.machine)) {
518 // Try to use the mangled symbol on ARM64EC.
519 std::optional<std::string> mangledName =
520 getArm64ECMangledFunctionName(impName);
521 if (mangledName)
522 imp = findLocalSym(*mangledName);
523 if (!imp && impName.consume_front("aux_")) {
524 // If it's a __imp_aux_ symbol, try skipping the aux_ prefix.
525 imp = findLocalSym(impName);
526 if (!imp && (mangledName = getArm64ECMangledFunctionName(impName)))
527 imp = findLocalSym(*mangledName);
530 if (imp && imp->isLazy()) {
531 forceLazy(imp);
532 foundLazy = true;
533 continue;
535 if (imp && isa<Defined>(imp)) {
536 auto *d = cast<Defined>(imp);
537 replaceSymbol<DefinedLocalImport>(sym, ctx, name, d);
538 localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
539 localImports[sym] = d;
540 continue;
544 // We don't want to report missing Microsoft precompiled headers symbols.
545 // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
546 if (name.contains("_PchSym_"))
547 continue;
549 if (ctx.config.autoImport && handleMinGWAutomaticImport(sym, name))
550 continue;
552 // Remaining undefined symbols are not fatal if /force is specified.
553 // They are replaced with dummy defined symbols.
554 if (ctx.config.forceUnresolved)
555 replaceSymbol<DefinedAbsolute>(sym, ctx, name, 0);
556 undefs.insert(sym);
559 reportProblemSymbols(
560 ctx, undefs,
561 ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false);
562 return foundLazy;
565 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
566 bool inserted = false;
567 Symbol *&sym = symMap[CachedHashStringRef(name)];
568 if (!sym) {
569 sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
570 sym->isUsedInRegularObj = false;
571 sym->pendingArchiveLoad = false;
572 sym->canInline = true;
573 inserted = true;
575 if (isArm64EC(ctx.config.machine) && name.starts_with("EXP+"))
576 expSymbols.push_back(sym);
578 return {sym, inserted};
581 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
582 std::pair<Symbol *, bool> result = insert(name);
583 if (!file || !isa<BitcodeFile>(file))
584 result.first->isUsedInRegularObj = true;
585 return result;
588 void SymbolTable::addEntryThunk(Symbol *from, Symbol *to) {
589 entryThunks.push_back({from, to});
592 void SymbolTable::addExitThunk(Symbol *from, Symbol *to) {
593 exitThunks[from] = to;
596 void SymbolTable::initializeECThunks() {
597 if (!isArm64EC(ctx.config.machine))
598 return;
600 for (auto it : entryThunks) {
601 auto *to = dyn_cast<Defined>(it.second);
602 if (!to)
603 continue;
604 auto *from = dyn_cast<DefinedRegular>(it.first);
605 // We need to be able to add padding to the function and fill it with an
606 // offset to its entry thunks. To ensure that padding the function is
607 // feasible, functions are required to be COMDAT symbols with no offset.
608 if (!from || !from->getChunk()->isCOMDAT() ||
609 cast<DefinedRegular>(from)->getValue()) {
610 error("non COMDAT symbol '" + from->getName() + "' in hybrid map");
611 continue;
613 from->getChunk()->setEntryThunk(to);
616 for (ImportFile *file : ctx.importFileInstances) {
617 if (!file->impchkThunk)
618 continue;
620 Symbol *sym = exitThunks.lookup(file->thunkSym);
621 if (!sym)
622 sym = exitThunks.lookup(file->impECSym);
623 file->impchkThunk->exitThunk = dyn_cast_or_null<Defined>(sym);
626 // On ARM64EC, the __imp_ symbol references the auxiliary IAT, while the
627 // __imp_aux_ symbol references the regular IAT. However, x86_64 code expects
628 // both to reference the regular IAT, so adjust the symbol if necessary.
629 parallelForEach(ctx.objFileInstances, [&](ObjFile *file) {
630 if (file->getMachineType() != AMD64)
631 return;
632 for (auto &sym : file->getMutableSymbols()) {
633 auto impSym = dyn_cast_or_null<DefinedImportData>(sym);
634 if (impSym && impSym->file->impchkThunk && sym == impSym->file->impECSym)
635 sym = impSym->file->impSym;
640 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
641 bool overrideLazy) {
642 auto [s, wasInserted] = insert(name, f);
643 if (wasInserted || (s->isLazy() && overrideLazy)) {
644 replaceSymbol<Undefined>(s, name);
645 return s;
647 if (s->isLazy())
648 forceLazy(s);
649 return s;
652 // On ARM64EC, a function symbol may appear in both mangled and demangled forms:
653 // - ARM64EC archives contain only the mangled name, while the demangled symbol
654 // is defined by the object file as an alias.
655 // - x86_64 archives contain only the demangled name (the mangled name is
656 // usually defined by an object referencing the symbol as an alias to a guess
657 // exit thunk).
658 // - ARM64EC import files contain both the mangled and demangled names for
659 // thunks.
660 // If more than one archive defines the same function, this could lead
661 // to different libraries being used for the same function depending on how they
662 // are referenced. Avoid this by checking if the paired symbol is already
663 // defined before adding a symbol to the table.
664 template <typename T>
665 bool checkLazyECPair(SymbolTable *symtab, StringRef name, InputFile *f) {
666 if (name.starts_with("__imp_"))
667 return true;
668 std::string pairName;
669 if (std::optional<std::string> mangledName =
670 getArm64ECMangledFunctionName(name))
671 pairName = std::move(*mangledName);
672 else if (std::optional<std::string> demangledName =
673 getArm64ECDemangledFunctionName(name))
674 pairName = std::move(*demangledName);
675 else
676 return true;
678 Symbol *sym = symtab->find(pairName);
679 if (!sym)
680 return true;
681 if (sym->pendingArchiveLoad)
682 return false;
683 if (auto u = dyn_cast<Undefined>(sym))
684 return !u->weakAlias || u->isAntiDep;
685 // If the symbol is lazy, allow it only if it originates from the same
686 // archive.
687 auto lazy = dyn_cast<T>(sym);
688 return lazy && lazy->file == f;
691 void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
692 StringRef name = sym.getName();
693 if (isArm64EC(ctx.config.machine) &&
694 !checkLazyECPair<LazyArchive>(this, name, f))
695 return;
696 auto [s, wasInserted] = insert(name);
697 if (wasInserted) {
698 replaceSymbol<LazyArchive>(s, f, sym);
699 return;
701 auto *u = dyn_cast<Undefined>(s);
702 if (!u || (u->weakAlias && !u->isECAlias(ctx.config.machine)) ||
703 s->pendingArchiveLoad)
704 return;
705 s->pendingArchiveLoad = true;
706 f->addMember(sym);
709 void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
710 assert(f->lazy);
711 if (isArm64EC(ctx.config.machine) && !checkLazyECPair<LazyObject>(this, n, f))
712 return;
713 auto [s, wasInserted] = insert(n, f);
714 if (wasInserted) {
715 replaceSymbol<LazyObject>(s, f, n);
716 return;
718 auto *u = dyn_cast<Undefined>(s);
719 if (!u || (u->weakAlias && !u->isECAlias(ctx.config.machine)) ||
720 s->pendingArchiveLoad)
721 return;
722 s->pendingArchiveLoad = true;
723 f->lazy = false;
724 addFile(f);
727 void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
728 StringRef n) {
729 auto [s, wasInserted] = insert(n);
730 if (wasInserted) {
731 replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
732 return;
734 auto *u = dyn_cast<Undefined>(s);
735 if (!u || u->weakAlias || s->pendingArchiveLoad)
736 return;
737 s->pendingArchiveLoad = true;
738 f->makeImport(sym);
741 static std::string getSourceLocationBitcode(BitcodeFile *file) {
742 std::string res("\n>>> defined at ");
743 StringRef source = file->obj->getSourceFileName();
744 if (!source.empty())
745 res += source.str() + "\n>>> ";
746 res += toString(file);
747 return res;
750 static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
751 uint32_t offset, StringRef name) {
752 std::optional<std::pair<StringRef, uint32_t>> fileLine;
753 if (sc)
754 fileLine = getFileLine(sc, offset);
755 if (!fileLine)
756 fileLine = file->getVariableLocation(name);
758 std::string res;
759 llvm::raw_string_ostream os(res);
760 os << "\n>>> defined at ";
761 if (fileLine)
762 os << fileLine->first << ":" << fileLine->second << "\n>>> ";
763 os << toString(file);
764 return res;
767 static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
768 uint32_t offset, StringRef name) {
769 if (!file)
770 return "";
771 if (auto *o = dyn_cast<ObjFile>(file))
772 return getSourceLocationObj(o, sc, offset, name);
773 if (auto *b = dyn_cast<BitcodeFile>(file))
774 return getSourceLocationBitcode(b);
775 return "\n>>> defined at " + toString(file);
778 // Construct and print an error message in the form of:
780 // lld-link: error: duplicate symbol: foo
781 // >>> defined at bar.c:30
782 // >>> bar.o
783 // >>> defined at baz.c:563
784 // >>> baz.o
785 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
786 SectionChunk *newSc,
787 uint32_t newSectionOffset) {
788 std::string msg;
789 llvm::raw_string_ostream os(msg);
790 os << "duplicate symbol: " << toString(ctx, *existing);
792 DefinedRegular *d = dyn_cast<DefinedRegular>(existing);
793 if (d && isa<ObjFile>(d->getFile())) {
794 os << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
795 existing->getName());
796 } else {
797 os << getSourceLocation(existing->getFile(), nullptr, 0, "");
799 os << getSourceLocation(newFile, newSc, newSectionOffset,
800 existing->getName());
802 if (ctx.config.forceMultiple)
803 warn(msg);
804 else
805 error(msg);
808 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
809 auto [s, wasInserted] = insert(n, nullptr);
810 s->isUsedInRegularObj = true;
811 if (wasInserted || isa<Undefined>(s) || s->isLazy())
812 replaceSymbol<DefinedAbsolute>(s, ctx, n, sym);
813 else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
814 if (da->getVA() != sym.getValue())
815 reportDuplicate(s, nullptr);
816 } else if (!isa<DefinedCOFF>(s))
817 reportDuplicate(s, nullptr);
818 return s;
821 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
822 auto [s, wasInserted] = insert(n, nullptr);
823 s->isUsedInRegularObj = true;
824 if (wasInserted || isa<Undefined>(s) || s->isLazy())
825 replaceSymbol<DefinedAbsolute>(s, ctx, n, va);
826 else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
827 if (da->getVA() != va)
828 reportDuplicate(s, nullptr);
829 } else if (!isa<DefinedCOFF>(s))
830 reportDuplicate(s, nullptr);
831 return s;
834 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
835 auto [s, wasInserted] = insert(n, nullptr);
836 s->isUsedInRegularObj = true;
837 if (wasInserted || isa<Undefined>(s) || s->isLazy())
838 replaceSymbol<DefinedSynthetic>(s, n, c);
839 else if (!isa<DefinedCOFF>(s))
840 reportDuplicate(s, nullptr);
841 return s;
844 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
845 const coff_symbol_generic *sym, SectionChunk *c,
846 uint32_t sectionOffset, bool isWeak) {
847 auto [s, wasInserted] = insert(n, f);
848 if (wasInserted || !isa<DefinedRegular>(s) || s->isWeak)
849 replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
850 /*IsExternal*/ true, sym, c, isWeak);
851 else if (!isWeak)
852 reportDuplicate(s, f, c, sectionOffset);
853 return s;
856 std::pair<DefinedRegular *, bool>
857 SymbolTable::addComdat(InputFile *f, StringRef n,
858 const coff_symbol_generic *sym) {
859 auto [s, wasInserted] = insert(n, f);
860 if (wasInserted || !isa<DefinedRegular>(s)) {
861 replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
862 /*IsExternal*/ true, sym, nullptr);
863 return {cast<DefinedRegular>(s), true};
865 auto *existingSymbol = cast<DefinedRegular>(s);
866 if (!existingSymbol->isCOMDAT)
867 reportDuplicate(s, f);
868 return {existingSymbol, false};
871 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
872 const coff_symbol_generic *sym, CommonChunk *c) {
873 auto [s, wasInserted] = insert(n, f);
874 if (wasInserted || !isa<DefinedCOFF>(s))
875 replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
876 else if (auto *dc = dyn_cast<DefinedCommon>(s))
877 if (size > dc->getSize())
878 replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
879 return s;
882 DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f,
883 Chunk *&location) {
884 auto [s, wasInserted] = insert(n, nullptr);
885 s->isUsedInRegularObj = true;
886 if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
887 replaceSymbol<DefinedImportData>(s, n, f, location);
888 return cast<DefinedImportData>(s);
891 reportDuplicate(s, f);
892 return nullptr;
895 Defined *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
896 ImportThunkChunk *chunk) {
897 auto [s, wasInserted] = insert(name, nullptr);
898 s->isUsedInRegularObj = true;
899 if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
900 replaceSymbol<DefinedImportThunk>(s, ctx, name, id, chunk);
901 return cast<Defined>(s);
904 reportDuplicate(s, id->file);
905 return nullptr;
908 void SymbolTable::addLibcall(StringRef name) {
909 Symbol *sym = findUnderscore(name);
910 if (!sym)
911 return;
913 if (auto *l = dyn_cast<LazyArchive>(sym)) {
914 MemoryBufferRef mb = l->getMemberBuffer();
915 if (isBitcode(mb))
916 addUndefined(sym->getName());
917 } else if (LazyObject *o = dyn_cast<LazyObject>(sym)) {
918 if (isBitcode(o->file->mb))
919 addUndefined(sym->getName());
923 std::vector<Chunk *> SymbolTable::getChunks() const {
924 std::vector<Chunk *> res;
925 for (ObjFile *file : ctx.objFileInstances) {
926 ArrayRef<Chunk *> v = file->getChunks();
927 res.insert(res.end(), v.begin(), v.end());
929 return res;
932 Symbol *SymbolTable::find(StringRef name) const {
933 return symMap.lookup(CachedHashStringRef(name));
936 Symbol *SymbolTable::findUnderscore(StringRef name) const {
937 if (ctx.config.machine == I386)
938 return find(("_" + name).str());
939 return find(name);
942 // Return all symbols that start with Prefix, possibly ignoring the first
943 // character of Prefix or the first character symbol.
944 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
945 std::vector<Symbol *> syms;
946 for (auto pair : symMap) {
947 StringRef name = pair.first.val();
948 if (name.starts_with(prefix) || name.starts_with(prefix.drop_front()) ||
949 name.drop_front().starts_with(prefix) ||
950 name.drop_front().starts_with(prefix.drop_front())) {
951 syms.push_back(pair.second);
954 return syms;
957 Symbol *SymbolTable::findMangle(StringRef name) {
958 if (Symbol *sym = find(name)) {
959 if (auto *u = dyn_cast<Undefined>(sym)) {
960 // We're specifically looking for weak aliases that ultimately resolve to
961 // defined symbols, hence the call to getWeakAlias() instead of just using
962 // the weakAlias member variable. This matches link.exe's behavior.
963 if (Symbol *weakAlias = u->getWeakAlias())
964 return weakAlias;
965 } else {
966 return sym;
970 // Efficient fuzzy string lookup is impossible with a hash table, so iterate
971 // the symbol table once and collect all possibly matching symbols into this
972 // vector. Then compare each possibly matching symbol with each possible
973 // mangling.
974 std::vector<Symbol *> syms = getSymsWithPrefix(name);
975 auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
976 std::string prefix = t.str();
977 for (auto *s : syms)
978 if (s->getName().starts_with(prefix))
979 return s;
980 return nullptr;
983 // For non-x86, just look for C++ functions.
984 if (ctx.config.machine != I386)
985 return findByPrefix("?" + name + "@@Y");
987 if (!name.starts_with("_"))
988 return nullptr;
989 // Search for x86 stdcall function.
990 if (Symbol *s = findByPrefix(name + "@"))
991 return s;
992 // Search for x86 fastcall function.
993 if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
994 return s;
995 // Search for x86 vectorcall function.
996 if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
997 return s;
998 // Search for x86 C++ non-member function.
999 return findByPrefix("?" + name.substr(1) + "@@Y");
1002 Symbol *SymbolTable::addUndefined(StringRef name) {
1003 return addUndefined(name, nullptr, false);
1006 void SymbolTable::compileBitcodeFiles() {
1007 ltoCompilationDone = true;
1008 if (ctx.bitcodeFileInstances.empty())
1009 return;
1011 llvm::TimeTraceScope timeScope("Compile bitcode");
1012 ScopedTimer t(ctx.ltoTimer);
1013 lto.reset(new BitcodeCompiler(ctx));
1014 for (BitcodeFile *f : ctx.bitcodeFileInstances)
1015 lto->add(*f);
1016 for (InputFile *newObj : lto->compile()) {
1017 ObjFile *obj = cast<ObjFile>(newObj);
1018 obj->parse();
1019 ctx.objFileInstances.push_back(obj);
1023 } // namespace lld::coff