[mlir][py] Enable loading only specified dialects during creation. (#121421)
[llvm-project.git] / lld / COFF / SymbolTable.cpp
blobae88675ab93a1fcf4244e114c21a38c931b5c5b7
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "SymbolTable.h"
10 #include "COFFLinkerContext.h"
11 #include "Config.h"
12 #include "Driver.h"
13 #include "LTO.h"
14 #include "PDB.h"
15 #include "Symbols.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "lld/Common/Timer.h"
19 #include "llvm/DebugInfo/DIContext.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/IR/Mangler.h"
22 #include "llvm/LTO/LTO.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Parallel.h"
25 #include "llvm/Support/TimeProfiler.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <utility>
29 using namespace llvm;
30 using namespace llvm::support;
32 namespace lld::coff {
34 StringRef ltrim1(StringRef s, const char *chars) {
35 if (!s.empty() && strchr(chars, s[0]))
36 return s.substr(1);
37 return s;
40 static COFFSyncStream errorOrWarn(COFFLinkerContext &ctx) {
41 return {ctx, ctx.config.forceUnresolved ? DiagLevel::Warn : DiagLevel::Err};
44 // Causes the file associated with a lazy symbol to be linked in.
45 static void forceLazy(Symbol *s) {
46 s->pendingArchiveLoad = true;
47 switch (s->kind()) {
48 case Symbol::Kind::LazyArchiveKind: {
49 auto *l = cast<LazyArchive>(s);
50 l->file->addMember(l->sym);
51 break;
53 case Symbol::Kind::LazyObjectKind: {
54 InputFile *file = cast<LazyObject>(s)->file;
55 file->lazy = false;
56 file->symtab.ctx.driver.addFile(file);
57 break;
59 case Symbol::Kind::LazyDLLSymbolKind: {
60 auto *l = cast<LazyDLLSymbol>(s);
61 l->file->makeImport(l->sym);
62 break;
64 default:
65 llvm_unreachable(
66 "symbol passed to forceLazy is not a LazyArchive or LazyObject");
70 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
71 // This is generally the global variable or function whose definition contains
72 // Addr.
73 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
74 DefinedRegular *candidate = nullptr;
76 for (Symbol *s : sc->file->getSymbols()) {
77 auto *d = dyn_cast_or_null<DefinedRegular>(s);
78 if (!d || !d->data || d->file != sc->file || d->getChunk() != sc ||
79 d->getValue() > addr ||
80 (candidate && d->getValue() < candidate->getValue()))
81 continue;
83 candidate = d;
86 return candidate;
89 static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
90 std::string res("\n>>> referenced by ");
91 StringRef source = file->obj->getSourceFileName();
92 if (!source.empty())
93 res += source.str() + "\n>>> ";
94 res += toString(file);
95 return {res};
98 static std::optional<std::pair<StringRef, uint32_t>>
99 getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
100 std::optional<DILineInfo> optionalLineInfo =
101 c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
102 if (!optionalLineInfo)
103 return std::nullopt;
104 const DILineInfo &lineInfo = *optionalLineInfo;
105 if (lineInfo.FileName == DILineInfo::BadString)
106 return std::nullopt;
107 return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
110 static std::optional<std::pair<StringRef, uint32_t>>
111 getFileLine(const SectionChunk *c, uint32_t addr) {
112 // MinGW can optionally use codeview, even if the default is dwarf.
113 std::optional<std::pair<StringRef, uint32_t>> fileLine =
114 getFileLineCodeView(c, addr);
115 // If codeview didn't yield any result, check dwarf in MinGW mode.
116 if (!fileLine && c->file->symtab.ctx.config.mingw)
117 fileLine = getFileLineDwarf(c, addr);
118 return fileLine;
121 // Given a file and the index of a symbol in that file, returns a description
122 // of all references to that symbol from that file. If no debug information is
123 // available, returns just the name of the file, else one string per actual
124 // reference as described in the debug info.
125 // Returns up to maxStrings string descriptions, along with the total number of
126 // locations found.
127 static std::pair<std::vector<std::string>, size_t>
128 getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
129 struct Location {
130 Symbol *sym;
131 std::pair<StringRef, uint32_t> fileLine;
133 std::vector<Location> locations;
134 size_t numLocations = 0;
136 for (Chunk *c : file->getChunks()) {
137 auto *sc = dyn_cast<SectionChunk>(c);
138 if (!sc)
139 continue;
140 for (const coff_relocation &r : sc->getRelocs()) {
141 if (r.SymbolTableIndex != symIndex)
142 continue;
143 numLocations++;
144 if (locations.size() >= maxStrings)
145 continue;
147 std::optional<std::pair<StringRef, uint32_t>> fileLine =
148 getFileLine(sc, r.VirtualAddress);
149 Symbol *sym = getSymbol(sc, r.VirtualAddress);
150 if (fileLine)
151 locations.push_back({sym, *fileLine});
152 else if (sym)
153 locations.push_back({sym, {"", 0}});
157 if (maxStrings == 0)
158 return std::make_pair(std::vector<std::string>(), numLocations);
160 if (numLocations == 0)
161 return std::make_pair(
162 std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
164 std::vector<std::string> symbolLocations(locations.size());
165 size_t i = 0;
166 for (Location loc : locations) {
167 llvm::raw_string_ostream os(symbolLocations[i++]);
168 os << "\n>>> referenced by ";
169 if (!loc.fileLine.first.empty())
170 os << loc.fileLine.first << ":" << loc.fileLine.second
171 << "\n>>> ";
172 os << toString(file);
173 if (loc.sym)
174 os << ":(" << toString(file->symtab.ctx, *loc.sym) << ')';
176 return std::make_pair(symbolLocations, numLocations);
179 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
180 return getSymbolLocations(file, symIndex, SIZE_MAX).first;
183 static std::pair<std::vector<std::string>, size_t>
184 getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
185 if (auto *o = dyn_cast<ObjFile>(file))
186 return getSymbolLocations(o, symIndex, maxStrings);
187 if (auto *b = dyn_cast<BitcodeFile>(file)) {
188 std::vector<std::string> symbolLocations = getSymbolLocations(b);
189 size_t numLocations = symbolLocations.size();
190 if (symbolLocations.size() > maxStrings)
191 symbolLocations.resize(maxStrings);
192 return std::make_pair(symbolLocations, numLocations);
194 llvm_unreachable("unsupported file type passed to getSymbolLocations");
195 return std::make_pair(std::vector<std::string>(), (size_t)0);
198 // For an undefined symbol, stores all files referencing it and the index of
199 // the undefined symbol in each file.
200 struct UndefinedDiag {
201 Symbol *sym;
202 struct File {
203 InputFile *file;
204 uint32_t symIndex;
206 std::vector<File> files;
209 static void reportUndefinedSymbol(COFFLinkerContext &ctx,
210 const UndefinedDiag &undefDiag) {
211 auto diag = errorOrWarn(ctx);
212 diag << "undefined symbol: " << undefDiag.sym;
214 const size_t maxUndefReferences = 3;
215 size_t numDisplayedRefs = 0, numRefs = 0;
216 for (const UndefinedDiag::File &ref : undefDiag.files) {
217 auto [symbolLocations, totalLocations] = getSymbolLocations(
218 ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
220 numRefs += totalLocations;
221 numDisplayedRefs += symbolLocations.size();
222 for (const std::string &s : symbolLocations)
223 diag << s;
225 if (numDisplayedRefs < numRefs)
226 diag << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
229 void SymbolTable::loadMinGWSymbols() {
230 for (auto &i : symMap) {
231 Symbol *sym = i.second;
232 auto *undef = dyn_cast<Undefined>(sym);
233 if (!undef)
234 continue;
235 if (undef->getWeakAlias())
236 continue;
238 StringRef name = undef->getName();
240 if (machine == I386 && ctx.config.stdcallFixup) {
241 // Check if we can resolve an undefined decorated symbol by finding
242 // the intended target as an undecorated symbol (only with a leading
243 // underscore).
244 StringRef origName = name;
245 StringRef baseName = name;
246 // Trim down stdcall/fastcall/vectorcall symbols to the base name.
247 baseName = ltrim1(baseName, "_@");
248 baseName = baseName.substr(0, baseName.find('@'));
249 // Add a leading underscore, as it would be in cdecl form.
250 std::string newName = ("_" + baseName).str();
251 Symbol *l;
252 if (newName != origName && (l = find(newName)) != nullptr) {
253 // If we found a symbol and it is lazy; load it.
254 if (l->isLazy() && !l->pendingArchiveLoad) {
255 Log(ctx) << "Loading lazy " << l->getName() << " from "
256 << l->getFile()->getName() << " for stdcall fixup";
257 forceLazy(l);
259 // If it's lazy or already defined, hook it up as weak alias.
260 if (l->isLazy() || isa<Defined>(l)) {
261 if (ctx.config.warnStdcallFixup)
262 Warn(ctx) << "Resolving " << origName << " by linking to "
263 << newName;
264 else
265 Log(ctx) << "Resolving " << origName << " by linking to "
266 << newName;
267 undef->setWeakAlias(l);
268 continue;
273 if (ctx.config.autoImport) {
274 if (name.starts_with("__imp_"))
275 continue;
276 // If we have an undefined symbol, but we have a lazy symbol we could
277 // load, load it.
278 Symbol *l = find(("__imp_" + name).str());
279 if (!l || l->pendingArchiveLoad || !l->isLazy())
280 continue;
282 Log(ctx) << "Loading lazy " << l->getName() << " from "
283 << l->getFile()->getName() << " for automatic import";
284 forceLazy(l);
289 Defined *SymbolTable::impSymbol(StringRef name) {
290 if (name.starts_with("__imp_"))
291 return nullptr;
292 return dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
295 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
296 Defined *imp = impSymbol(name);
297 if (!imp)
298 return false;
300 // Replace the reference directly to a variable with a reference
301 // to the import address table instead. This obviously isn't right,
302 // but we mark the symbol as isRuntimePseudoReloc, and a later pass
303 // will add runtime pseudo relocations for every relocation against
304 // this Symbol. The runtime pseudo relocation framework expects the
305 // reference itself to point at the IAT entry.
306 size_t impSize = 0;
307 if (isa<DefinedImportData>(imp)) {
308 Log(ctx) << "Automatically importing " << name << " from "
309 << cast<DefinedImportData>(imp)->getDLLName();
310 impSize = sizeof(DefinedImportData);
311 } else if (isa<DefinedRegular>(imp)) {
312 Log(ctx) << "Automatically importing " << name << " from "
313 << toString(cast<DefinedRegular>(imp)->file);
314 impSize = sizeof(DefinedRegular);
315 } else {
316 Warn(ctx) << "unable to automatically import " << name << " from "
317 << imp->getName() << " from " << cast<DefinedRegular>(imp)->file
318 << "; unexpected symbol type";
319 return false;
321 sym->replaceKeepingName(imp, impSize);
322 sym->isRuntimePseudoReloc = true;
324 // There may exist symbols named .refptr.<name> which only consist
325 // of a single pointer to <name>. If it turns out <name> is
326 // automatically imported, we don't need to keep the .refptr.<name>
327 // pointer at all, but redirect all accesses to it to the IAT entry
328 // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
329 DefinedRegular *refptr =
330 dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
331 if (refptr && refptr->getChunk()->getSize() == ctx.config.wordsize) {
332 SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
333 if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
334 Log(ctx) << "Replacing .refptr." << name << " with " << imp->getName();
335 refptr->getChunk()->live = false;
336 refptr->replaceKeepingName(imp, impSize);
339 return true;
342 /// Helper function for reportUnresolvable and resolveRemainingUndefines.
343 /// This function emits an "undefined symbol" diagnostic for each symbol in
344 /// undefs. If localImports is not nullptr, it also emits a "locally
345 /// defined symbol imported" diagnostic for symbols in localImports.
346 /// objFiles and bitcodeFiles (if not nullptr) are used to report where
347 /// undefined symbols are referenced.
348 static void reportProblemSymbols(
349 COFFLinkerContext &ctx, const SmallPtrSetImpl<Symbol *> &undefs,
350 const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
351 // Return early if there is nothing to report (which should be
352 // the common case).
353 if (undefs.empty() && (!localImports || localImports->empty()))
354 return;
356 for (Symbol *b : ctx.config.gcroot) {
357 if (undefs.count(b))
358 errorOrWarn(ctx) << "<root>: undefined symbol: " << b;
359 if (localImports)
360 if (Symbol *imp = localImports->lookup(b))
361 Warn(ctx) << "<root>: locally defined symbol imported: " << imp
362 << " (defined in " << toString(imp->getFile())
363 << ") [LNK4217]";
366 std::vector<UndefinedDiag> undefDiags;
367 DenseMap<Symbol *, int> firstDiag;
369 auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
370 uint32_t symIndex = (uint32_t)-1;
371 for (Symbol *sym : symbols) {
372 ++symIndex;
373 if (!sym)
374 continue;
375 if (undefs.count(sym)) {
376 auto [it, inserted] = firstDiag.try_emplace(sym, undefDiags.size());
377 if (inserted)
378 undefDiags.push_back({sym, {{file, symIndex}}});
379 else
380 undefDiags[it->second].files.push_back({file, symIndex});
382 if (localImports)
383 if (Symbol *imp = localImports->lookup(sym))
384 Warn(ctx) << file << ": locally defined symbol imported: " << imp
385 << " (defined in " << imp->getFile() << ") [LNK4217]";
389 for (ObjFile *file : ctx.objFileInstances)
390 processFile(file, file->getSymbols());
392 if (needBitcodeFiles)
393 for (BitcodeFile *file : ctx.bitcodeFileInstances)
394 processFile(file, file->getSymbols());
396 for (const UndefinedDiag &undefDiag : undefDiags)
397 reportUndefinedSymbol(ctx, undefDiag);
400 void SymbolTable::reportUnresolvable() {
401 SmallPtrSet<Symbol *, 8> undefs;
402 for (auto &i : symMap) {
403 Symbol *sym = i.second;
404 auto *undef = dyn_cast<Undefined>(sym);
405 if (!undef || sym->deferUndefined)
406 continue;
407 if (undef->getWeakAlias())
408 continue;
409 StringRef name = undef->getName();
410 if (name.starts_with("__imp_")) {
411 Symbol *imp = find(name.substr(strlen("__imp_")));
412 if (Defined *def = dyn_cast_or_null<Defined>(imp)) {
413 def->isUsedInRegularObj = true;
414 continue;
417 if (name.contains("_PchSym_"))
418 continue;
419 if (ctx.config.autoImport && impSymbol(name))
420 continue;
421 undefs.insert(sym);
424 reportProblemSymbols(ctx, undefs,
425 /* localImports */ nullptr, true);
428 bool SymbolTable::resolveRemainingUndefines() {
429 llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols");
430 SmallPtrSet<Symbol *, 8> undefs;
431 DenseMap<Symbol *, Symbol *> localImports;
432 bool foundLazy = false;
434 for (auto &i : symMap) {
435 Symbol *sym = i.second;
436 auto *undef = dyn_cast<Undefined>(sym);
437 if (!undef)
438 continue;
439 if (!sym->isUsedInRegularObj)
440 continue;
442 StringRef name = undef->getName();
444 // A weak alias may have been resolved, so check for that.
445 if (undef->resolveWeakAlias())
446 continue;
448 // If we can resolve a symbol by removing __imp_ prefix, do that.
449 // This odd rule is for compatibility with MSVC linker.
450 if (name.starts_with("__imp_")) {
451 auto findLocalSym = [&](StringRef n) {
452 Symbol *sym = find(n);
453 if (auto undef = dyn_cast_or_null<Undefined>(sym)) {
454 // The unprefixed symbol might come later in symMap, so handle it now
455 // if needed.
456 if (!undef->resolveWeakAlias())
457 sym = nullptr;
459 return sym;
462 StringRef impName = name.substr(strlen("__imp_"));
463 Symbol *imp = findLocalSym(impName);
464 if (!imp && isEC()) {
465 // Try to use the mangled symbol on ARM64EC.
466 std::optional<std::string> mangledName =
467 getArm64ECMangledFunctionName(impName);
468 if (mangledName)
469 imp = findLocalSym(*mangledName);
470 if (!imp && impName.consume_front("aux_")) {
471 // If it's a __imp_aux_ symbol, try skipping the aux_ prefix.
472 imp = findLocalSym(impName);
473 if (!imp && (mangledName = getArm64ECMangledFunctionName(impName)))
474 imp = findLocalSym(*mangledName);
477 if (imp && imp->isLazy()) {
478 forceLazy(imp);
479 foundLazy = true;
480 continue;
482 if (imp && isa<Defined>(imp)) {
483 auto *d = cast<Defined>(imp);
484 replaceSymbol<DefinedLocalImport>(sym, ctx, name, d);
485 localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
486 localImports[sym] = d;
487 continue;
491 // We don't want to report missing Microsoft precompiled headers symbols.
492 // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
493 if (name.contains("_PchSym_"))
494 continue;
496 if (ctx.config.autoImport && handleMinGWAutomaticImport(sym, name))
497 continue;
499 // Remaining undefined symbols are not fatal if /force is specified.
500 // They are replaced with dummy defined symbols.
501 if (ctx.config.forceUnresolved)
502 replaceSymbol<DefinedAbsolute>(sym, ctx, name, 0);
503 undefs.insert(sym);
506 reportProblemSymbols(
507 ctx, undefs,
508 ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false);
509 return foundLazy;
512 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
513 bool inserted = false;
514 Symbol *&sym = symMap[CachedHashStringRef(name)];
515 if (!sym) {
516 sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
517 sym->isUsedInRegularObj = false;
518 sym->pendingArchiveLoad = false;
519 sym->canInline = true;
520 inserted = true;
522 if (isEC() && name.starts_with("EXP+"))
523 expSymbols.push_back(sym);
525 return {sym, inserted};
528 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
529 std::pair<Symbol *, bool> result = insert(name);
530 if (!file || !isa<BitcodeFile>(file))
531 result.first->isUsedInRegularObj = true;
532 return result;
535 void SymbolTable::initializeLoadConfig() {
536 auto sym =
537 dyn_cast_or_null<DefinedRegular>(findUnderscore("_load_config_used"));
538 if (!sym) {
539 if (isEC()) {
540 Warn(ctx) << "EC version of '_load_config_used' is missing";
541 return;
543 if (ctx.hybridSymtab) {
544 Warn(ctx) << "native version of '_load_config_used' is missing for "
545 "ARM64X target";
546 return;
548 if (ctx.config.guardCF != GuardCFLevel::Off)
549 Warn(ctx)
550 << "Control Flow Guard is enabled but '_load_config_used' is missing";
551 if (ctx.config.dependentLoadFlags)
552 Warn(ctx) << "_load_config_used not found, /dependentloadflag will have "
553 "no effect";
554 return;
557 SectionChunk *sc = sym->getChunk();
558 if (!sc->hasData) {
559 Err(ctx) << "_load_config_used points to uninitialized data";
560 return;
562 uint64_t offsetInChunk = sym->getValue();
563 if (offsetInChunk + 4 > sc->getSize()) {
564 Err(ctx) << "_load_config_used section chunk is too small";
565 return;
568 ArrayRef<uint8_t> secContents = sc->getContents();
569 loadConfigSize =
570 *reinterpret_cast<const ulittle32_t *>(&secContents[offsetInChunk]);
571 if (offsetInChunk + loadConfigSize > sc->getSize()) {
572 Err(ctx) << "_load_config_used specifies a size larger than its containing "
573 "section chunk";
574 return;
577 uint32_t expectedAlign = ctx.config.is64() ? 8 : 4;
578 if (sc->getAlignment() < expectedAlign)
579 Warn(ctx) << "'_load_config_used' is misaligned (expected alignment to be "
580 << expectedAlign << " bytes, got " << sc->getAlignment()
581 << " instead)";
582 else if (!isAligned(Align(expectedAlign), offsetInChunk))
583 Warn(ctx) << "'_load_config_used' is misaligned (section offset is 0x"
584 << Twine::utohexstr(sym->getValue()) << " not aligned to "
585 << expectedAlign << " bytes)";
587 loadConfigSym = sym;
590 void SymbolTable::addEntryThunk(Symbol *from, Symbol *to) {
591 entryThunks.push_back({from, to});
594 void SymbolTable::addExitThunk(Symbol *from, Symbol *to) {
595 exitThunks[from] = to;
598 void SymbolTable::initializeECThunks() {
599 if (!isArm64EC(ctx.config.machine))
600 return;
602 for (auto it : entryThunks) {
603 auto *to = dyn_cast<Defined>(it.second);
604 if (!to)
605 continue;
606 auto *from = dyn_cast<DefinedRegular>(it.first);
607 // We need to be able to add padding to the function and fill it with an
608 // offset to its entry thunks. To ensure that padding the function is
609 // feasible, functions are required to be COMDAT symbols with no offset.
610 if (!from || !from->getChunk()->isCOMDAT() ||
611 cast<DefinedRegular>(from)->getValue()) {
612 Err(ctx) << "non COMDAT symbol '" << from->getName() << "' in hybrid map";
613 continue;
615 from->getChunk()->setEntryThunk(to);
618 for (ImportFile *file : ctx.importFileInstances) {
619 if (!file->impchkThunk)
620 continue;
622 Symbol *sym = exitThunks.lookup(file->thunkSym);
623 if (!sym)
624 sym = exitThunks.lookup(file->impECSym);
625 file->impchkThunk->exitThunk = dyn_cast_or_null<Defined>(sym);
628 // On ARM64EC, the __imp_ symbol references the auxiliary IAT, while the
629 // __imp_aux_ symbol references the regular IAT. However, x86_64 code expects
630 // both to reference the regular IAT, so adjust the symbol if necessary.
631 parallelForEach(ctx.objFileInstances, [&](ObjFile *file) {
632 if (file->getMachineType() != AMD64)
633 return;
634 for (auto &sym : file->getMutableSymbols()) {
635 auto impSym = dyn_cast_or_null<DefinedImportData>(sym);
636 if (impSym && impSym->file->impchkThunk && sym == impSym->file->impECSym)
637 sym = impSym->file->impSym;
642 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
643 bool overrideLazy) {
644 auto [s, wasInserted] = insert(name, f);
645 if (wasInserted || (s->isLazy() && overrideLazy)) {
646 replaceSymbol<Undefined>(s, name);
647 return s;
649 if (s->isLazy())
650 forceLazy(s);
651 return s;
654 // On ARM64EC, a function symbol may appear in both mangled and demangled forms:
655 // - ARM64EC archives contain only the mangled name, while the demangled symbol
656 // is defined by the object file as an alias.
657 // - x86_64 archives contain only the demangled name (the mangled name is
658 // usually defined by an object referencing the symbol as an alias to a guess
659 // exit thunk).
660 // - ARM64EC import files contain both the mangled and demangled names for
661 // thunks.
662 // If more than one archive defines the same function, this could lead
663 // to different libraries being used for the same function depending on how they
664 // are referenced. Avoid this by checking if the paired symbol is already
665 // defined before adding a symbol to the table.
666 template <typename T>
667 bool checkLazyECPair(SymbolTable *symtab, StringRef name, InputFile *f) {
668 if (name.starts_with("__imp_"))
669 return true;
670 std::string pairName;
671 if (std::optional<std::string> mangledName =
672 getArm64ECMangledFunctionName(name))
673 pairName = std::move(*mangledName);
674 else if (std::optional<std::string> demangledName =
675 getArm64ECDemangledFunctionName(name))
676 pairName = std::move(*demangledName);
677 else
678 return true;
680 Symbol *sym = symtab->find(pairName);
681 if (!sym)
682 return true;
683 if (sym->pendingArchiveLoad)
684 return false;
685 if (auto u = dyn_cast<Undefined>(sym))
686 return !u->weakAlias || u->isAntiDep;
687 // If the symbol is lazy, allow it only if it originates from the same
688 // archive.
689 auto lazy = dyn_cast<T>(sym);
690 return lazy && lazy->file == f;
693 void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
694 StringRef name = sym.getName();
695 if (isEC() && !checkLazyECPair<LazyArchive>(this, name, f))
696 return;
697 auto [s, wasInserted] = insert(name);
698 if (wasInserted) {
699 replaceSymbol<LazyArchive>(s, f, sym);
700 return;
702 auto *u = dyn_cast<Undefined>(s);
703 if (!u || (u->weakAlias && !u->isECAlias(machine)) || s->pendingArchiveLoad)
704 return;
705 s->pendingArchiveLoad = true;
706 f->addMember(sym);
709 void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
710 assert(f->lazy);
711 if (isEC() && !checkLazyECPair<LazyObject>(this, n, f))
712 return;
713 auto [s, wasInserted] = insert(n, f);
714 if (wasInserted) {
715 replaceSymbol<LazyObject>(s, f, n);
716 return;
718 auto *u = dyn_cast<Undefined>(s);
719 if (!u || (u->weakAlias && !u->isECAlias(machine)) || s->pendingArchiveLoad)
720 return;
721 s->pendingArchiveLoad = true;
722 f->lazy = false;
723 ctx.driver.addFile(f);
726 void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
727 StringRef n) {
728 auto [s, wasInserted] = insert(n);
729 if (wasInserted) {
730 replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
731 return;
733 auto *u = dyn_cast<Undefined>(s);
734 if (!u || u->weakAlias || s->pendingArchiveLoad)
735 return;
736 s->pendingArchiveLoad = true;
737 f->makeImport(sym);
740 static std::string getSourceLocationBitcode(BitcodeFile *file) {
741 std::string res("\n>>> defined at ");
742 StringRef source = file->obj->getSourceFileName();
743 if (!source.empty())
744 res += source.str() + "\n>>> ";
745 res += toString(file);
746 return res;
749 static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
750 uint32_t offset, StringRef name) {
751 std::optional<std::pair<StringRef, uint32_t>> fileLine;
752 if (sc)
753 fileLine = getFileLine(sc, offset);
754 if (!fileLine)
755 fileLine = file->getVariableLocation(name);
757 std::string res;
758 llvm::raw_string_ostream os(res);
759 os << "\n>>> defined at ";
760 if (fileLine)
761 os << fileLine->first << ":" << fileLine->second << "\n>>> ";
762 os << toString(file);
763 return res;
766 static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
767 uint32_t offset, StringRef name) {
768 if (!file)
769 return "";
770 if (auto *o = dyn_cast<ObjFile>(file))
771 return getSourceLocationObj(o, sc, offset, name);
772 if (auto *b = dyn_cast<BitcodeFile>(file))
773 return getSourceLocationBitcode(b);
774 return "\n>>> defined at " + toString(file);
777 // Construct and print an error message in the form of:
779 // lld-link: error: duplicate symbol: foo
780 // >>> defined at bar.c:30
781 // >>> bar.o
782 // >>> defined at baz.c:563
783 // >>> baz.o
784 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
785 SectionChunk *newSc,
786 uint32_t newSectionOffset) {
787 COFFSyncStream diag(ctx, ctx.config.forceMultiple ? DiagLevel::Warn
788 : DiagLevel::Err);
789 diag << "duplicate symbol: " << existing;
791 DefinedRegular *d = dyn_cast<DefinedRegular>(existing);
792 if (d && isa<ObjFile>(d->getFile())) {
793 diag << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
794 existing->getName());
795 } else {
796 diag << getSourceLocation(existing->getFile(), nullptr, 0, "");
798 diag << getSourceLocation(newFile, newSc, newSectionOffset,
799 existing->getName());
802 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
803 auto [s, wasInserted] = insert(n, nullptr);
804 s->isUsedInRegularObj = true;
805 if (wasInserted || isa<Undefined>(s) || s->isLazy())
806 replaceSymbol<DefinedAbsolute>(s, ctx, n, sym);
807 else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
808 if (da->getVA() != sym.getValue())
809 reportDuplicate(s, nullptr);
810 } else if (!isa<DefinedCOFF>(s))
811 reportDuplicate(s, nullptr);
812 return s;
815 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
816 auto [s, wasInserted] = insert(n, nullptr);
817 s->isUsedInRegularObj = true;
818 if (wasInserted || isa<Undefined>(s) || s->isLazy())
819 replaceSymbol<DefinedAbsolute>(s, ctx, n, va);
820 else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
821 if (da->getVA() != va)
822 reportDuplicate(s, nullptr);
823 } else if (!isa<DefinedCOFF>(s))
824 reportDuplicate(s, nullptr);
825 return s;
828 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
829 auto [s, wasInserted] = insert(n, nullptr);
830 s->isUsedInRegularObj = true;
831 if (wasInserted || isa<Undefined>(s) || s->isLazy())
832 replaceSymbol<DefinedSynthetic>(s, n, c);
833 else if (!isa<DefinedCOFF>(s))
834 reportDuplicate(s, nullptr);
835 return s;
838 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
839 const coff_symbol_generic *sym, SectionChunk *c,
840 uint32_t sectionOffset, bool isWeak) {
841 auto [s, wasInserted] = insert(n, f);
842 if (wasInserted || !isa<DefinedRegular>(s) || s->isWeak)
843 replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
844 /*IsExternal*/ true, sym, c, isWeak);
845 else if (!isWeak)
846 reportDuplicate(s, f, c, sectionOffset);
847 return s;
850 std::pair<DefinedRegular *, bool>
851 SymbolTable::addComdat(InputFile *f, StringRef n,
852 const coff_symbol_generic *sym) {
853 auto [s, wasInserted] = insert(n, f);
854 if (wasInserted || !isa<DefinedRegular>(s)) {
855 replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
856 /*IsExternal*/ true, sym, nullptr);
857 return {cast<DefinedRegular>(s), true};
859 auto *existingSymbol = cast<DefinedRegular>(s);
860 if (!existingSymbol->isCOMDAT)
861 reportDuplicate(s, f);
862 return {existingSymbol, false};
865 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
866 const coff_symbol_generic *sym, CommonChunk *c) {
867 auto [s, wasInserted] = insert(n, f);
868 if (wasInserted || !isa<DefinedCOFF>(s))
869 replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
870 else if (auto *dc = dyn_cast<DefinedCommon>(s))
871 if (size > dc->getSize())
872 replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
873 return s;
876 DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f,
877 Chunk *&location) {
878 auto [s, wasInserted] = insert(n, nullptr);
879 s->isUsedInRegularObj = true;
880 if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
881 replaceSymbol<DefinedImportData>(s, n, f, location);
882 return cast<DefinedImportData>(s);
885 reportDuplicate(s, f);
886 return nullptr;
889 Defined *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
890 ImportThunkChunk *chunk) {
891 auto [s, wasInserted] = insert(name, nullptr);
892 s->isUsedInRegularObj = true;
893 if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
894 replaceSymbol<DefinedImportThunk>(s, ctx, name, id, chunk);
895 return cast<Defined>(s);
898 reportDuplicate(s, id->file);
899 return nullptr;
902 void SymbolTable::addLibcall(StringRef name) {
903 Symbol *sym = findUnderscore(name);
904 if (!sym)
905 return;
907 if (auto *l = dyn_cast<LazyArchive>(sym)) {
908 MemoryBufferRef mb = l->getMemberBuffer();
909 if (isBitcode(mb))
910 addUndefined(sym->getName());
911 } else if (LazyObject *o = dyn_cast<LazyObject>(sym)) {
912 if (isBitcode(o->file->mb))
913 addUndefined(sym->getName());
917 std::vector<Chunk *> SymbolTable::getChunks() const {
918 std::vector<Chunk *> res;
919 for (ObjFile *file : ctx.objFileInstances) {
920 ArrayRef<Chunk *> v = file->getChunks();
921 res.insert(res.end(), v.begin(), v.end());
923 return res;
926 Symbol *SymbolTable::find(StringRef name) const {
927 return symMap.lookup(CachedHashStringRef(name));
930 Symbol *SymbolTable::findUnderscore(StringRef name) const {
931 if (machine == I386)
932 return find(("_" + name).str());
933 return find(name);
936 // Return all symbols that start with Prefix, possibly ignoring the first
937 // character of Prefix or the first character symbol.
938 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
939 std::vector<Symbol *> syms;
940 for (auto pair : symMap) {
941 StringRef name = pair.first.val();
942 if (name.starts_with(prefix) || name.starts_with(prefix.drop_front()) ||
943 name.drop_front().starts_with(prefix) ||
944 name.drop_front().starts_with(prefix.drop_front())) {
945 syms.push_back(pair.second);
948 return syms;
951 Symbol *SymbolTable::findMangle(StringRef name) {
952 if (Symbol *sym = find(name)) {
953 if (auto *u = dyn_cast<Undefined>(sym)) {
954 // We're specifically looking for weak aliases that ultimately resolve to
955 // defined symbols, hence the call to getWeakAlias() instead of just using
956 // the weakAlias member variable. This matches link.exe's behavior.
957 if (Symbol *weakAlias = u->getWeakAlias())
958 return weakAlias;
959 } else {
960 return sym;
964 // Efficient fuzzy string lookup is impossible with a hash table, so iterate
965 // the symbol table once and collect all possibly matching symbols into this
966 // vector. Then compare each possibly matching symbol with each possible
967 // mangling.
968 std::vector<Symbol *> syms = getSymsWithPrefix(name);
969 auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
970 std::string prefix = t.str();
971 for (auto *s : syms)
972 if (s->getName().starts_with(prefix))
973 return s;
974 return nullptr;
977 // For non-x86, just look for C++ functions.
978 if (machine != I386)
979 return findByPrefix("?" + name + "@@Y");
981 if (!name.starts_with("_"))
982 return nullptr;
983 // Search for x86 stdcall function.
984 if (Symbol *s = findByPrefix(name + "@"))
985 return s;
986 // Search for x86 fastcall function.
987 if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
988 return s;
989 // Search for x86 vectorcall function.
990 if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
991 return s;
992 // Search for x86 C++ non-member function.
993 return findByPrefix("?" + name.substr(1) + "@@Y");
996 Symbol *SymbolTable::addUndefined(StringRef name) {
997 return addUndefined(name, nullptr, false);
1000 void SymbolTable::compileBitcodeFiles() {
1001 if (ctx.bitcodeFileInstances.empty())
1002 return;
1004 llvm::TimeTraceScope timeScope("Compile bitcode");
1005 ScopedTimer t(ctx.ltoTimer);
1006 lto.reset(new BitcodeCompiler(ctx));
1007 for (BitcodeFile *f : ctx.bitcodeFileInstances)
1008 lto->add(*f);
1009 for (InputFile *newObj : lto->compile()) {
1010 ObjFile *obj = cast<ObjFile>(newObj);
1011 obj->parse();
1012 ctx.objFileInstances.push_back(obj);
1016 } // namespace lld::coff