[flang] Support OPEN(..., FORM="BINARY") (#124657)
[llvm-project.git] / lld / ELF / SymbolTable.cpp
blobb8a70d4e898fc2ea5e9cb080a38c65b027844056
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Symbol table is a bag of all known symbols. We put all symbols of
10 // all input files to the symbol table. The symbol table is basically
11 // a hash table with the logic to resolve symbol name conflicts using
12 // the symbol types.
14 //===----------------------------------------------------------------------===//
16 #include "SymbolTable.h"
17 #include "Config.h"
18 #include "InputFiles.h"
19 #include "Symbols.h"
20 #include "lld/Common/ErrorHandler.h"
21 #include "lld/Common/Memory.h"
22 #include "lld/Common/Strings.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/Demangle/Demangle.h"
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::ELF;
29 using namespace lld;
30 using namespace lld::elf;
32 void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
33 // Redirect __real_foo to the original foo and foo to the original __wrap_foo.
34 int &idx1 = symMap[CachedHashStringRef(sym->getName())];
35 int &idx2 = symMap[CachedHashStringRef(real->getName())];
36 int &idx3 = symMap[CachedHashStringRef(wrap->getName())];
38 idx2 = idx1;
39 idx1 = idx3;
41 // Propagate symbol usage information to the redirected symbols.
42 if (sym->isUsedInRegularObj)
43 wrap->isUsedInRegularObj = true;
44 if (real->isUsedInRegularObj)
45 sym->isUsedInRegularObj = true;
46 else if (!sym->isDefined())
47 // Now that all references to sym have been redirected to wrap, if there are
48 // no references to real (which has been redirected to sym), we only need to
49 // keep sym if it was defined, otherwise it's unused and can be dropped.
50 sym->isUsedInRegularObj = false;
52 // Now renaming is complete, and no one refers to real. We drop real from
53 // .symtab and .dynsym. If real is undefined, it is important that we don't
54 // leave it in .dynsym, because otherwise it might lead to an undefined symbol
55 // error in a subsequent link. If real is defined, we could emit real as an
56 // alias for sym, but that could degrade the user experience of some tools
57 // that can print out only one symbol for each location: sym is a preferred
58 // name than real, but they might print out real instead.
59 memcpy(static_cast<void *>(real), sym, sizeof(SymbolUnion));
60 real->isUsedInRegularObj = false;
63 // Find an existing symbol or create a new one.
64 Symbol *SymbolTable::insert(StringRef name) {
65 // <name>@@<version> means the symbol is the default version. In that
66 // case <name>@@<version> will be used to resolve references to <name>.
68 // Since this is a hot path, the following string search code is
69 // optimized for speed. StringRef::find(char) is much faster than
70 // StringRef::find(StringRef).
71 StringRef stem = name;
72 size_t pos = name.find('@');
73 if (pos != StringRef::npos && pos + 1 < name.size() && name[pos + 1] == '@')
74 stem = name.take_front(pos);
76 auto p = symMap.insert({CachedHashStringRef(stem), (int)symVector.size()});
77 if (!p.second) {
78 Symbol *sym = symVector[p.first->second];
79 if (stem.size() != name.size()) {
80 sym->setName(name);
81 sym->hasVersionSuffix = true;
83 return sym;
86 Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
87 symVector.push_back(sym);
89 // *sym was not initialized by a constructor. Initialize all Symbol fields.
90 memset(static_cast<void *>(sym), 0, sizeof(Symbol));
91 sym->setName(name);
92 sym->partition = 1;
93 sym->versionId = VER_NDX_GLOBAL;
94 if (pos != StringRef::npos)
95 sym->hasVersionSuffix = true;
96 return sym;
99 // This variant of addSymbol is used by BinaryFile::parse to check duplicate
100 // symbol errors.
101 Symbol *SymbolTable::addAndCheckDuplicate(Ctx &ctx, const Defined &newSym) {
102 Symbol *sym = insert(newSym.getName());
103 if (sym->isDefined())
104 sym->checkDuplicate(ctx, newSym);
105 sym->resolve(ctx, newSym);
106 sym->isUsedInRegularObj = true;
107 return sym;
110 Symbol *SymbolTable::find(StringRef name) {
111 auto it = symMap.find(CachedHashStringRef(name));
112 if (it == symMap.end())
113 return nullptr;
114 return symVector[it->second];
117 // A version script/dynamic list is only meaningful for a Defined symbol.
118 // A CommonSymbol will be converted to a Defined in replaceCommonSymbols().
119 // A lazy symbol may be made Defined if an LTO libcall extracts it.
120 static bool canBeVersioned(const Symbol &sym) {
121 return sym.isDefined() || sym.isCommon() || sym.isLazy();
124 // Initialize demangledSyms with a map from demangled symbols to symbol
125 // objects. Used to handle "extern C++" directive in version scripts.
127 // The map will contain all demangled symbols. That can be very large,
128 // and in LLD we generally want to avoid do anything for each symbol.
129 // Then, why are we doing this? Here's why.
131 // Users can use "extern C++ {}" directive to match against demangled
132 // C++ symbols. For example, you can write a pattern such as
133 // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this
134 // other than trying to match a pattern against all demangled symbols.
135 // So, if "extern C++" feature is used, we need to demangle all known
136 // symbols.
137 StringMap<SmallVector<Symbol *, 0>> &SymbolTable::getDemangledSyms() {
138 if (!demangledSyms) {
139 demangledSyms.emplace();
140 std::string demangled;
141 for (Symbol *sym : symVector)
142 if (canBeVersioned(*sym)) {
143 StringRef name = sym->getName();
144 size_t pos = name.find('@');
145 std::string substr;
146 if (pos == std::string::npos)
147 demangled = demangle(name);
148 else if (pos + 1 == name.size() || name[pos + 1] == '@') {
149 substr = name.substr(0, pos);
150 demangled = demangle(substr);
151 } else {
152 substr = name.substr(0, pos);
153 demangled = (demangle(substr) + name.substr(pos)).str();
155 (*demangledSyms)[demangled].push_back(sym);
158 return *demangledSyms;
161 SmallVector<Symbol *, 0> SymbolTable::findByVersion(SymbolVersion ver) {
162 if (ver.isExternCpp)
163 return getDemangledSyms().lookup(ver.name);
164 if (Symbol *sym = find(ver.name))
165 if (canBeVersioned(*sym))
166 return {sym};
167 return {};
170 SmallVector<Symbol *, 0> SymbolTable::findAllByVersion(SymbolVersion ver,
171 bool includeNonDefault) {
172 SmallVector<Symbol *, 0> res;
173 SingleStringMatcher m(ver.name);
174 auto check = [&](const Symbol &sym) -> bool {
175 if (!includeNonDefault)
176 return !sym.hasVersionSuffix;
177 StringRef name = sym.getName();
178 size_t pos = name.find('@');
179 return !(pos + 1 < name.size() && name[pos + 1] == '@');
182 if (ver.isExternCpp) {
183 for (auto &p : getDemangledSyms())
184 if (m.match(p.first()))
185 for (Symbol *sym : p.second)
186 if (check(*sym))
187 res.push_back(sym);
188 return res;
191 for (Symbol *sym : symVector)
192 if (canBeVersioned(*sym) && check(*sym) && m.match(sym->getName()))
193 res.push_back(sym);
194 return res;
197 void SymbolTable::handleDynamicList() {
198 SmallVector<Symbol *, 0> syms;
199 for (SymbolVersion &ver : ctx.arg.dynamicList) {
200 if (ver.hasWildcard)
201 syms = findAllByVersion(ver, /*includeNonDefault=*/true);
202 else
203 syms = findByVersion(ver);
205 for (Symbol *sym : syms)
206 sym->isExported = sym->inDynamicList = true;
210 // Set symbol versions to symbols. This function handles patterns containing no
211 // wildcard characters. Return false if no symbol definition matches ver.
212 bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
213 StringRef versionName,
214 bool includeNonDefault) {
215 // Get a list of symbols which we need to assign the version to.
216 SmallVector<Symbol *, 0> syms = findByVersion(ver);
218 auto getName = [&ctx = ctx](uint16_t ver) -> std::string {
219 if (ver == VER_NDX_LOCAL)
220 return "VER_NDX_LOCAL";
221 if (ver == VER_NDX_GLOBAL)
222 return "VER_NDX_GLOBAL";
223 return ("version '" + ctx.arg.versionDefinitions[ver].name + "'").str();
226 // Assign the version.
227 for (Symbol *sym : syms) {
228 // For a non-local versionId, skip symbols containing version info because
229 // symbol versions specified by symbol names take precedence over version
230 // scripts. See parseSymbolVersion(ctx).
231 if (!includeNonDefault && versionId != VER_NDX_LOCAL &&
232 sym->getName().contains('@'))
233 continue;
235 // If the version has not been assigned, assign versionId to the symbol.
236 if (!sym->versionScriptAssigned) {
237 sym->versionScriptAssigned = true;
238 sym->versionId = versionId;
240 if (sym->versionId == versionId)
241 continue;
243 Warn(ctx) << "attempt to reassign symbol '" << ver.name << "' of "
244 << getName(sym->versionId) << " to " << getName(versionId);
246 return !syms.empty();
249 void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
250 bool includeNonDefault) {
251 // Exact matching takes precedence over fuzzy matching,
252 // so we set a version to a symbol only if no version has been assigned
253 // to the symbol. This behavior is compatible with GNU.
254 for (Symbol *sym : findAllByVersion(ver, includeNonDefault))
255 if (!sym->versionScriptAssigned) {
256 sym->versionScriptAssigned = true;
257 sym->versionId = versionId;
261 // This function processes version scripts by updating the versionId
262 // member of symbols.
263 // If there's only one anonymous version definition in a version
264 // script file, the script does not actually define any symbol version,
265 // but just specifies symbols visibilities.
266 void SymbolTable::scanVersionScript() {
267 SmallString<128> buf;
268 // First, we assign versions to exact matching symbols,
269 // i.e. version definitions not containing any glob meta-characters.
270 for (VersionDefinition &v : ctx.arg.versionDefinitions) {
271 auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
272 bool found =
273 assignExactVersion(pat, id, ver, /*includeNonDefault=*/false);
274 buf.clear();
275 found |= assignExactVersion({(pat.name + "@" + v.name).toStringRef(buf),
276 pat.isExternCpp, /*hasWildCard=*/false},
277 id, ver, /*includeNonDefault=*/true);
278 if (!found && !ctx.arg.undefinedVersion)
279 Err(ctx) << "version script assignment of '" << ver << "' to symbol '"
280 << pat.name << "' failed: symbol not defined";
282 for (SymbolVersion &pat : v.nonLocalPatterns)
283 if (!pat.hasWildcard)
284 assignExact(pat, v.id, v.name);
285 for (SymbolVersion pat : v.localPatterns)
286 if (!pat.hasWildcard)
287 assignExact(pat, VER_NDX_LOCAL, "local");
290 // Next, assign versions to wildcards that are not "*". Note that because the
291 // last match takes precedence over previous matches, we iterate over the
292 // definitions in the reverse order.
293 auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
294 assignWildcardVersion(pat, id, /*includeNonDefault=*/false);
295 buf.clear();
296 assignWildcardVersion({(pat.name + "@" + ver).toStringRef(buf),
297 pat.isExternCpp, /*hasWildCard=*/true},
299 /*includeNonDefault=*/true);
301 for (VersionDefinition &v : llvm::reverse(ctx.arg.versionDefinitions)) {
302 for (SymbolVersion &pat : v.nonLocalPatterns)
303 if (pat.hasWildcard && pat.name != "*")
304 assignWildcard(pat, v.id, v.name);
305 for (SymbolVersion &pat : v.localPatterns)
306 if (pat.hasWildcard && pat.name != "*")
307 assignWildcard(pat, VER_NDX_LOCAL, v.name);
310 // Then, assign versions to "*". In GNU linkers they have lower priority than
311 // other wildcards.
312 bool globalAsteriskFound = false;
313 bool localAsteriskFound = false;
314 bool asteriskReported = false;
315 auto assignAsterisk = [&](SymbolVersion &pat, VersionDefinition *ver,
316 bool isLocal) {
317 // Avoid issuing a warning if both '--retain-symbol-file' and a version
318 // script with `global: *` are used.
320 // '--retain-symbol-file' adds a "*" pattern to
321 // 'versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns', see
322 // 'readConfigs()' in 'Driver.cpp'. Note that it is not '.localPatterns',
323 // and may seem counterintuitive, but still works as expected. Here we can
324 // exploit that and skip analyzing the pattern added for this option.
325 if (!asteriskReported && (isLocal || ver->id > VER_NDX_LOCAL)) {
326 if ((isLocal && globalAsteriskFound) ||
327 (!isLocal && localAsteriskFound)) {
328 Warn(ctx)
329 << "wildcard pattern '*' is used for both 'local' and 'global' "
330 "scopes in version script";
331 asteriskReported = true;
332 } else if (!isLocal && globalAsteriskFound) {
333 Warn(ctx) << "wildcard pattern '*' is used for multiple version "
334 "definitions in "
335 "version script";
336 asteriskReported = true;
337 } else {
338 localAsteriskFound = isLocal;
339 globalAsteriskFound = !isLocal;
342 assignWildcard(pat, isLocal ? (uint16_t)VER_NDX_LOCAL : ver->id, ver->name);
344 for (VersionDefinition &v : llvm::reverse(ctx.arg.versionDefinitions)) {
345 for (SymbolVersion &pat : v.nonLocalPatterns)
346 if (pat.hasWildcard && pat.name == "*")
347 assignAsterisk(pat, &v, false);
348 for (SymbolVersion &pat : v.localPatterns)
349 if (pat.hasWildcard && pat.name == "*")
350 assignAsterisk(pat, &v, true);
353 // Handle --dynamic-list. If a specified symbol is also matched by local: in a
354 // version script, the version script takes precedence.
355 handleDynamicList();
358 Symbol *SymbolTable::addUnusedUndefined(StringRef name, uint8_t binding) {
359 return addSymbol(Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0});