1 //===- Symbols.cpp --------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "InputFiles.h"
12 #include "InputSection.h"
13 #include "OutputSections.h"
14 #include "SyntheticSections.h"
17 #include "lld/Common/ErrorHandler.h"
18 #include "llvm/Demangle/Demangle.h"
19 #include "llvm/Support/Compiler.h"
23 using namespace llvm::object
;
24 using namespace llvm::ELF
;
26 using namespace lld::elf
;
28 static_assert(sizeof(SymbolUnion
) <= 64, "SymbolUnion too large");
30 template <typename T
> struct AssertSymbol
{
31 static_assert(std::is_trivially_destructible
<T
>(),
32 "Symbol types must be trivially destructible");
33 static_assert(sizeof(T
) <= sizeof(SymbolUnion
), "SymbolUnion too small");
34 static_assert(alignof(T
) <= alignof(SymbolUnion
),
35 "SymbolUnion not aligned enough");
38 LLVM_ATTRIBUTE_UNUSED
static inline void assertSymbols() {
39 AssertSymbol
<Defined
>();
40 AssertSymbol
<CommonSymbol
>();
41 AssertSymbol
<Undefined
>();
42 AssertSymbol
<SharedSymbol
>();
43 AssertSymbol
<LazyObject
>();
46 // Returns a symbol for an error message.
47 static std::string
maybeDemangleSymbol(StringRef symName
) {
48 if (elf::config
->demangle
)
49 return demangle(symName
.str());
53 std::string
lld::toString(const elf::Symbol
&sym
) {
54 StringRef name
= sym
.getName();
55 std::string ret
= maybeDemangleSymbol(name
);
57 const char *suffix
= sym
.getVersionSuffix();
64 Defined
*ElfSym::etext1
;
65 Defined
*ElfSym::etext2
;
66 Defined
*ElfSym::edata1
;
67 Defined
*ElfSym::edata2
;
68 Defined
*ElfSym::end1
;
69 Defined
*ElfSym::end2
;
70 Defined
*ElfSym::globalOffsetTable
;
71 Defined
*ElfSym::mipsGp
;
72 Defined
*ElfSym::mipsGpDisp
;
73 Defined
*ElfSym::mipsLocalGp
;
74 Defined
*ElfSym::relaIpltStart
;
75 Defined
*ElfSym::relaIpltEnd
;
76 Defined
*ElfSym::tlsModuleBase
;
77 SmallVector
<SymbolAux
, 0> elf::symAux
;
79 static uint64_t getSymVA(const Symbol
&sym
, int64_t addend
) {
81 case Symbol::DefinedKind
: {
82 auto &d
= cast
<Defined
>(sym
);
83 SectionBase
*isec
= d
.section
;
85 // This is an absolute symbol.
89 assert(isec
!= &InputSection::discarded
);
91 uint64_t offset
= d
.value
;
    // An object in an SHF_MERGE section might be referenced via a
    // section symbol (as a hack for reducing the number of local
    // symbols).
    // Depending on the addend, the reference via a section symbol
    // refers to a different object in the merge section.
    // Since the objects in the merge section are not necessarily
    // contiguous in the output, the addend can thus affect the final
    // VA in a non-linear way.
    // To make this work, we incorporate the addend into the section
    // offset (and zero out the addend for later processing) so that
    // we find the right object in the section.
107 // In the typical case, this is actually very simple and boils
108 // down to adding together 3 numbers:
109 // 1. The address of the output section.
110 // 2. The offset of the input section within the output section.
111 // 3. The offset within the input section (this addition happens
112 // inside InputSection::getOffset).
114 // If you understand the data structures involved with this next
115 // line (and how they get built), then you have a pretty good
116 // understanding of the linker.
117 uint64_t va
= isec
->getVA(offset
);
    // MIPS relocatable files can mix regular and microMIPS code.
    // The linker needs to distinguish such code. To do so, microMIPS
    // symbols have the `STO_MIPS_MICROMIPS` flag set in the `st_other`
    // field. Unfortunately, the `MIPS::relocate()` method has
    // a symbol value only. To pass the type of the symbol (regular/microMIPS)
    // to that routine, as well as to other places where we write
    // a symbol value as-is (.dynamic section, `Elf_Ehdr::e_entry`
    // field, etc.), do the same trick the compiler uses to mark microMIPS
    // code for the CPU - set the least-significant bit.
130 if (config
->emachine
== EM_MIPS
&& isMicroMips() &&
131 ((sym
.stOther
& STO_MIPS_MICROMIPS
) || sym
.hasFlag(NEEDS_COPY
)))
134 if (d
.isTls() && !config
->relocatable
) {
135 // Use the address of the TLS segment's first section rather than the
136 // segment's address, because segment addresses aren't initialized until
137 // after sections are finalized. (e.g. Measuring the size of .rela.dyn
138 // for Android relocation packing requires knowing TLS symbol addresses
139 // during section finalization.)
140 if (!Out::tlsPhdr
|| !Out::tlsPhdr
->firstSec
)
141 fatal(toString(d
.file
) +
142 " has an STT_TLS symbol but doesn't have an SHF_TLS section");
143 return va
- Out::tlsPhdr
->firstSec
->addr
;
147 case Symbol::SharedKind
:
148 case Symbol::UndefinedKind
:
150 case Symbol::LazyObjectKind
:
151 llvm_unreachable("lazy symbol reached writer");
152 case Symbol::CommonKind
:
153 llvm_unreachable("common symbol reached writer");
154 case Symbol::PlaceholderKind
:
155 llvm_unreachable("placeholder symbol reached writer");
157 llvm_unreachable("invalid symbol kind");
160 uint64_t Symbol::getVA(int64_t addend
) const {
161 return getSymVA(*this, addend
) + addend
;
164 uint64_t Symbol::getGotVA() const {
166 return in
.igotPlt
->getVA() + getGotPltOffset();
167 return in
.got
->getVA() + getGotOffset();
170 uint64_t Symbol::getGotOffset() const {
171 return getGotIdx() * target
->gotEntrySize
;
174 uint64_t Symbol::getGotPltVA() const {
176 return in
.igotPlt
->getVA() + getGotPltOffset();
177 return in
.gotPlt
->getVA() + getGotPltOffset();
180 uint64_t Symbol::getGotPltOffset() const {
182 return getPltIdx() * target
->gotEntrySize
;
183 return (getPltIdx() + target
->gotPltHeaderEntriesNum
) * target
->gotEntrySize
;
186 uint64_t Symbol::getPltVA() const {
187 uint64_t outVA
= isInIplt
188 ? in
.iplt
->getVA() + getPltIdx() * target
->ipltEntrySize
189 : in
.plt
->getVA() + in
.plt
->headerSize
+
190 getPltIdx() * target
->pltEntrySize
;
  // While linking microMIPS code, PLT code is always microMIPS
  // code. Set the least-significant bit to track that fact.
  // See the detailed comment in the `getSymVA` function.
195 if (config
->emachine
== EM_MIPS
&& isMicroMips())
200 uint64_t Symbol::getSize() const {
201 if (const auto *dr
= dyn_cast
<Defined
>(this))
203 return cast
<SharedSymbol
>(this)->size
;
206 OutputSection
*Symbol::getOutputSection() const {
207 if (auto *s
= dyn_cast
<Defined
>(this)) {
208 if (auto *sec
= s
->section
)
209 return sec
->getOutputSection();
// If a symbol name contains '@', the characters after that are
// a symbol version name. This function parses that.
217 void Symbol::parseSymbolVersion() {
218 // Return if localized by a local: pattern in a version script.
219 if (versionId
== VER_NDX_LOCAL
)
221 StringRef s
= getName();
222 size_t pos
= s
.find('@');
223 if (pos
== StringRef::npos
)
225 StringRef verstr
= s
.substr(pos
+ 1);
227 // Truncate the symbol name so that it doesn't include the version string.
233 // If this is not in this DSO, it is not a definition.
237 // '@@' in a symbol name means the default version.
238 // It is usually the most recent one.
239 bool isDefault
= (verstr
[0] == '@');
241 verstr
= verstr
.substr(1);
243 for (const VersionDefinition
&ver
: namedVersionDefs()) {
244 if (ver
.name
!= verstr
)
250 versionId
= ver
.id
| VERSYM_HIDDEN
;
  // It is an error if the specified version is not defined.
  // Usually a version script is not provided when linking an executable,
  // but we may still want to override a versioned symbol from a DSO,
  // so we do not report an error in this case. We also do not error
  // if the symbol has a local version as it won't be in the dynamic
  // symbol table.
260 if (config
->shared
&& versionId
!= VER_NDX_LOCAL
)
261 error(toString(file
) + ": symbol " + s
+ " has undefined version " +
265 void Symbol::extract() const {
272 uint8_t Symbol::computeBinding() const {
273 auto v
= visibility();
274 if ((v
!= STV_DEFAULT
&& v
!= STV_PROTECTED
) || versionId
== VER_NDX_LOCAL
)
276 if (binding
== STB_GNU_UNIQUE
&& !config
->gnuUnique
)
281 bool Symbol::includeInDynsym() const {
282 if (computeBinding() == STB_LOCAL
)
284 if (!isDefined() && !isCommon())
285 // This should unconditionally return true, unfortunately glibc -static-pie
286 // expects undefined weak symbols not to exist in .dynsym, e.g.
287 // __pthread_mutex_lock reference in _dl_add_to_namespace_list,
288 // __pthread_initialize_minimal reference in csu/libc-start.c.
289 return !(isUndefWeak() && config
->noDynamicLinker
);
291 return exportDynamic
|| inDynamicList
;
294 // Print out a log message for --trace-symbol.
295 void elf::printTraceSymbol(const Symbol
&sym
, StringRef name
) {
297 if (sym
.isUndefined())
298 s
= ": reference to ";
299 else if (sym
.isLazy())
300 s
= ": lazy definition of ";
301 else if (sym
.isShared())
302 s
= ": shared definition of ";
303 else if (sym
.isCommon())
304 s
= ": common definition of ";
306 s
= ": definition of ";
308 message(toString(sym
.file
) + s
+ name
);
311 static void recordWhyExtract(const InputFile
*reference
,
312 const InputFile
&extracted
, const Symbol
&sym
) {
313 ctx
.whyExtractRecords
.emplace_back(toString(reference
), &extracted
, sym
);
316 void elf::maybeWarnUnorderableSymbol(const Symbol
*sym
) {
317 if (!config
->warnSymbolOrdering
)
320 // If UnresolvedPolicy::Ignore is used, no "undefined symbol" error/warning
321 // is emitted. It makes sense to not warn on undefined symbols.
323 // Note, ld.bfd --symbol-ordering-file= does not warn on undefined symbols,
324 // but we don't have to be compatible here.
325 if (sym
->isUndefined() &&
326 config
->unresolvedSymbols
== UnresolvedPolicy::Ignore
)
329 const InputFile
*file
= sym
->file
;
330 auto *d
= dyn_cast
<Defined
>(sym
);
332 auto report
= [&](StringRef s
) { warn(toString(file
) + s
+ sym
->getName()); };
334 if (sym
->isUndefined())
335 report(": unable to order undefined symbol: ");
336 else if (sym
->isShared())
337 report(": unable to order shared symbol: ");
338 else if (d
&& !d
->section
)
339 report(": unable to order absolute symbol: ");
340 else if (d
&& isa
<OutputSection
>(d
->section
))
341 report(": unable to order synthetic symbol: ");
342 else if (d
&& !d
->section
->isLive())
343 report(": unable to order discarded symbol: ");
// Returns true if a symbol can be replaced at load-time by a symbol
// with the same name defined in another ELF executable or DSO.
348 bool elf::computeIsPreemptible(const Symbol
&sym
) {
349 assert(!sym
.isLocal() || sym
.isPlaceholder());
351 // Only symbols with default visibility that appear in dynsym can be
352 // preempted. Symbols with protected visibility cannot be preempted.
353 if (!sym
.includeInDynsym() || sym
.visibility() != STV_DEFAULT
)
356 // At this point copy relocations have not been created yet, so any
357 // symbol that is not defined locally is preemptible.
358 if (!sym
.isDefined())
364 // If -Bsymbolic or --dynamic-list is specified, or -Bsymbolic-functions is
365 // specified and the symbol is STT_FUNC, the symbol is preemptible iff it is
366 // in the dynamic list. -Bsymbolic-non-weak-functions is a non-weak subset of
367 // -Bsymbolic-functions.
368 if (config
->symbolic
||
369 (config
->bsymbolic
== BsymbolicKind::Functions
&& sym
.isFunc()) ||
370 (config
->bsymbolic
== BsymbolicKind::NonWeakFunctions
&& sym
.isFunc() &&
371 sym
.binding
!= STB_WEAK
))
372 return sym
.inDynamicList
;
376 // Merge symbol properties.
378 // When we have many symbols of the same name, we choose one of them,
379 // and that's the result of symbol resolution. However, symbols that
380 // were not chosen still affect some symbol properties.
381 void Symbol::mergeProperties(const Symbol
&other
) {
382 if (other
.exportDynamic
)
383 exportDynamic
= true;
385 // DSO symbols do not affect visibility in the output.
386 if (!other
.isShared() && other
.visibility() != STV_DEFAULT
) {
387 uint8_t v
= visibility(), ov
= other
.visibility();
388 setVisibility(v
== STV_DEFAULT
? ov
: std::min(v
, ov
));
392 void Symbol::resolve(const Undefined
&other
) {
393 if (other
.visibility() != STV_DEFAULT
) {
394 uint8_t v
= visibility(), ov
= other
.visibility();
395 setVisibility(v
== STV_DEFAULT
? ov
: std::min(v
, ov
));
  // An undefined symbol with non-default visibility must be satisfied
  // in the same DSO.
  //
  // If this is a non-weak defined symbol in a discarded section, override the
  // existing undefined symbol for a better error message later.
402 if (isPlaceholder() || (isShared() && other
.visibility() != STV_DEFAULT
) ||
403 (isUndefined() && other
.binding
!= STB_WEAK
&& other
.discardedSecIdx
)) {
404 other
.overwrite(*this);
409 printTraceSymbol(other
, getName());
412 // An undefined weak will not extract archive members. See comment on Lazy
413 // in Symbols.h for the details.
414 if (other
.binding
== STB_WEAK
) {
    // Do extra check for --warn-backrefs.
    //
    // --warn-backrefs is an option to prevent an undefined reference from
    // extracting an archive member written earlier in the command line. It can
    // be used to keep compatibility with GNU linkers to some degree. I'll
    // explain the feature and why you may find it useful in this comment.
    //
    // lld's symbol resolution semantics are more relaxed than those of
    // traditional Unix linkers. For example,
    //
    //   ld.lld foo.a bar.o
    //
    // succeeds even if bar.o contains an undefined symbol that has to be
    // resolved by some object file in foo.a. Traditional Unix linkers don't
    // allow this kind of backward reference, as they visit each file only once
    // from left to right in the command line while resolving all undefined
    // symbols at the moment of visiting.
    //
    // In the above case, since there's no undefined symbol when a linker visits
    // foo.a, no files are pulled out from foo.a, and because the linker forgets
    // about foo.a after visiting, it can't resolve undefined symbols in bar.o
    // that could have been resolved otherwise.
    //
    // The fact that lld accepts a more relaxed form means that (besides making
    // more sense) you can accidentally write a command line or a build file
    // that works only with lld, even if you plan to distribute it to wider
    // users who may be using GNU linkers. With --warn-backrefs, you can detect
    // a library order that doesn't work with other Unix linkers.
    //
    // The option is also useful to detect cyclic dependencies between static
    // archives. Again, lld accepts
    //
    //   ld.lld foo.a bar.a
    //
    // even if foo.a and bar.a depend on each other. With --warn-backrefs, it is
    // handled as an error.
    //
    // Here is how the option works. We assign a group ID to each file. A file
    // with a smaller group ID can pull out object files from an archive file
    // with an equal or greater group ID. Otherwise, it is a reverse dependency.
    //
    // A file outside --{start,end}-group gets a fresh ID when instantiated. All
    // files within the same --{start,end}-group get the same group ID. E.g.
    //
    //   ld.lld A B --start-group C D --end-group E
    //
    // A forms group 0. B forms group 1. C and D (including their member object
    // files) form group 2. E forms group 3. I think that you can see how this
    // group assignment rule simulates the traditional linker's semantics.
470 bool backref
= config
->warnBackrefs
&& other
.file
&&
471 file
->groupId
< other
.file
->groupId
;
474 if (!config
->whyExtract
.empty())
475 recordWhyExtract(other
.file
, *file
, *this);
    // We don't report backward references to weak symbols as they can be
    // overridden later.
    //
    // A traditional linker does not error for -ldef1 -lref -ldef2 (linking
    // sandwich), where def2 may or may not be the same as def1. We don't want
    // to warn for this case, so dismiss the warning if we see a subsequent lazy
    // definition. this->file needs to be saved because in the case of LTO it
    // may be reset to nullptr or be replaced with a file named lto.tmp.
485 if (backref
&& !isWeak())
486 ctx
.backwardReferences
.try_emplace(this,
487 std::make_pair(other
.file
, file
));
491 // Undefined symbols in a SharedFile do not change the binding.
492 if (isa_and_nonnull
<SharedFile
>(other
.file
))
495 if (isUndefined() || isShared()) {
496 // The binding will be weak if there is at least one reference and all are
497 // weak. The binding has one opportunity to change to weak: if the first
498 // reference is weak.
499 if (other
.binding
!= STB_WEAK
|| !referenced
)
500 binding
= other
.binding
;
504 // Compare two symbols. Return true if the new symbol should win.
505 bool Symbol::shouldReplace(const Defined
&other
) const {
506 if (LLVM_UNLIKELY(isCommon())) {
507 if (config
->warnCommon
)
508 warn("common " + getName() + " is overridden");
509 return !other
.isWeak();
514 // Incoming STB_GLOBAL overrides STB_WEAK/STB_GNU_UNIQUE. -fgnu-unique changes
515 // some vague linkage data in COMDAT from STB_WEAK to STB_GNU_UNIQUE. Treat
516 // STB_GNU_UNIQUE like STB_WEAK so that we prefer the first among all
517 // STB_WEAK/STB_GNU_UNIQUE copies. If we prefer an incoming STB_GNU_UNIQUE to
518 // an existing STB_WEAK, there may be discarded section errors because the
519 // selected copy may be in a non-prevailing COMDAT.
520 return !isGlobal() && other
.isGlobal();
523 void elf::reportDuplicate(const Symbol
&sym
, const InputFile
*newFile
,
524 InputSectionBase
*errSec
, uint64_t errOffset
) {
525 if (config
->allowMultipleDefinition
)
527 // In glibc<2.32, crti.o has .gnu.linkonce.t.__x86.get_pc_thunk.bx, which
528 // is sort of proto-comdat. There is actually no duplicate if we have
529 // full support for .gnu.linkonce.
530 const Defined
*d
= dyn_cast
<Defined
>(&sym
);
531 if (!d
|| d
->getName() == "__x86.get_pc_thunk.bx")
533 // Allow absolute symbols with the same value for GNU ld compatibility.
534 if (!d
->section
&& !errSec
&& errOffset
&& d
->value
== errOffset
)
536 if (!d
->section
|| !errSec
) {
537 error("duplicate symbol: " + toString(sym
) + "\n>>> defined in " +
538 toString(sym
.file
) + "\n>>> defined in " + toString(newFile
));
542 // Construct and print an error message in the form of:
544 // ld.lld: error: duplicate symbol: foo
545 // >>> defined at bar.c:30
546 // >>> bar.o (/home/alice/src/bar.o)
547 // >>> defined at baz.c:563
548 // >>> baz.o in archive libbaz.a
549 auto *sec1
= cast
<InputSectionBase
>(d
->section
);
550 std::string src1
= sec1
->getSrcMsg(sym
, d
->value
);
551 std::string obj1
= sec1
->getObjMsg(d
->value
);
552 std::string src2
= errSec
->getSrcMsg(sym
, errOffset
);
553 std::string obj2
= errSec
->getObjMsg(errOffset
);
555 std::string msg
= "duplicate symbol: " + toString(sym
) + "\n>>> defined at ";
557 msg
+= src1
+ "\n>>> ";
558 msg
+= obj1
+ "\n>>> defined at ";
560 msg
+= src2
+ "\n>>> ";
565 void Symbol::checkDuplicate(const Defined
&other
) const {
566 if (isDefined() && !isWeak() && !other
.isWeak())
567 reportDuplicate(*this, other
.file
,
568 dyn_cast_or_null
<InputSectionBase
>(other
.section
),
572 void Symbol::resolve(const CommonSymbol
&other
) {
573 if (other
.exportDynamic
)
574 exportDynamic
= true;
575 if (other
.visibility() != STV_DEFAULT
) {
576 uint8_t v
= visibility(), ov
= other
.visibility();
577 setVisibility(v
== STV_DEFAULT
? ov
: std::min(v
, ov
));
579 if (isDefined() && !isWeak()) {
580 if (config
->warnCommon
)
581 warn("common " + getName() + " is overridden");
585 if (CommonSymbol
*oldSym
= dyn_cast
<CommonSymbol
>(this)) {
586 if (config
->warnCommon
)
587 warn("multiple common of " + getName());
588 oldSym
->alignment
= std::max(oldSym
->alignment
, other
.alignment
);
589 if (oldSym
->size
< other
.size
) {
590 oldSym
->file
= other
.file
;
591 oldSym
->size
= other
.size
;
596 if (auto *s
= dyn_cast
<SharedSymbol
>(this)) {
597 // Increase st_size if the shared symbol has a larger st_size. The shared
598 // symbol may be created from common symbols. The fact that some object
599 // files were linked into a shared object first should not change the
600 // regular rule that picks the largest st_size.
601 uint64_t size
= s
->size
;
602 other
.overwrite(*this);
603 if (size
> cast
<CommonSymbol
>(this)->size
)
604 cast
<CommonSymbol
>(this)->size
= size
;
606 other
.overwrite(*this);
610 void Symbol::resolve(const Defined
&other
) {
611 if (other
.exportDynamic
)
612 exportDynamic
= true;
613 if (other
.visibility() != STV_DEFAULT
) {
614 uint8_t v
= visibility(), ov
= other
.visibility();
615 setVisibility(v
== STV_DEFAULT
? ov
: std::min(v
, ov
));
617 if (shouldReplace(other
))
618 other
.overwrite(*this);
621 void Symbol::resolve(const LazyObject
&other
) {
622 if (isPlaceholder()) {
623 other
.overwrite(*this);
627 // For common objects, we want to look for global or weak definitions that
628 // should be extracted as the canonical definition instead.
629 if (LLVM_UNLIKELY(isCommon()) && elf::config
->fortranCommon
&&
630 other
.file
->shouldExtractForCommon(getName())) {
631 ctx
.backwardReferences
.erase(this);
632 other
.overwrite(*this);
637 if (!isUndefined()) {
    // See the comment in Symbol::resolve(const Undefined &).
640 ctx
.backwardReferences
.erase(this);
644 // An undefined weak will not extract archive members. See comment on Lazy in
645 // Symbols.h for the details.
648 other
.overwrite(*this);
654 const InputFile
*oldFile
= file
;
656 if (!config
->whyExtract
.empty())
657 recordWhyExtract(oldFile
, *file
, *this);
660 void Symbol::resolve(const SharedSymbol
&other
) {
661 exportDynamic
= true;
662 if (isPlaceholder()) {
663 other
.overwrite(*this);
    // See the comment in Symbol::resolve(const CommonSymbol &) above.
668 if (other
.size
> cast
<CommonSymbol
>(this)->size
)
669 cast
<CommonSymbol
>(this)->size
= other
.size
;
672 if (visibility() == STV_DEFAULT
&& (isUndefined() || isLazy())) {
    // An undefined symbol with non-default visibility must be satisfied
    // in the same DSO.
675 uint8_t bind
= binding
;
676 other
.overwrite(*this);
679 printTraceSymbol(other
, getName());