1 //===- Symbols.cpp --------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "InputFiles.h"
12 #include "InputSection.h"
13 #include "OutputSections.h"
14 #include "SyntheticSections.h"
17 #include "lld/Common/ErrorHandler.h"
18 #include "llvm/Demangle/Demangle.h"
19 #include "llvm/Support/Compiler.h"
23 using namespace llvm::object
;
24 using namespace llvm::ELF
;
26 using namespace lld::elf
;
28 static_assert(sizeof(SymbolUnion
) <= 64, "SymbolUnion too large");
30 template <typename T
> struct AssertSymbol
{
31 static_assert(std::is_trivially_destructible
<T
>(),
32 "Symbol types must be trivially destructible");
33 static_assert(sizeof(T
) <= sizeof(SymbolUnion
), "SymbolUnion too small");
34 static_assert(alignof(T
) <= alignof(SymbolUnion
),
35 "SymbolUnion not aligned enough");
38 LLVM_ATTRIBUTE_UNUSED
static inline void assertSymbols() {
39 AssertSymbol
<Defined
>();
40 AssertSymbol
<CommonSymbol
>();
41 AssertSymbol
<Undefined
>();
42 AssertSymbol
<SharedSymbol
>();
43 AssertSymbol
<LazyObject
>();
46 // Returns a symbol for an error message.
47 static std::string
maybeDemangleSymbol(StringRef symName
) {
48 return elf::config
->demangle
? demangle(symName
.str()) : symName
.str();
51 std::string
lld::toString(const elf::Symbol
&sym
) {
52 StringRef name
= sym
.getName();
53 std::string ret
= maybeDemangleSymbol(name
);
55 const char *suffix
= sym
.getVersionSuffix();
62 Defined
*ElfSym::etext1
;
63 Defined
*ElfSym::etext2
;
64 Defined
*ElfSym::edata1
;
65 Defined
*ElfSym::edata2
;
66 Defined
*ElfSym::end1
;
67 Defined
*ElfSym::end2
;
68 Defined
*ElfSym::globalOffsetTable
;
69 Defined
*ElfSym::mipsGp
;
70 Defined
*ElfSym::mipsGpDisp
;
71 Defined
*ElfSym::mipsLocalGp
;
72 Defined
*ElfSym::riscvGlobalPointer
;
73 Defined
*ElfSym::relaIpltStart
;
74 Defined
*ElfSym::relaIpltEnd
;
75 Defined
*ElfSym::tlsModuleBase
;
76 SmallVector
<SymbolAux
, 0> elf::symAux
;
78 static uint64_t getSymVA(const Symbol
&sym
, int64_t addend
) {
80 case Symbol::DefinedKind
: {
81 auto &d
= cast
<Defined
>(sym
);
82 SectionBase
*isec
= d
.section
;
84 // This is an absolute symbol.
88 assert(isec
!= &InputSection::discarded
);
90 uint64_t offset
= d
.value
;
92 // An object in an SHF_MERGE section might be referenced via a
93 // section symbol (as a hack for reducing the number of local
95 // Depending on the addend, the reference via a section symbol
96 // refers to a different object in the merge section.
97 // Since the objects in the merge section are not necessarily
98 // contiguous in the output, the addend can thus affect the final
99 // VA in a non-linear way.
100 // To make this work, we incorporate the addend into the section
101 // offset (and zero out the addend for later processing) so that
102 // we find the right object in the section.
106 // In the typical case, this is actually very simple and boils
107 // down to adding together 3 numbers:
108 // 1. The address of the output section.
109 // 2. The offset of the input section within the output section.
110 // 3. The offset within the input section (this addition happens
111 // inside InputSection::getOffset).
113 // If you understand the data structures involved with this next
114 // line (and how they get built), then you have a pretty good
115 // understanding of the linker.
116 uint64_t va
= isec
->getVA(offset
);
120 // MIPS relocatable files can mix regular and microMIPS code.
121 // The linker needs to distinguish such code. To do so, microMIPS
122 // symbols have the `STO_MIPS_MICROMIPS` flag in the `st_other`
123 // field. Unfortunately, the `MIPS::relocate()` method has
124 // a symbol value only. To pass the type of the symbol (regular/microMIPS)
125 // to that routine, as well as to other places where we write
126 // a symbol value as-is (.dynamic section, `Elf_Ehdr::e_entry`
127 // field, etc.), we use the same trick the compiler uses to mark microMIPS
128 // code for the CPU: set the least-significant bit.
129 if (config
->emachine
== EM_MIPS
&& isMicroMips() &&
130 ((sym
.stOther
& STO_MIPS_MICROMIPS
) || sym
.hasFlag(NEEDS_COPY
)))
133 if (d
.isTls() && !config
->relocatable
) {
134 // Use the address of the TLS segment's first section rather than the
135 // segment's address, because segment addresses aren't initialized until
136 // after sections are finalized. (e.g. Measuring the size of .rela.dyn
137 // for Android relocation packing requires knowing TLS symbol addresses
138 // during section finalization.)
139 if (!Out::tlsPhdr
|| !Out::tlsPhdr
->firstSec
)
140 fatal(toString(d
.file
) +
141 " has an STT_TLS symbol but doesn't have an SHF_TLS section");
142 return va
- Out::tlsPhdr
->firstSec
->addr
;
146 case Symbol::SharedKind
:
147 case Symbol::UndefinedKind
:
149 case Symbol::LazyObjectKind
:
150 llvm_unreachable("lazy symbol reached writer");
151 case Symbol::CommonKind
:
152 llvm_unreachable("common symbol reached writer");
153 case Symbol::PlaceholderKind
:
154 llvm_unreachable("placeholder symbol reached writer");
156 llvm_unreachable("invalid symbol kind");
159 uint64_t Symbol::getVA(int64_t addend
) const {
160 return getSymVA(*this, addend
) + addend
;
163 uint64_t Symbol::getGotVA() const {
165 return in
.igotPlt
->getVA() + getGotPltOffset();
166 return in
.got
->getVA() + getGotOffset();
169 uint64_t Symbol::getGotOffset() const {
170 return getGotIdx() * target
->gotEntrySize
;
173 uint64_t Symbol::getGotPltVA() const {
175 return in
.igotPlt
->getVA() + getGotPltOffset();
176 return in
.gotPlt
->getVA() + getGotPltOffset();
179 uint64_t Symbol::getGotPltOffset() const {
181 return getPltIdx() * target
->gotEntrySize
;
182 return (getPltIdx() + target
->gotPltHeaderEntriesNum
) * target
->gotEntrySize
;
185 uint64_t Symbol::getPltVA() const {
186 uint64_t outVA
= isInIplt
187 ? in
.iplt
->getVA() + getPltIdx() * target
->ipltEntrySize
188 : in
.plt
->getVA() + in
.plt
->headerSize
+
189 getPltIdx() * target
->pltEntrySize
;
191 // While linking microMIPS code, PLT code is always microMIPS
192 // code. Set the least-significant bit to track that fact.
193 // See detailed comment in the `getSymVA` function.
194 if (config
->emachine
== EM_MIPS
&& isMicroMips())
199 uint64_t Symbol::getSize() const {
200 if (const auto *dr
= dyn_cast
<Defined
>(this))
202 return cast
<SharedSymbol
>(this)->size
;
205 OutputSection
*Symbol::getOutputSection() const {
206 if (auto *s
= dyn_cast
<Defined
>(this)) {
207 if (auto *sec
= s
->section
)
208 return sec
->getOutputSection();
214 // If a symbol name contains '@', the characters after it are
215 // the symbol version name. This function parses that.
216 void Symbol::parseSymbolVersion() {
217 // Return if localized by a local: pattern in a version script.
218 if (versionId
== VER_NDX_LOCAL
)
220 StringRef s
= getName();
221 size_t pos
= s
.find('@');
222 if (pos
== StringRef::npos
)
224 StringRef verstr
= s
.substr(pos
+ 1);
226 // Truncate the symbol name so that it doesn't include the version string.
232 // If this is not in this DSO, it is not a definition.
236 // '@@' in a symbol name means the default version.
237 // It is usually the most recent one.
238 bool isDefault
= (verstr
[0] == '@');
240 verstr
= verstr
.substr(1);
242 for (const VersionDefinition
&ver
: namedVersionDefs()) {
243 if (ver
.name
!= verstr
)
249 versionId
= ver
.id
| VERSYM_HIDDEN
;
253 // It is an error if the specified version is not defined.
254 // Usually version script is not provided when linking executable,
255 // but we may still want to override a versioned symbol from DSO,
256 // so we do not report error in this case. We also do not error
257 // if the symbol has a local version as it won't be in the dynamic
259 if (config
->shared
&& versionId
!= VER_NDX_LOCAL
)
260 error(toString(file
) + ": symbol " + s
+ " has undefined version " +
264 void Symbol::extract() const {
271 uint8_t Symbol::computeBinding() const {
272 auto v
= visibility();
273 if ((v
!= STV_DEFAULT
&& v
!= STV_PROTECTED
) || versionId
== VER_NDX_LOCAL
)
275 if (binding
== STB_GNU_UNIQUE
&& !config
->gnuUnique
)
280 bool Symbol::includeInDynsym() const {
281 if (computeBinding() == STB_LOCAL
)
283 if (!isDefined() && !isCommon())
284 // This should unconditionally return true, unfortunately glibc -static-pie
285 // expects undefined weak symbols not to exist in .dynsym, e.g.
286 // __pthread_mutex_lock reference in _dl_add_to_namespace_list,
287 // __pthread_initialize_minimal reference in csu/libc-start.c.
288 return !(isUndefWeak() && config
->noDynamicLinker
);
290 return exportDynamic
|| inDynamicList
;
293 // Print out a log message for --trace-symbol.
294 void elf::printTraceSymbol(const Symbol
&sym
, StringRef name
) {
296 if (sym
.isUndefined())
297 s
= ": reference to ";
298 else if (sym
.isLazy())
299 s
= ": lazy definition of ";
300 else if (sym
.isShared())
301 s
= ": shared definition of ";
302 else if (sym
.isCommon())
303 s
= ": common definition of ";
305 s
= ": definition of ";
307 message(toString(sym
.file
) + s
+ name
);
310 static void recordWhyExtract(const InputFile
*reference
,
311 const InputFile
&extracted
, const Symbol
&sym
) {
312 ctx
.whyExtractRecords
.emplace_back(toString(reference
), &extracted
, sym
);
315 void elf::maybeWarnUnorderableSymbol(const Symbol
*sym
) {
316 if (!config
->warnSymbolOrdering
)
319 // If UnresolvedPolicy::Ignore is used, no "undefined symbol" error/warning is
320 // emitted. It makes sense to not warn on undefined symbols (excluding those
321 // demoted by demoteSymbols).
323 // Note, ld.bfd --symbol-ordering-file= does not warn on undefined symbols,
324 // but we don't have to be compatible here.
325 if (sym
->isUndefined() && !cast
<Undefined
>(sym
)->discardedSecIdx
&&
326 config
->unresolvedSymbols
== UnresolvedPolicy::Ignore
)
329 const InputFile
*file
= sym
->file
;
330 auto *d
= dyn_cast
<Defined
>(sym
);
332 auto report
= [&](StringRef s
) { warn(toString(file
) + s
+ sym
->getName()); };
334 if (sym
->isUndefined()) {
335 if (cast
<Undefined
>(sym
)->discardedSecIdx
)
336 report(": unable to order discarded symbol: ");
338 report(": unable to order undefined symbol: ");
339 } else if (sym
->isShared())
340 report(": unable to order shared symbol: ");
341 else if (d
&& !d
->section
)
342 report(": unable to order absolute symbol: ");
343 else if (d
&& isa
<OutputSection
>(d
->section
))
344 report(": unable to order synthetic symbol: ");
345 else if (d
&& !d
->section
->isLive())
346 report(": unable to order discarded symbol: ");
349 // Returns true if a symbol can be replaced at load-time by a symbol
350 // with the same name defined in other ELF executable or DSO.
351 bool elf::computeIsPreemptible(const Symbol
&sym
) {
352 assert(!sym
.isLocal() || sym
.isPlaceholder());
354 // Only symbols with default visibility that appear in dynsym can be
355 // preempted. Symbols with protected visibility cannot be preempted.
356 if (!sym
.includeInDynsym() || sym
.visibility() != STV_DEFAULT
)
359 // At this point copy relocations have not been created yet, so any
360 // symbol that is not defined locally is preemptible.
361 if (!sym
.isDefined())
367 // If -Bsymbolic or --dynamic-list is specified, or -Bsymbolic-functions is
368 // specified and the symbol is STT_FUNC, the symbol is preemptible iff it is
369 // in the dynamic list. -Bsymbolic-non-weak-functions is a non-weak subset of
370 // -Bsymbolic-functions.
371 if (config
->symbolic
||
372 (config
->bsymbolic
== BsymbolicKind::NonWeak
&&
373 sym
.binding
!= STB_WEAK
) ||
374 (config
->bsymbolic
== BsymbolicKind::Functions
&& sym
.isFunc()) ||
375 (config
->bsymbolic
== BsymbolicKind::NonWeakFunctions
&& sym
.isFunc() &&
376 sym
.binding
!= STB_WEAK
))
377 return sym
.inDynamicList
;
381 // Merge symbol properties.
383 // When we have many symbols of the same name, we choose one of them,
384 // and that's the result of symbol resolution. However, symbols that
385 // were not chosen still affect some symbol properties.
386 void Symbol::mergeProperties(const Symbol
&other
) {
387 if (other
.exportDynamic
)
388 exportDynamic
= true;
390 // DSO symbols do not affect visibility in the output.
391 if (!other
.isShared() && other
.visibility() != STV_DEFAULT
) {
392 uint8_t v
= visibility(), ov
= other
.visibility();
393 setVisibility(v
== STV_DEFAULT
? ov
: std::min(v
, ov
));
397 void Symbol::resolve(const Undefined
&other
) {
398 if (other
.visibility() != STV_DEFAULT
) {
399 uint8_t v
= visibility(), ov
= other
.visibility();
400 setVisibility(v
== STV_DEFAULT
? ov
: std::min(v
, ov
));
402 // An undefined symbol with non default visibility must be satisfied
405 // If this is a non-weak defined symbol in a discarded section, override the
406 // existing undefined symbol for better error message later.
407 if (isPlaceholder() || (isShared() && other
.visibility() != STV_DEFAULT
) ||
408 (isUndefined() && other
.binding
!= STB_WEAK
&& other
.discardedSecIdx
)) {
409 other
.overwrite(*this);
414 printTraceSymbol(other
, getName());
417 // An undefined weak will not extract archive members. See comment on Lazy
418 // in Symbols.h for the details.
419 if (other
.binding
== STB_WEAK
) {
425 // Do extra check for --warn-backrefs.
427 // --warn-backrefs is an option to prevent an undefined reference from
428 // extracting an archive member written earlier in the command line. It can
429 // be used to keep compatibility with GNU linkers to some degree. I'll
430 // explain the feature and why you may find it useful in this comment.
432 // lld's symbol resolution semantics is more relaxed than traditional Unix
433 // linkers. For example,
435 // ld.lld foo.a bar.o
437 // succeeds even if bar.o contains an undefined symbol that has to be
438 // resolved by some object file in foo.a. Traditional Unix linkers don't
439 // allow this kind of backward reference, as they visit each file only once
440 // from left to right in the command line while resolving all undefined
441 // symbols at the moment of visiting.
443 // In the above case, since there's no undefined symbol when a linker visits
444 // foo.a, no files are pulled out from foo.a, and because the linker forgets
445 // about foo.a after visiting, it can't resolve undefined symbols in bar.o
446 // that could have been resolved otherwise.
448 // That lld accepts a more relaxed form means that (besides it'd make more
449 // sense) you can accidentally write a command line or a build file that
450 // works only with lld, even if you have a plan to distribute it to wider
451 // users who may be using GNU linkers. With --warn-backrefs, you can detect
452 // a library order that doesn't work with other Unix linkers.
454 // The option is also useful to detect cyclic dependencies between static
455 // archives. Again, lld accepts
457 // ld.lld foo.a bar.a
459 // even if foo.a and bar.a depend on each other. With --warn-backrefs, it is
460 // handled as an error.
462 // Here is how the option works. We assign a group ID to each file. A file
463 // with a smaller group ID can pull out object files from an archive file
464 // with an equal or greater group ID. Otherwise, it is a reverse dependency
467 // A file outside --{start,end}-group gets a fresh ID when instantiated. All
468 // files within the same --{start,end}-group get the same group ID. E.g.
470 // ld.lld A B --start-group C D --end-group E
472 // A forms group 0. B forms group 1. C and D (including their member object
473 // files) form group 2. E forms group 3. I think that you can see how this
474 // group assignment rule simulates the traditional linker's semantics.
475 bool backref
= config
->warnBackrefs
&& other
.file
&&
476 file
->groupId
< other
.file
->groupId
;
479 if (!config
->whyExtract
.empty())
480 recordWhyExtract(other
.file
, *file
, *this);
482 // We don't report backward references to weak symbols as they can be
485 // A traditional linker does not error for -ldef1 -lref -ldef2 (linking
486 // sandwich), where def2 may or may not be the same as def1. We don't want
487 // to warn for this case, so dismiss the warning if we see a subsequent lazy
488 // definition. this->file needs to be saved because in the case of LTO it
489 // may be reset to nullptr or be replaced with a file named lto.tmp.
490 if (backref
&& !isWeak())
491 ctx
.backwardReferences
.try_emplace(this,
492 std::make_pair(other
.file
, file
));
496 // Undefined symbols in a SharedFile do not change the binding.
497 if (isa_and_nonnull
<SharedFile
>(other
.file
))
500 if (isUndefined() || isShared()) {
501 // The binding will be weak if there is at least one reference and all are
502 // weak. The binding has one opportunity to change to weak: if the first
503 // reference is weak.
504 if (other
.binding
!= STB_WEAK
|| !referenced
)
505 binding
= other
.binding
;
509 // Compare two symbols. Return true if the new symbol should win.
510 bool Symbol::shouldReplace(const Defined
&other
) const {
511 if (LLVM_UNLIKELY(isCommon())) {
512 if (config
->warnCommon
)
513 warn("common " + getName() + " is overridden");
514 return !other
.isWeak();
519 // Incoming STB_GLOBAL overrides STB_WEAK/STB_GNU_UNIQUE. -fgnu-unique changes
520 // some vague linkage data in COMDAT from STB_WEAK to STB_GNU_UNIQUE. Treat
521 // STB_GNU_UNIQUE like STB_WEAK so that we prefer the first among all
522 // STB_WEAK/STB_GNU_UNIQUE copies. If we prefer an incoming STB_GNU_UNIQUE to
523 // an existing STB_WEAK, there may be discarded section errors because the
524 // selected copy may be in a non-prevailing COMDAT.
525 return !isGlobal() && other
.isGlobal();
528 void elf::reportDuplicate(const Symbol
&sym
, const InputFile
*newFile
,
529 InputSectionBase
*errSec
, uint64_t errOffset
) {
530 if (config
->allowMultipleDefinition
)
532 // In glibc<2.32, crti.o has .gnu.linkonce.t.__x86.get_pc_thunk.bx, which
533 // is sort of proto-comdat. There is actually no duplicate if we have
534 // full support for .gnu.linkonce.
535 const Defined
*d
= dyn_cast
<Defined
>(&sym
);
536 if (!d
|| d
->getName() == "__x86.get_pc_thunk.bx")
538 // Allow absolute symbols with the same value for GNU ld compatibility.
539 if (!d
->section
&& !errSec
&& errOffset
&& d
->value
== errOffset
)
541 if (!d
->section
|| !errSec
) {
542 error("duplicate symbol: " + toString(sym
) + "\n>>> defined in " +
543 toString(sym
.file
) + "\n>>> defined in " + toString(newFile
));
547 // Construct and print an error message in the form of:
549 // ld.lld: error: duplicate symbol: foo
550 // >>> defined at bar.c:30
551 // >>> bar.o (/home/alice/src/bar.o)
552 // >>> defined at baz.c:563
553 // >>> baz.o in archive libbaz.a
554 auto *sec1
= cast
<InputSectionBase
>(d
->section
);
555 std::string src1
= sec1
->getSrcMsg(sym
, d
->value
);
556 std::string obj1
= sec1
->getObjMsg(d
->value
);
557 std::string src2
= errSec
->getSrcMsg(sym
, errOffset
);
558 std::string obj2
= errSec
->getObjMsg(errOffset
);
560 std::string msg
= "duplicate symbol: " + toString(sym
) + "\n>>> defined at ";
562 msg
+= src1
+ "\n>>> ";
563 msg
+= obj1
+ "\n>>> defined at ";
565 msg
+= src2
+ "\n>>> ";
570 void Symbol::checkDuplicate(const Defined
&other
) const {
571 if (isDefined() && !isWeak() && !other
.isWeak())
572 reportDuplicate(*this, other
.file
,
573 dyn_cast_or_null
<InputSectionBase
>(other
.section
),
577 void Symbol::resolve(const CommonSymbol
&other
) {
578 if (other
.exportDynamic
)
579 exportDynamic
= true;
580 if (other
.visibility() != STV_DEFAULT
) {
581 uint8_t v
= visibility(), ov
= other
.visibility();
582 setVisibility(v
== STV_DEFAULT
? ov
: std::min(v
, ov
));
584 if (isDefined() && !isWeak()) {
585 if (config
->warnCommon
)
586 warn("common " + getName() + " is overridden");
590 if (CommonSymbol
*oldSym
= dyn_cast
<CommonSymbol
>(this)) {
591 if (config
->warnCommon
)
592 warn("multiple common of " + getName());
593 oldSym
->alignment
= std::max(oldSym
->alignment
, other
.alignment
);
594 if (oldSym
->size
< other
.size
) {
595 oldSym
->file
= other
.file
;
596 oldSym
->size
= other
.size
;
601 if (auto *s
= dyn_cast
<SharedSymbol
>(this)) {
602 // Increase st_size if the shared symbol has a larger st_size. The shared
603 // symbol may be created from common symbols. The fact that some object
604 // files were linked into a shared object first should not change the
605 // regular rule that picks the largest st_size.
606 uint64_t size
= s
->size
;
607 other
.overwrite(*this);
608 if (size
> cast
<CommonSymbol
>(this)->size
)
609 cast
<CommonSymbol
>(this)->size
= size
;
611 other
.overwrite(*this);
615 void Symbol::resolve(const Defined
&other
) {
616 if (other
.exportDynamic
)
617 exportDynamic
= true;
618 if (other
.visibility() != STV_DEFAULT
) {
619 uint8_t v
= visibility(), ov
= other
.visibility();
620 setVisibility(v
== STV_DEFAULT
? ov
: std::min(v
, ov
));
622 if (shouldReplace(other
))
623 other
.overwrite(*this);
626 void Symbol::resolve(const LazyObject
&other
) {
627 if (isPlaceholder()) {
628 other
.overwrite(*this);
632 // For common objects, we want to look for global or weak definitions that
633 // should be extracted as the canonical definition instead.
634 if (LLVM_UNLIKELY(isCommon()) && elf::config
->fortranCommon
&&
635 other
.file
->shouldExtractForCommon(getName())) {
636 ctx
.backwardReferences
.erase(this);
637 other
.overwrite(*this);
642 if (!isUndefined()) {
643 // See the comment in resolveUndefined().
645 ctx
.backwardReferences
.erase(this);
649 // An undefined weak will not extract archive members. See comment on Lazy in
650 // Symbols.h for the details.
653 other
.overwrite(*this);
659 const InputFile
*oldFile
= file
;
661 if (!config
->whyExtract
.empty())
662 recordWhyExtract(oldFile
, *file
, *this);
665 void Symbol::resolve(const SharedSymbol
&other
) {
666 exportDynamic
= true;
667 if (isPlaceholder()) {
668 other
.overwrite(*this);
672 // See the comment in resolveCommon() above.
673 if (other
.size
> cast
<CommonSymbol
>(this)->size
)
674 cast
<CommonSymbol
>(this)->size
= other
.size
;
677 if (visibility() == STV_DEFAULT
&& (isUndefined() || isLazy())) {
678 // An undefined symbol with non default visibility must be satisfied
680 uint8_t bind
= binding
;
681 other
.overwrite(*this);
684 printTraceSymbol(other
, getName());