1 //===- InputFiles.cpp -----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "InputFiles.h"
10 #include "COFFLinkerContext.h"
13 #include "DebugTypes.h"
15 #include "SymbolTable.h"
17 #include "lld/Common/DWARF.h"
18 #include "llvm-c/lto.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/BinaryFormat/COFF.h"
22 #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
23 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
24 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
25 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
26 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
27 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
28 #include "llvm/LTO/LTO.h"
29 #include "llvm/Object/Binary.h"
30 #include "llvm/Object/COFF.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/Endian.h"
33 #include "llvm/Support/Error.h"
34 #include "llvm/Support/ErrorOr.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/Path.h"
37 #include "llvm/Target/TargetOptions.h"
38 #include "llvm/TargetParser/Triple.h"
41 #include <system_error>
45 using namespace llvm::COFF
;
46 using namespace llvm::codeview
;
47 using namespace llvm::object
;
48 using namespace llvm::support::endian
;
50 using namespace lld::coff
;
53 using llvm::support::ulittle32_t
;
55 // Returns the last element of a path, which is supposed to be a filename.
56 static StringRef
getBasename(StringRef path
) {
57 return sys::path::filename(path
, sys::path::Style::windows
);
60 // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
61 std::string
lld::toString(const coff::InputFile
*file
) {
64 if (file
->parentName
.empty() || file
->kind() == coff::InputFile::ImportKind
)
65 return std::string(file
->getName());
67 return (getBasename(file
->parentName
) + "(" + getBasename(file
->getName()) +
72 /// Checks that Source is compatible with being a weak alias to Target.
73 /// If Source is Undefined and has no weak alias set, makes it a weak
75 static void checkAndSetWeakAlias(COFFLinkerContext
&ctx
, InputFile
*f
,
76 Symbol
*source
, Symbol
*target
) {
77 if (auto *u
= dyn_cast
<Undefined
>(source
)) {
78 if (u
->weakAlias
&& u
->weakAlias
!= target
) {
79 // Weak aliases as produced by GCC are named in the form
80 // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
81 // of another symbol emitted near the weak symbol.
82 // Just use the definition from the first object file that defined
86 ctx
.symtab
.reportDuplicate(source
, f
);
88 u
->weakAlias
= target
;
92 static bool ignoredSymbolName(StringRef name
) {
93 return name
== "@feat.00" || name
== "@comp.id";
96 ArchiveFile::ArchiveFile(COFFLinkerContext
&ctx
, MemoryBufferRef m
)
97 : InputFile(ctx
, ArchiveKind
, m
) {}
99 void ArchiveFile::parse() {
100 // Parse a MemoryBufferRef as an archive file.
101 file
= CHECK(Archive::create(mb
), this);
103 // Read the symbol table to construct Lazy objects.
104 for (const Archive::Symbol
&sym
: file
->symbols())
105 ctx
.symtab
.addLazyArchive(this, sym
);
108 // Returns a buffer pointing to a member file containing a given symbol.
109 void ArchiveFile::addMember(const Archive::Symbol
&sym
) {
110 const Archive::Child
&c
=
111 CHECK(sym
.getMember(),
112 "could not get the member for symbol " + toCOFFString(ctx
, sym
));
114 // Return an empty buffer if we have already returned the same buffer.
115 if (!seen
.insert(c
.getChildOffset()).second
)
118 ctx
.driver
.enqueueArchiveMember(c
, sym
, getName());
121 std::vector
<MemoryBufferRef
> lld::coff::getArchiveMembers(Archive
*file
) {
122 std::vector
<MemoryBufferRef
> v
;
123 Error err
= Error::success();
124 for (const Archive::Child
&c
: file
->children(err
)) {
125 MemoryBufferRef mbref
=
126 CHECK(c
.getMemoryBufferRef(),
127 file
->getFileName() +
128 ": could not get the buffer for a child of the archive");
132 fatal(file
->getFileName() +
133 ": Archive::children failed: " + toString(std::move(err
)));
137 void ObjFile::parseLazy() {
138 // Native object file.
139 std::unique_ptr
<Binary
> coffObjPtr
= CHECK(createBinary(mb
), this);
140 COFFObjectFile
*coffObj
= cast
<COFFObjectFile
>(coffObjPtr
.get());
141 uint32_t numSymbols
= coffObj
->getNumberOfSymbols();
142 for (uint32_t i
= 0; i
< numSymbols
; ++i
) {
143 COFFSymbolRef coffSym
= check(coffObj
->getSymbol(i
));
144 if (coffSym
.isUndefined() || !coffSym
.isExternal() ||
145 coffSym
.isWeakExternal())
147 StringRef name
= check(coffObj
->getSymbolName(coffSym
));
148 if (coffSym
.isAbsolute() && ignoredSymbolName(name
))
150 ctx
.symtab
.addLazyObject(this, name
);
151 i
+= coffSym
.getNumberOfAuxSymbols();
155 void ObjFile::parse() {
156 // Parse a memory buffer as a COFF file.
157 std::unique_ptr
<Binary
> bin
= CHECK(createBinary(mb
), this);
159 if (auto *obj
= dyn_cast
<COFFObjectFile
>(bin
.get())) {
163 fatal(toString(this) + " is not a COFF file");
166 // Read section and symbol tables.
170 initializeDependencies();
173 const coff_section
*ObjFile::getSection(uint32_t i
) {
174 auto sec
= coffObj
->getSection(i
);
176 fatal("getSection failed: #" + Twine(i
) + ": " + toString(sec
.takeError()));
180 // We set SectionChunk pointers in the SparseChunks vector to this value
181 // temporarily to mark comdat sections as having an unknown resolution. As we
182 // walk the object file's symbol table, once we visit either a leader symbol or
183 // an associative section definition together with the parent comdat's leader,
184 // we set the pointer to either nullptr (to mark the section as discarded) or a
185 // valid SectionChunk for that section.
186 static SectionChunk
*const pendingComdat
= reinterpret_cast<SectionChunk
*>(1);
188 void ObjFile::initializeChunks() {
189 uint32_t numSections
= coffObj
->getNumberOfSections();
190 sparseChunks
.resize(numSections
+ 1);
191 for (uint32_t i
= 1; i
< numSections
+ 1; ++i
) {
192 const coff_section
*sec
= getSection(i
);
193 if (sec
->Characteristics
& IMAGE_SCN_LNK_COMDAT
)
194 sparseChunks
[i
] = pendingComdat
;
196 sparseChunks
[i
] = readSection(i
, nullptr, "");
200 SectionChunk
*ObjFile::readSection(uint32_t sectionNumber
,
201 const coff_aux_section_definition
*def
,
202 StringRef leaderName
) {
203 const coff_section
*sec
= getSection(sectionNumber
);
206 if (Expected
<StringRef
> e
= coffObj
->getSectionName(sec
))
209 fatal("getSectionName failed: #" + Twine(sectionNumber
) + ": " +
210 toString(e
.takeError()));
212 if (name
== ".drectve") {
213 ArrayRef
<uint8_t> data
;
214 cantFail(coffObj
->getSectionContents(sec
, data
));
215 directives
= StringRef((const char *)data
.data(), data
.size());
219 if (name
== ".llvm_addrsig") {
224 if (name
== ".llvm.call-graph-profile") {
229 // Object files may have DWARF debug info or MS CodeView debug info
232 // DWARF sections don't need any special handling from the perspective
233 // of the linker; they are just a data section containing relocations.
234 // We can just link them to complete debug info.
236 // CodeView needs linker support. We need to interpret debug info,
237 // and then write it to a separate .pdb file.
239 // Ignore DWARF debug info unless /debug is given.
240 if (!ctx
.config
.debug
&& name
.starts_with(".debug_"))
243 if (sec
->Characteristics
& llvm::COFF::IMAGE_SCN_LNK_REMOVE
)
245 auto *c
= make
<SectionChunk
>(this, sec
);
247 c
->checksum
= def
->CheckSum
;
249 // CodeView sections are stored to a different vector because they are not
250 // linked in the regular manner.
252 debugChunks
.push_back(c
);
253 else if (name
== ".gfids$y")
254 guardFidChunks
.push_back(c
);
255 else if (name
== ".giats$y")
256 guardIATChunks
.push_back(c
);
257 else if (name
== ".gljmp$y")
258 guardLJmpChunks
.push_back(c
);
259 else if (name
== ".gehcont$y")
260 guardEHContChunks
.push_back(c
);
261 else if (name
== ".sxdata")
262 sxDataChunks
.push_back(c
);
263 else if (ctx
.config
.tailMerge
&& sec
->NumberOfRelocations
== 0 &&
264 name
== ".rdata" && leaderName
.starts_with("??_C@"))
265 // COFF sections that look like string literal sections (i.e. no
266 // relocations, in .rdata, leader symbol name matches the MSVC name mangling
267 // for string literals) are subject to string tail merging.
268 MergeChunk::addSection(ctx
, c
);
269 else if (name
== ".rsrc" || name
.starts_with(".rsrc$"))
270 resourceChunks
.push_back(c
);
277 void ObjFile::includeResourceChunks() {
278 chunks
.insert(chunks
.end(), resourceChunks
.begin(), resourceChunks
.end());
281 void ObjFile::readAssociativeDefinition(
282 COFFSymbolRef sym
, const coff_aux_section_definition
*def
) {
283 readAssociativeDefinition(sym
, def
, def
->getNumber(sym
.isBigObj()));
286 void ObjFile::readAssociativeDefinition(COFFSymbolRef sym
,
287 const coff_aux_section_definition
*def
,
288 uint32_t parentIndex
) {
289 SectionChunk
*parent
= sparseChunks
[parentIndex
];
290 int32_t sectionNumber
= sym
.getSectionNumber();
293 StringRef name
= check(coffObj
->getSymbolName(sym
));
295 StringRef parentName
;
296 const coff_section
*parentSec
= getSection(parentIndex
);
297 if (Expected
<StringRef
> e
= coffObj
->getSectionName(parentSec
))
299 error(toString(this) + ": associative comdat " + name
+ " (sec " +
300 Twine(sectionNumber
) + ") has invalid reference to section " +
301 parentName
+ " (sec " + Twine(parentIndex
) + ")");
304 if (parent
== pendingComdat
) {
305 // This can happen if an associative comdat refers to another associative
306 // comdat that appears after it (invalid per COFF spec) or to a section
307 // without any symbols.
312 // Check whether the parent is prevailing. If it is, so are we, and we read
313 // the section; otherwise mark it as discarded.
315 SectionChunk
*c
= readSection(sectionNumber
, def
, "");
316 sparseChunks
[sectionNumber
] = c
;
318 c
->selection
= IMAGE_COMDAT_SELECT_ASSOCIATIVE
;
319 parent
->addAssociative(c
);
322 sparseChunks
[sectionNumber
] = nullptr;
326 void ObjFile::recordPrevailingSymbolForMingw(
327 COFFSymbolRef sym
, DenseMap
<StringRef
, uint32_t> &prevailingSectionMap
) {
328 // For comdat symbols in executable sections, where this is the copy
329 // of the section chunk we actually include instead of discarding it,
330 // add the symbol to a map to allow using it for implicitly
331 // associating .[px]data$<func> sections to it.
332 // Use the suffix from the .text$<func> instead of the leader symbol
333 // name, for cases where the names differ (i386 mangling/decorations,
334 // cases where the leader is a weak symbol named .weak.func.default*).
335 int32_t sectionNumber
= sym
.getSectionNumber();
336 SectionChunk
*sc
= sparseChunks
[sectionNumber
];
337 if (sc
&& sc
->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE
) {
338 StringRef name
= sc
->getSectionName().split('$').second
;
339 prevailingSectionMap
[name
] = sectionNumber
;
343 void ObjFile::maybeAssociateSEHForMingw(
344 COFFSymbolRef sym
, const coff_aux_section_definition
*def
,
345 const DenseMap
<StringRef
, uint32_t> &prevailingSectionMap
) {
346 StringRef name
= check(coffObj
->getSymbolName(sym
));
347 if (name
.consume_front(".pdata$") || name
.consume_front(".xdata$") ||
348 name
.consume_front(".eh_frame$")) {
349 // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
350 // associative to the symbol <func>.
351 auto parentSym
= prevailingSectionMap
.find(name
);
352 if (parentSym
!= prevailingSectionMap
.end())
353 readAssociativeDefinition(sym
, def
, parentSym
->second
);
357 Symbol
*ObjFile::createRegular(COFFSymbolRef sym
) {
358 SectionChunk
*sc
= sparseChunks
[sym
.getSectionNumber()];
359 if (sym
.isExternal()) {
360 StringRef name
= check(coffObj
->getSymbolName(sym
));
362 return ctx
.symtab
.addRegular(this, name
, sym
.getGeneric(), sc
,
364 // For MinGW symbols named .weak.* that point to a discarded section,
365 // don't create an Undefined symbol. If nothing ever refers to the symbol,
366 // everything should be fine. If something actually refers to the symbol
367 // (e.g. the undefined weak alias), linking will fail due to undefined
368 // references at the end.
369 if (ctx
.config
.mingw
&& name
.starts_with(".weak."))
371 return ctx
.symtab
.addUndefined(name
, this, false);
374 return make
<DefinedRegular
>(this, /*Name*/ "", /*IsCOMDAT*/ false,
375 /*IsExternal*/ false, sym
.getGeneric(), sc
);
379 void ObjFile::initializeSymbols() {
380 uint32_t numSymbols
= coffObj
->getNumberOfSymbols();
381 symbols
.resize(numSymbols
);
383 SmallVector
<std::pair
<Symbol
*, uint32_t>, 8> weakAliases
;
384 std::vector
<uint32_t> pendingIndexes
;
385 pendingIndexes
.reserve(numSymbols
);
387 DenseMap
<StringRef
, uint32_t> prevailingSectionMap
;
388 std::vector
<const coff_aux_section_definition
*> comdatDefs(
389 coffObj
->getNumberOfSections() + 1);
391 for (uint32_t i
= 0; i
< numSymbols
; ++i
) {
392 COFFSymbolRef coffSym
= check(coffObj
->getSymbol(i
));
393 bool prevailingComdat
;
394 if (coffSym
.isUndefined()) {
395 symbols
[i
] = createUndefined(coffSym
);
396 } else if (coffSym
.isWeakExternal()) {
397 symbols
[i
] = createUndefined(coffSym
);
398 uint32_t tagIndex
= coffSym
.getAux
<coff_aux_weak_external
>()->TagIndex
;
399 weakAliases
.emplace_back(symbols
[i
], tagIndex
);
400 } else if (std::optional
<Symbol
*> optSym
=
401 createDefined(coffSym
, comdatDefs
, prevailingComdat
)) {
402 symbols
[i
] = *optSym
;
403 if (ctx
.config
.mingw
&& prevailingComdat
)
404 recordPrevailingSymbolForMingw(coffSym
, prevailingSectionMap
);
406 // createDefined() returns std::nullopt if a symbol belongs to a section
407 // that was pending at the point when the symbol was read. This can happen
409 // 1) section definition symbol for a comdat leader;
410 // 2) symbol belongs to a comdat section associated with another section.
411 // In both of these cases, we can expect the section to be resolved by
412 // the time we finish visiting the remaining symbols in the symbol
413 // table. So we postpone the handling of this symbol until that time.
414 pendingIndexes
.push_back(i
);
416 i
+= coffSym
.getNumberOfAuxSymbols();
419 for (uint32_t i
: pendingIndexes
) {
420 COFFSymbolRef sym
= check(coffObj
->getSymbol(i
));
421 if (const coff_aux_section_definition
*def
= sym
.getSectionDefinition()) {
422 if (def
->Selection
== IMAGE_COMDAT_SELECT_ASSOCIATIVE
)
423 readAssociativeDefinition(sym
, def
);
424 else if (ctx
.config
.mingw
)
425 maybeAssociateSEHForMingw(sym
, def
, prevailingSectionMap
);
427 if (sparseChunks
[sym
.getSectionNumber()] == pendingComdat
) {
428 StringRef name
= check(coffObj
->getSymbolName(sym
));
429 log("comdat section " + name
+
430 " without leader and unassociated, discarding");
433 symbols
[i
] = createRegular(sym
);
436 for (auto &kv
: weakAliases
) {
437 Symbol
*sym
= kv
.first
;
438 uint32_t idx
= kv
.second
;
439 checkAndSetWeakAlias(ctx
, this, sym
, symbols
[idx
]);
442 // Free the memory used by sparseChunks now that symbol loading is finished.
443 decltype(sparseChunks
)().swap(sparseChunks
);
446 Symbol
*ObjFile::createUndefined(COFFSymbolRef sym
) {
447 StringRef name
= check(coffObj
->getSymbolName(sym
));
448 return ctx
.symtab
.addUndefined(name
, this, sym
.isWeakExternal());
451 static const coff_aux_section_definition
*findSectionDef(COFFObjectFile
*obj
,
453 uint32_t numSymbols
= obj
->getNumberOfSymbols();
454 for (uint32_t i
= 0; i
< numSymbols
; ++i
) {
455 COFFSymbolRef sym
= check(obj
->getSymbol(i
));
456 if (sym
.getSectionNumber() != section
)
458 if (const coff_aux_section_definition
*def
= sym
.getSectionDefinition())
464 void ObjFile::handleComdatSelection(
465 COFFSymbolRef sym
, COMDATType
&selection
, bool &prevailing
,
466 DefinedRegular
*leader
,
467 const llvm::object::coff_aux_section_definition
*def
) {
470 // There's already an existing comdat for this symbol: `Leader`.
471 // Use the comdats's selection field to determine if the new
472 // symbol in `Sym` should be discarded, produce a duplicate symbol
475 SectionChunk
*leaderChunk
= leader
->getChunk();
476 COMDATType leaderSelection
= leaderChunk
->selection
;
478 assert(leader
->data
&& "Comdat leader without SectionChunk?");
479 if (isa
<BitcodeFile
>(leader
->file
)) {
480 // If the leader is only a LTO symbol, we don't know e.g. its final size
481 // yet, so we can't do the full strict comdat selection checking yet.
482 selection
= leaderSelection
= IMAGE_COMDAT_SELECT_ANY
;
485 if ((selection
== IMAGE_COMDAT_SELECT_ANY
&&
486 leaderSelection
== IMAGE_COMDAT_SELECT_LARGEST
) ||
487 (selection
== IMAGE_COMDAT_SELECT_LARGEST
&&
488 leaderSelection
== IMAGE_COMDAT_SELECT_ANY
)) {
489 // cl.exe picks "any" for vftables when building with /GR- and
490 // "largest" when building with /GR. To be able to link object files
491 // compiled with each flag, "any" and "largest" are merged as "largest".
492 leaderSelection
= selection
= IMAGE_COMDAT_SELECT_LARGEST
;
495 // GCCs __declspec(selectany) doesn't actually pick "any" but "same size as".
496 // Clang on the other hand picks "any". To be able to link two object files
497 // with a __declspec(selectany) declaration, one compiled with gcc and the
498 // other with clang, we merge them as proper "same size as"
499 if (ctx
.config
.mingw
&& ((selection
== IMAGE_COMDAT_SELECT_ANY
&&
500 leaderSelection
== IMAGE_COMDAT_SELECT_SAME_SIZE
) ||
501 (selection
== IMAGE_COMDAT_SELECT_SAME_SIZE
&&
502 leaderSelection
== IMAGE_COMDAT_SELECT_ANY
))) {
503 leaderSelection
= selection
= IMAGE_COMDAT_SELECT_SAME_SIZE
;
506 // Other than that, comdat selections must match. This is a bit more
507 // strict than link.exe which allows merging "any" and "largest" if "any"
508 // is the first symbol the linker sees, and it allows merging "largest"
509 // with everything (!) if "largest" is the first symbol the linker sees.
510 // Making this symmetric independent of which selection is seen first
511 // seems better though.
512 // (This behavior matches ModuleLinker::getComdatResult().)
513 if (selection
!= leaderSelection
) {
514 log(("conflicting comdat type for " + toString(ctx
, *leader
) + ": " +
515 Twine((int)leaderSelection
) + " in " + toString(leader
->getFile()) +
516 " and " + Twine((int)selection
) + " in " + toString(this))
518 ctx
.symtab
.reportDuplicate(leader
, this);
523 case IMAGE_COMDAT_SELECT_NODUPLICATES
:
524 ctx
.symtab
.reportDuplicate(leader
, this);
527 case IMAGE_COMDAT_SELECT_ANY
:
531 case IMAGE_COMDAT_SELECT_SAME_SIZE
:
532 if (leaderChunk
->getSize() != getSection(sym
)->SizeOfRawData
) {
533 if (!ctx
.config
.mingw
) {
534 ctx
.symtab
.reportDuplicate(leader
, this);
536 const coff_aux_section_definition
*leaderDef
= nullptr;
537 if (leaderChunk
->file
)
538 leaderDef
= findSectionDef(leaderChunk
->file
->getCOFFObj(),
539 leaderChunk
->getSectionNumber());
540 if (!leaderDef
|| leaderDef
->Length
!= def
->Length
)
541 ctx
.symtab
.reportDuplicate(leader
, this);
546 case IMAGE_COMDAT_SELECT_EXACT_MATCH
: {
547 SectionChunk
newChunk(this, getSection(sym
));
548 // link.exe only compares section contents here and doesn't complain
549 // if the two comdat sections have e.g. different alignment.
551 if (leaderChunk
->getContents() != newChunk
.getContents())
552 ctx
.symtab
.reportDuplicate(leader
, this, &newChunk
, sym
.getValue());
556 case IMAGE_COMDAT_SELECT_ASSOCIATIVE
:
557 // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
558 // (This means lld-link doesn't produce duplicate symbol errors for
559 // associative comdats while link.exe does, but associate comdats
560 // are never extern in practice.)
561 llvm_unreachable("createDefined not called for associative comdats");
563 case IMAGE_COMDAT_SELECT_LARGEST
:
564 if (leaderChunk
->getSize() < getSection(sym
)->SizeOfRawData
) {
565 // Replace the existing comdat symbol with the new one.
566 StringRef name
= check(coffObj
->getSymbolName(sym
));
567 // FIXME: This is incorrect: With /opt:noref, the previous sections
568 // make it into the final executable as well. Correct handling would
569 // be to undo reading of the whole old section that's being replaced,
570 // or doing one pass that determines what the final largest comdat
571 // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
572 // only the largest one.
573 replaceSymbol
<DefinedRegular
>(leader
, this, name
, /*IsCOMDAT*/ true,
574 /*IsExternal*/ true, sym
.getGeneric(),
580 case IMAGE_COMDAT_SELECT_NEWEST
:
581 llvm_unreachable("should have been rejected earlier");
585 std::optional
<Symbol
*> ObjFile::createDefined(
587 std::vector
<const coff_aux_section_definition
*> &comdatDefs
,
590 auto getName
= [&]() { return check(coffObj
->getSymbolName(sym
)); };
592 if (sym
.isCommon()) {
593 auto *c
= make
<CommonChunk
>(sym
);
595 return ctx
.symtab
.addCommon(this, getName(), sym
.getValue(),
596 sym
.getGeneric(), c
);
599 if (sym
.isAbsolute()) {
600 StringRef name
= getName();
602 if (name
== "@feat.00")
603 feat00Flags
= sym
.getValue();
604 // Skip special symbols.
605 if (ignoredSymbolName(name
))
608 if (sym
.isExternal())
609 return ctx
.symtab
.addAbsolute(name
, sym
);
610 return make
<DefinedAbsolute
>(ctx
, name
, sym
);
613 int32_t sectionNumber
= sym
.getSectionNumber();
614 if (sectionNumber
== llvm::COFF::IMAGE_SYM_DEBUG
)
617 if (llvm::COFF::isReservedSectionNumber(sectionNumber
))
618 fatal(toString(this) + ": " + getName() +
619 " should not refer to special section " + Twine(sectionNumber
));
621 if ((uint32_t)sectionNumber
>= sparseChunks
.size())
622 fatal(toString(this) + ": " + getName() +
623 " should not refer to non-existent section " + Twine(sectionNumber
));
626 // A comdat symbol consists of two symbol table entries.
627 // The first symbol entry has the name of the section (e.g. .text), fixed
628 // values for the other fields, and one auxiliary record.
629 // The second symbol entry has the name of the comdat symbol, called the
631 // When this function is called for the first symbol entry of a comdat,
632 // it sets comdatDefs and returns std::nullopt, and when it's called for the
633 // second symbol entry it reads comdatDefs and then sets it back to nullptr.
635 // Handle comdat leader.
636 if (const coff_aux_section_definition
*def
= comdatDefs
[sectionNumber
]) {
637 comdatDefs
[sectionNumber
] = nullptr;
638 DefinedRegular
*leader
;
640 if (sym
.isExternal()) {
641 std::tie(leader
, prevailing
) =
642 ctx
.symtab
.addComdat(this, getName(), sym
.getGeneric());
644 leader
= make
<DefinedRegular
>(this, /*Name*/ "", /*IsCOMDAT*/ false,
645 /*IsExternal*/ false, sym
.getGeneric());
649 if (def
->Selection
< (int)IMAGE_COMDAT_SELECT_NODUPLICATES
||
650 // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
651 // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
652 def
->Selection
> (int)IMAGE_COMDAT_SELECT_LARGEST
) {
653 fatal("unknown comdat type " + std::to_string((int)def
->Selection
) +
654 " for " + getName() + " in " + toString(this));
656 COMDATType selection
= (COMDATType
)def
->Selection
;
658 if (leader
->isCOMDAT
)
659 handleComdatSelection(sym
, selection
, prevailing
, leader
, def
);
662 SectionChunk
*c
= readSection(sectionNumber
, def
, getName());
663 sparseChunks
[sectionNumber
] = c
;
664 c
->sym
= cast
<DefinedRegular
>(leader
);
665 c
->selection
= selection
;
666 cast
<DefinedRegular
>(leader
)->data
= &c
->repl
;
668 sparseChunks
[sectionNumber
] = nullptr;
673 // Prepare to handle the comdat leader symbol by setting the section's
674 // ComdatDefs pointer if we encounter a non-associative comdat.
675 if (sparseChunks
[sectionNumber
] == pendingComdat
) {
676 if (const coff_aux_section_definition
*def
= sym
.getSectionDefinition()) {
677 if (def
->Selection
!= IMAGE_COMDAT_SELECT_ASSOCIATIVE
)
678 comdatDefs
[sectionNumber
] = def
;
683 return createRegular(sym
);
686 MachineTypes
ObjFile::getMachineType() {
688 return static_cast<MachineTypes
>(coffObj
->getMachine());
689 return IMAGE_FILE_MACHINE_UNKNOWN
;
692 ArrayRef
<uint8_t> ObjFile::getDebugSection(StringRef secName
) {
693 if (SectionChunk
*sec
= SectionChunk::findByName(debugChunks
, secName
))
694 return sec
->consumeDebugMagic();
698 // OBJ files systematically store critical information in a .debug$S stream,
699 // even if the TU was compiled with no debug info. At least two records are
700 // always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
701 // PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
702 // currently used to initialize the hotPatchable member.
703 void ObjFile::initializeFlags() {
704 ArrayRef
<uint8_t> data
= getDebugSection(".debug$S");
708 DebugSubsectionArray subsections
;
710 BinaryStreamReader
reader(data
, support::little
);
711 ExitOnError exitOnErr
;
712 exitOnErr(reader
.readArray(subsections
, data
.size()));
714 for (const DebugSubsectionRecord
&ss
: subsections
) {
715 if (ss
.kind() != DebugSubsectionKind::Symbols
)
720 // Only parse the first two records. We are only looking for S_OBJNAME
721 // and S_COMPILE3, and they usually appear at the beginning of the
723 for (unsigned i
= 0; i
< 2; ++i
) {
724 Expected
<CVSymbol
> sym
= readSymbolFromStream(ss
.getRecordData(), offset
);
726 consumeError(sym
.takeError());
729 if (sym
->kind() == SymbolKind::S_COMPILE3
) {
731 cantFail(SymbolDeserializer::deserializeAs
<Compile3Sym
>(sym
.get()));
733 (cs
.Flags
& CompileSym3Flags::HotPatch
) != CompileSym3Flags::None
;
735 if (sym
->kind() == SymbolKind::S_OBJNAME
) {
736 auto objName
= cantFail(SymbolDeserializer::deserializeAs
<ObjNameSym
>(
738 if (objName
.Signature
)
739 pchSignature
= objName
.Signature
;
741 offset
+= sym
->length();
746 // Depending on the compilation flags, OBJs can refer to external files,
747 // necessary to merge this OBJ into the final PDB. We currently support two
748 // types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
749 // And PDB type servers, when compiling with /Zi. This function extracts these
750 // dependencies and makes them available as a TpiSource interface (see
751 // DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
752 // output even with /Yc and /Yu and with /Zi.
753 void ObjFile::initializeDependencies() {
754 if (!ctx
.config
.debug
)
759 ArrayRef
<uint8_t> data
= getDebugSection(".debug$P");
763 data
= getDebugSection(".debug$T");
765 // symbols but no types, make a plain, empty TpiSource anyway, because it
766 // simplifies adding the symbols later.
768 if (!debugChunks
.empty())
769 debugTypesObj
= makeTpiSource(ctx
, this);
773 // Get the first type record. It will indicate if this object uses a type
774 // server (/Zi) or a PCH file (/Yu).
776 BinaryStreamReader
reader(data
, support::little
);
777 cantFail(reader
.readArray(types
, reader
.getLength()));
778 CVTypeArray::Iterator firstType
= types
.begin();
779 if (firstType
== types
.end())
782 // Remember the .debug$T or .debug$P section.
785 // This object file is a PCH file that others will depend on.
787 debugTypesObj
= makePrecompSource(ctx
, this);
791 // This object file was compiled with /Zi. Enqueue the PDB dependency.
792 if (firstType
->kind() == LF_TYPESERVER2
) {
793 TypeServer2Record ts
= cantFail(
794 TypeDeserializer::deserializeAs
<TypeServer2Record
>(firstType
->data()));
795 debugTypesObj
= makeUseTypeServerSource(ctx
, this, ts
);
796 enqueuePdbFile(ts
.getName(), this);
800 // This object was compiled with /Yu. It uses types from another object file
801 // with a matching signature.
802 if (firstType
->kind() == LF_PRECOMP
) {
803 PrecompRecord precomp
= cantFail(
804 TypeDeserializer::deserializeAs
<PrecompRecord
>(firstType
->data()));
805 // We're better off trusting the LF_PRECOMP signature. In some cases the
806 // S_OBJNAME record doesn't contain a valid PCH signature.
807 if (precomp
.Signature
)
808 pchSignature
= precomp
.Signature
;
809 debugTypesObj
= makeUsePrecompSource(ctx
, this, precomp
);
810 // Drop the LF_PRECOMP record from the input stream.
811 debugTypes
= debugTypes
.drop_front(firstType
->RecordData
.size());
815 // This is a plain old object file.
816 debugTypesObj
= makeTpiSource(ctx
, this);
819 // Make a PDB path assuming the PDB is in the same folder as the OBJ
820 static std::string
getPdbBaseName(ObjFile
*file
, StringRef tSPath
) {
821 StringRef localPath
=
822 !file
->parentName
.empty() ? file
->parentName
: file
->getName();
823 SmallString
<128> path
= sys::path::parent_path(localPath
);
825 // Currently, type server PDBs are only created by MSVC cl, which only runs
826 // on Windows, so we can assume type server paths are Windows style.
827 sys::path::append(path
,
828 sys::path::filename(tSPath
, sys::path::Style::windows
));
829 return std::string(path
.str());
832 // The casing of the PDB path stamped in the OBJ can differ from the actual path
833 // on disk. With this, we ensure to always use lowercase as a key for the
834 // pdbInputFileInstances map, at least on Windows.
835 static std::string
normalizePdbPath(StringRef path
) {
839 return std::string(path
);
843 // If existing, return the actual PDB path on disk.
844 static std::optional
<std::string
> findPdbPath(StringRef pdbPath
,
845 ObjFile
*dependentFile
) {
846 // Ensure the file exists before anything else. In some cases, if the path
847 // points to a removable device, Driver::enqueuePath() would fail with an
848 // error (EAGAIN, "resource unavailable try again") which we want to skip
850 if (llvm::sys::fs::exists(pdbPath
))
851 return normalizePdbPath(pdbPath
);
852 std::string ret
= getPdbBaseName(dependentFile
, pdbPath
);
853 if (llvm::sys::fs::exists(ret
))
854 return normalizePdbPath(ret
);
858 PDBInputFile::PDBInputFile(COFFLinkerContext
&ctx
, MemoryBufferRef m
)
859 : InputFile(ctx
, PDBKind
, m
) {}
861 PDBInputFile::~PDBInputFile() = default;
863 PDBInputFile
*PDBInputFile::findFromRecordPath(const COFFLinkerContext
&ctx
,
866 auto p
= findPdbPath(path
.str(), fromFile
);
869 auto it
= ctx
.pdbInputFileInstances
.find(*p
);
870 if (it
!= ctx
.pdbInputFileInstances
.end())
875 void PDBInputFile::parse() {
876 ctx
.pdbInputFileInstances
[mb
.getBufferIdentifier().str()] = this;
878 std::unique_ptr
<pdb::IPDBSession
> thisSession
;
879 Error E
= pdb::NativeSession::createFromPdb(
880 MemoryBuffer::getMemBuffer(mb
, false), thisSession
);
882 loadErrorStr
.emplace(toString(std::move(E
)));
883 return; // fail silently at this point - the error will be handled later,
884 // when merging the debug type stream
887 session
.reset(static_cast<pdb::NativeSession
*>(thisSession
.release()));
889 pdb::PDBFile
&pdbFile
= session
->getPDBFile();
890 auto expectedInfo
= pdbFile
.getPDBInfoStream();
891 // All PDB Files should have an Info stream.
893 loadErrorStr
.emplace(toString(expectedInfo
.takeError()));
896 debugTypesObj
= makeTypeServerSource(ctx
, this);
899 // Used only for DWARF debug info, which is not common (except in MinGW
900 // environments). This returns an optional pair of file name and line
901 // number for where the variable was defined.
902 std::optional
<std::pair
<StringRef
, uint32_t>>
903 ObjFile::getVariableLocation(StringRef var
) {
905 dwarf
= make
<DWARFCache
>(DWARFContext::create(*getCOFFObj()));
909 if (ctx
.config
.machine
== I386
)
910 var
.consume_front("_");
911 std::optional
<std::pair
<std::string
, unsigned>> ret
=
912 dwarf
->getVariableLoc(var
);
915 return std::make_pair(saver().save(ret
->first
), ret
->second
);
918 // Used only for DWARF debug info, which is not common (except in MinGW
920 std::optional
<DILineInfo
> ObjFile::getDILineInfo(uint32_t offset
,
921 uint32_t sectionIndex
) {
923 dwarf
= make
<DWARFCache
>(DWARFContext::create(*getCOFFObj()));
928 return dwarf
->getDILineInfo(offset
, sectionIndex
);
931 void ObjFile::enqueuePdbFile(StringRef path
, ObjFile
*fromFile
) {
932 auto p
= findPdbPath(path
.str(), fromFile
);
935 auto it
= ctx
.pdbInputFileInstances
.emplace(*p
, nullptr);
937 return; // already scheduled for load
938 ctx
.driver
.enqueuePDB(*p
);
941 ImportFile::ImportFile(COFFLinkerContext
&ctx
, MemoryBufferRef m
)
942 : InputFile(ctx
, ImportKind
, m
), live(!ctx
.config
.doGC
), thunkLive(live
) {}
// Parses an import-library member held in `mb`. Visible steps: validate the
// buffer size against the coff_import_header, read the symbol name and the
// DLL name stored after the header, derive the external name from the
// header's name type, register the "__imp_"-prefixed data symbol and then,
// depending on the header's import type, register a second data symbol under
// the plain name (IMPORT_CONST) or an import thunk (IMPORT_CODE).
// NOTE(review): several lines of this function are missing from this excerpt
// (the declaration of `extName`, further switch cases and their `break`s, and
// whatever follows the "impSym is nullptr" comment); confirm against the full
// file before relying on this summary.
944 void ImportFile::parse() {
945 const char *buf
= mb
.getBufferStart();
// The buffer begins with the import header.
946 const auto *hdr
= reinterpret_cast<const coff_import_header
*>(buf
);
948 // Check if the total size is valid.
949 if (mb
.getBufferSize() != sizeof(*hdr
) + hdr
->SizeOfData
)
950 fatal("broken import library");
952 // Read names and create an __imp_ symbol.
// The symbol name is the C string that starts right after the header.
953 StringRef name
= saver().save(StringRef(buf
+ sizeof(*hdr
)));
954 StringRef impName
= saver().save("__imp_" + name
);
// The DLL name is the C string that follows the symbol name; the +1 skips the
// symbol name's terminating NUL.
955 const char *nameStart
= buf
+ sizeof(coff_import_header
) + name
.size() + 1;
956 dllName
= std::string(StringRef(nameStart
));
// Derive the external name from the name type stored in the header.
958 switch (hdr
->getNameType()) {
// NOTE(review): ltrim1 is defined elsewhere; presumably it strips one leading
// character out of "?@_" - confirm against its definition.
965 case IMPORT_NAME_NOPREFIX
:
966 extName
= ltrim1(name
, "?@_");
968 case IMPORT_NAME_UNDECORATE
:
969 extName
= ltrim1(name
, "?@_");
// Keep only the part before the first '@' (the whole string if there is none).
970 extName
= extName
.substr(0, extName
.find('@'));
975 externalName
= extName
;
977 impSym
= ctx
.symtab
.addImportData(impName
, this);
978 // If this was a duplicate, we logged an error but may continue;
979 // in this case, impSym is nullptr.
// For IMPORT_CONST a data symbol is also registered under the plain name; its
// result is deliberately discarded.
983 if (hdr
->getType() == llvm::COFF::IMPORT_CONST
)
984 static_cast<void>(ctx
.symtab
.addImportData(name
, this));
986 // If type is function, we need to create a thunk which jump to an
987 // address pointed by the __imp_ symbol. (This allows you to call
988 // DLL functions just like regular non-DLL functions.)
989 if (hdr
->getType() == llvm::COFF::IMPORT_CODE
)
990 thunkSym
= ctx
.symtab
.addImportThunk(
991 name
, cast_or_null
<DefinedImportData
>(impSym
), hdr
->Machine
);
// Creates an input file of kind BitcodeKind for the bitcode buffer `mb` and
// creates the lto::InputFile stored in `obj`. The lto::InputFile is created
// from a MemoryBufferRef that carries the same data as `mb` but an
// identifier built here from `archiveName`, the file name of `path` and
// `offsetInArchive`.
// NOTE(review): the final parameter (`lazy`, forwarded to InputFile) and part
// of the expression that builds the identifier are not visible in this
// excerpt; confirm against the full file.
994 BitcodeFile::BitcodeFile(COFFLinkerContext
&ctx
, MemoryBufferRef mb
,
995 StringRef archiveName
, uint64_t offsetInArchive
,
997 : InputFile(ctx
, BitcodeKind
, mb
, lazy
) {
998 std::string path
= mb
.getBufferIdentifier().str();
// With config.thinLTOIndexOnly set, the path is rewritten using the
// configured (first, second) thinLTOObjectSuffixReplace pair.
999 if (ctx
.config
.thinLTOIndexOnly
)
1000 path
= replaceThinLTOSuffix(mb
.getBufferIdentifier(),
1001 ctx
.config
.thinLTOObjectSuffixReplace
.first
,
1002 ctx
.config
.thinLTOObjectSuffixReplace
.second
);
1004 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1005 // name. If two archives define two members with the same name, this
1006 // causes a collision which results in only one of the objects being taken
1007 // into consideration at LTO time (which very likely causes undefined
1008 // symbols later in the link stage). So we append file offset to make
// NOTE(review): the end of the sentence above is cut off in this excerpt.
1010 MemoryBufferRef
mbref(mb
.getBuffer(),
1011 saver().save(archiveName
.empty()
1014 sys::path::filename(path
) +
1015 utostr(offsetInArchive
)));
// check() is defined elsewhere; presumably it reports a failed creation.
1017 obj
= check(lto::InputFile::create(mbref
));
1020 BitcodeFile::~BitcodeFile() = default;
// Adds the symbols of this bitcode file to the symbol table. Each comdat in
// the file's comdat table is first registered with addComdat. Each symbol is
// then classified (undefined, common, weak and indirect, comdat member, or
// other) and added through the matching symtab call. Every resulting symbol
// is appended to `symbols`, symbols flagged as used are also appended to
// config.gcroot, and finally the file's COFF linker options are stored in
// `directives`.
// NOTE(review): several lines are missing from this excerpt (the declaration
// of `sym`, the assignment of the addComdat result, some `else` lines and the
// last argument of the final addRegular call); confirm against the full file.
1022 void BitcodeFile::parse() {
1023 llvm::StringSaver
&saver
= lld::saver();
// One (symbol, flag) entry per comdat in the file's comdat table.
1025 std::vector
<std::pair
<Symbol
*, bool>> comdat(obj
->getComdatTable().size());
1026 for (size_t i
= 0; i
!= obj
->getComdatTable().size(); ++i
)
1027 // FIXME: Check nodeduplicate
1029 ctx
.symtab
.addComdat(this, saver
.save(obj
->getComdatTable()[i
].first
));
1030 for (const lto::InputFile::Symbol
&objSym
: obj
->symbols()) {
1031 StringRef symName
= saver
.save(objSym
.getName());
1032 int comdatIndex
= objSym
.getComdatIndex();
// Pick the placeholder chunk: the context's LTO text chunk for executable
// symbols, its LTO data chunk otherwise (the `else` line is not visible in
// this excerpt).
1034 SectionChunk
*fakeSC
= nullptr;
1035 if (objSym
.isExecutable())
1036 fakeSC
= &ctx
.ltoTextSectionChunk
.chunk
;
1038 fakeSC
= &ctx
.ltoDataSectionChunk
.chunk
;
1039 if (objSym
.isUndefined()) {
1040 sym
= ctx
.symtab
.addUndefined(symName
, this, false);
1041 } else if (objSym
.isCommon()) {
1042 sym
= ctx
.symtab
.addCommon(this, symName
, objSym
.getCommonSize());
1043 } else if (objSym
.isWeak() && objSym
.isIndirect()) {
// Weak and indirect: add the symbol as undefined (third argument true) and
// link it to its COFF weak-external fallback via checkAndSetWeakAlias.
1045 sym
= ctx
.symtab
.addUndefined(symName
, this, true);
1046 std::string fallback
= std::string(objSym
.getCOFFWeakExternalFallback());
1047 Symbol
*alias
= ctx
.symtab
.addUndefined(saver
.save(fallback
));
1048 checkAndSetWeakAlias(ctx
, this, sym
, alias
);
1049 } else if (comdatIndex
!= -1) {
// A symbol named like its comdat reuses the symbol recorded for that comdat
// and gets the placeholder chunk if it has no data yet.
1050 if (symName
== obj
->getComdatTable()[comdatIndex
].first
) {
1051 sym
= comdat
[comdatIndex
].first
;
1052 if (cast
<DefinedRegular
>(sym
)->data
== nullptr)
1053 cast
<DefinedRegular
>(sym
)->data
= &fakeSC
->repl
;
// Other comdat members are added as regular symbols when the comdat's flag is
// set, and as undefined symbols otherwise.
1054 } else if (comdat
[comdatIndex
].second
) {
1055 sym
= ctx
.symtab
.addRegular(this, symName
, nullptr, fakeSC
);
1057 sym
= ctx
.symtab
.addUndefined(symName
, this, false);
1060 sym
= ctx
.symtab
.addRegular(this, symName
, nullptr, fakeSC
, 0,
1063 symbols
.push_back(sym
);
// Symbols flagged as used are recorded in config.gcroot.
1064 if (objSym
.isUsed())
1065 ctx
.config
.gcroot
.push_back(sym
);
1067 directives
= obj
->getCOFFLinkerOpts();
1070 void BitcodeFile::parseLazy() {
1071 for (const lto::InputFile::Symbol
&sym
: obj
->symbols())
1072 if (!sym
.isUndefined())
1073 ctx
.symtab
.addLazyObject(this, sym
.getName());
// Maps the architecture of the bitcode file's target triple to a COFF
// MachineTypes value.
// NOTE(review): only the x86_64 and aarch64 case labels and the
// IMAGE_FILE_MACHINE_UNKNOWN return are visible in this excerpt; the values
// returned for the listed cases, and any further cases, must be confirmed
// against the full file.
1076 MachineTypes
BitcodeFile::getMachineType() {
1077 switch (Triple(obj
->getTargetTriple()).getArch()) {
1078 case Triple::x86_64
:
1084 case Triple::aarch64
:
1087 return IMAGE_FILE_MACHINE_UNKNOWN
;
// If `path` ends with `suffix`, returns `path` with that suffix removed and
// `repl` appended; otherwise returns `path` unchanged as a std::string.
// NOTE(review): the declaration of the third parameter (`repl`) is not
// visible in this excerpt.
1091 std::string
lld::coff::replaceThinLTOSuffix(StringRef path
, StringRef suffix
,
// consume_back strips `suffix` from the local copy of `path` when it matches.
1093 if (path
.consume_back(suffix
))
1094 return (path
+ repl
).str();
1095 return std::string(path
);
1098 static bool isRVACode(COFFObjectFile
*coffObj
, uint64_t rva
, InputFile
*file
) {
1099 for (size_t i
= 1, e
= coffObj
->getNumberOfSections(); i
<= e
; i
++) {
1100 const coff_section
*sec
= CHECK(coffObj
->getSection(i
), file
);
1101 if (rva
>= sec
->VirtualAddress
&&
1102 rva
<= sec
->VirtualAddress
+ sec
->VirtualSize
) {
1103 return (sec
->Characteristics
& COFF::IMAGE_SCN_CNT_CODE
) != 0;
// Parses the buffer as a PE-COFF image and registers lazy DLL symbols for its
// exports. The buffer must be a COFF file with a PE32 or PE32+ header;
// otherwise an error is reported. For every export-directory entry with a
// non-empty symbol name, a DLLFile::Symbol describing the import is created
// and registered under the "__imp_"-prefixed name.
// NOTE(review): several lines are missing from this excerpt (the body of the
// successful cast, the statements after each error() call, the declaration
// of `exportRVA`, the statement guarded by the empty-name check and the
// condition on the second addLazyDLLSymbol call); confirm against the full
// file.
1109 void DLLFile::parse() {
1110 // Parse a memory buffer as a PE-COFF executable.
1111 std::unique_ptr
<Binary
> bin
= CHECK(createBinary(mb
), this);
1113 if (auto *obj
= dyn_cast
<COFFObjectFile
>(bin
.get())) {
1117 error(toString(this) + " is not a COFF file");
// A PE image has either a PE32 or a PE32+ header.
1121 if (!coffObj
->getPE32Header() && !coffObj
->getPE32PlusHeader()) {
1122 error(toString(this) + " is not a PE-COFF executable");
1126 for (const auto &exp
: coffObj
->export_directories()) {
1127 StringRef dllName
, symbolName
;
1129 checkError(exp
.getDllName(dllName
));
1130 checkError(exp
.getSymbolName(symbolName
));
1131 checkError(exp
.getExportRVA(exportRVA
));
1133 if (symbolName
.empty())
// Classify the export as code or data from the section its RVA falls into.
1136 bool code
= isRVACode(coffObj
.get(), exportRVA
, this);
1138 Symbol
*s
= make
<Symbol
>();
1139 s
->dllName
= dllName
;
1140 s
->symbolName
= symbolName
;
1141 s
->importType
= code
? ImportType::IMPORT_CODE
: ImportType::IMPORT_DATA
;
1142 s
->nameType
= ImportNameType::IMPORT_NAME
;
// For I386 images the symbol name gets a "_" prefix and the name type becomes
// IMPORT_NAME_NOPREFIX.
1144 if (coffObj
->getMachine() == I386
) {
1145 s
->symbolName
= symbolName
= saver().save("_" + symbolName
);
1146 s
->nameType
= ImportNameType::IMPORT_NAME_NOPREFIX
;
// Register the export under its "__imp_"-prefixed name.
1149 StringRef impName
= saver().save("__imp_" + symbolName
);
1150 ctx
.symtab
.addLazyDLLSymbol(this, s
, impName
);
// Second registration, under the plain symbol name (its condition is not
// visible in this excerpt).
1152 ctx
.symtab
.addLazyDLLSymbol(this, s
, symbolName
);
1156 MachineTypes
DLLFile::getMachineType() {
1158 return static_cast<MachineTypes
>(coffObj
->getMachine());
1159 return IMAGE_FILE_MACHINE_UNKNOWN
;
1162 void DLLFile::makeImport(DLLFile::Symbol
*s
) {
1163 if (!seen
.insert(s
->symbolName
).second
)
1166 size_t impSize
= s
->dllName
.size() + s
->symbolName
.size() + 2; // +2 for NULs
1167 size_t size
= sizeof(coff_import_header
) + impSize
;
1168 char *buf
= bAlloc().Allocate
<char>(size
);
1169 memset(buf
, 0, size
);
1171 auto *imp
= reinterpret_cast<coff_import_header
*>(p
);
1174 imp
->Machine
= coffObj
->getMachine();
1175 imp
->SizeOfData
= impSize
;
1176 imp
->OrdinalHint
= 0; // Only linking by name
1177 imp
->TypeInfo
= (s
->nameType
<< 2) | s
->importType
;
1179 // Write symbol name and DLL name.
1180 memcpy(p
, s
->symbolName
.data(), s
->symbolName
.size());
1181 p
+= s
->symbolName
.size() + 1;
1182 memcpy(p
, s
->dllName
.data(), s
->dllName
.size());
1183 MemoryBufferRef mbref
= MemoryBufferRef(StringRef(buf
, size
), s
->dllName
);
1184 ImportFile
*impFile
= make
<ImportFile
>(ctx
, mbref
);
1185 ctx
.symtab
.addFile(impFile
);