1 //===- InputFiles.cpp -----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "InputFiles.h"
10 #include "COFFLinkerContext.h"
13 #include "DebugTypes.h"
15 #include "SymbolTable.h"
17 #include "lld/Common/DWARF.h"
18 #include "llvm-c/lto.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/BinaryFormat/COFF.h"
23 #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
24 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
25 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
26 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
27 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
28 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
29 #include "llvm/LTO/LTO.h"
30 #include "llvm/Object/Binary.h"
31 #include "llvm/Object/COFF.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/Endian.h"
34 #include "llvm/Support/Error.h"
35 #include "llvm/Support/ErrorOr.h"
36 #include "llvm/Support/FileSystem.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Target/TargetOptions.h"
40 #include <system_error>
44 using namespace llvm::COFF
;
45 using namespace llvm::codeview
;
46 using namespace llvm::object
;
47 using namespace llvm::support::endian
;
49 using namespace lld::coff
;
52 using llvm::support::ulittle32_t
;
54 // Returns the last element of a path, which is supposed to be a filename.
// Thin wrapper around sys::path::filename that always parses `path` with
// Windows path rules (sys::path::Style::windows), regardless of the host.
55 static StringRef
getBasename(StringRef path
) {
56 return sys::path::filename(path
, sys::path::Style::windows
);
59 // Returns a string in the format of "foo.obj" or "bar.lib(foo.obj)".
// Formats an input file for use in diagnostics.
// `file` is dereferenced unconditionally below.
60 std::string
lld::toString(const coff::InputFile
*file
) {
// Files without a parent name, and import files, are printed with their own
// name only (the full getName(), not the basename).
63 if (file
->parentName
.empty() || file
->kind() == coff::InputFile::ImportKind
)
64 return std::string(file
->getName());
// Otherwise the parent's basename comes first, followed by "(" and the
// basename of the file itself.
66 return (getBasename(file
->parentName
) + "(" + getBasename(file
->getName()) +
71 /// Checks that Source is compatible with being a weak alias to Target.
72 /// If Source is Undefined and has no weak alias set, makes it a weak alias to Target.
// Parameters:
//   symtab - symbol table used to report a conflicting alias.
//   f      - input file passed along with the duplicate report.
//   source - candidate alias; only acted upon when it is an Undefined symbol.
//   target - symbol the alias should resolve to.
74 static void checkAndSetWeakAlias(SymbolTable
*symtab
, InputFile
*f
,
75 Symbol
*source
, Symbol
*target
) {
// dyn_cast yields nullptr for anything that is not an Undefined symbol, so
// other symbol kinds are left untouched by this branch.
76 if (auto *u
= dyn_cast
<Undefined
>(source
)) {
// An alias that is already set and points somewhere else is a conflict.
77 if (u
->weakAlias
&& u
->weakAlias
!= target
) {
78 // Weak aliases as produced by GCC are named in the form
79 // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
80 // of another symbol emitted near the weak symbol.
81 // Just use the definition from the first object file that defined
85 symtab
->reportDuplicate(source
, f
);
// Record `target` as the weak alias of the undefined symbol.
87 u
->weakAlias
= target
;
// Returns true for the two special symbol names "@feat.00" and "@comp.id".
// Within this file the result is consulted for absolute symbols (see
// parseLazy and the isAbsolute() branch of createDefined).
91 static bool ignoredSymbolName(StringRef name
) {
92 return name
== "@feat.00" || name
== "@comp.id";
// Constructs an archive input file. The context and buffer are forwarded to
// the InputFile base together with the ArchiveKind tag; the body is empty, so
// no parsing happens here (see ArchiveFile::parse).
95 ArchiveFile::ArchiveFile(COFFLinkerContext
&ctx
, MemoryBufferRef m
)
96 : InputFile(ctx
, ArchiveKind
, m
) {}
// Opens the buffer `mb` as an archive and registers every entry of the
// archive symbol table with the symbol table as a lazy archive symbol.
// Archive creation goes through CHECK with `this` as the context argument.
98 void ArchiveFile::parse() {
99 // Parse a MemoryBufferRef as an archive file.
100 file
= CHECK(Archive::create(mb
), this);
102 // Read the symbol table to construct Lazy objects.
103 for (const Archive::Symbol
&sym
: file
->symbols())
104 ctx
.symtab
.addLazyArchive(this, sym
);
107 // Enqueues (via the driver) the archive member file containing a given symbol.
// Looks up the archive member that defines `sym` and passes it to the driver.
// The function returns void; the member is handed over through
// driver->enqueueArchiveMember rather than being returned.
108 void ArchiveFile::addMember(const Archive::Symbol
&sym
) {
// Failure to resolve the member is reported through CHECK with the symbol's
// COFF string form in the message.
109 const Archive::Child
&c
=
110 CHECK(sym
.getMember(),
111 "could not get the member for symbol " + toCOFFString(sym
));
// `seen` holds child offsets; insert(...).second is false when the offset was
// already present, i.e. this member has been requested before.
113 // Return an empty buffer if we have already returned the same buffer.
114 if (!seen
.insert(c
.getChildOffset()).second
)
// The archive's own name is passed along as the third argument.
117 driver
->enqueueArchiveMember(c
, sym
, getName());
// Iterates over the children of `file` and obtains a MemoryBufferRef for each
// one. The declared result is a vector of MemoryBufferRef (`v`).
120 std::vector
<MemoryBufferRef
> lld::coff::getArchiveMembers(Archive
*file
) {
121 std::vector
<MemoryBufferRef
> v
;
// `err` is passed to children() so that iteration failures are stored in it;
// it is converted to text in the fatal() message further down.
122 Error err
= Error::success();
123 for (const Archive::Child
&c
: file
->children(err
)) {
// A child whose buffer cannot be obtained is reported through CHECK, prefixed
// with the archive's file name.
124 MemoryBufferRef mbref
=
125 CHECK(c
.getMemoryBufferRef(),
126 file
->getFileName() +
127 ": could not get the buffer for a child of the archive");
// Reports the error accumulated in `err` by Archive::children.
131 fatal(file
->getFileName() +
132 ": Archive::children failed: " + toString(std::move(err
)));
// Scans the symbol table of a native COFF object and registers symbol names
// with the symbol table as lazy object symbols (ctx.symtab.addLazyObject).
// The Binary is created locally from `mb` for the duration of this call.
136 void ObjFile::parseLazy() {
137 // Native object file.
138 std::unique_ptr
<Binary
> coffObjPtr
= CHECK(createBinary(mb
), this);
// cast<> (not dyn_cast<>) is used: the binary is expected to be a
// COFFObjectFile here.
139 COFFObjectFile
*coffObj
= cast
<COFFObjectFile
>(coffObjPtr
.get());
140 uint32_t numSymbols
= coffObj
->getNumberOfSymbols();
141 for (uint32_t i
= 0; i
< numSymbols
; ++i
) {
142 COFFSymbolRef coffSym
= check(coffObj
->getSymbol(i
));
// Filter on undefined, non-external and weak external symbols
// (presumably these are not registered as lazy -- TODO confirm).
143 if (coffSym
.isUndefined() || !coffSym
.isExternal() ||
144 coffSym
.isWeakExternal())
146 StringRef name
= check(coffObj
->getSymbolName(coffSym
));
// Filter on absolute symbols with one of the special names recognised by
// ignoredSymbolName().
147 if (coffSym
.isAbsolute() && ignoredSymbolName(name
))
149 ctx
.symtab
.addLazyObject(this, name
);
// Advance the index by the symbol's auxiliary record count so that the loop
// increment lands on the next real symbol.
150 i
+= coffSym
.getNumberOfAuxSymbols();
// Parses the buffer `mb` as a COFF object file. Creation of the Binary goes
// through CHECK; a binary that is not a COFF object is reported with the
// fatal " is not a COFF file" diagnostic. Dependency initialisation
// (initializeDependencies) is the last visible step.
154 void ObjFile::parse() {
155 // Parse a memory buffer as a COFF file.
156 std::unique_ptr
<Binary
> bin
= CHECK(createBinary(mb
), this);
// dyn_cast yields nullptr when the binary is some other kind of object.
158 if (auto *obj
= dyn_cast
<COFFObjectFile
>(bin
.get())) {
162 fatal(toString(this) + " is not a COFF file");
165 // Read section and symbol tables.
169 initializeDependencies();
// Fetches the header of section number `i` from the underlying COFF object.
// A failed lookup is fatal; the message contains the section number and the
// text of the error taken from the result.
172 const coff_section
*ObjFile::getSection(uint32_t i
) {
173 auto sec
= coffObj
->getSection(i
);
175 fatal("getSection failed: #" + Twine(i
) + ": " + toString(sec
.takeError()));
179 // We set SectionChunk pointers in the SparseChunks vector to this value
180 // temporarily to mark comdat sections as having an unknown resolution. As we
181 // walk the object file's symbol table, once we visit either a leader symbol or
182 // an associative section definition together with the parent comdat's leader,
183 // we set the pointer to either nullptr (to mark the section as discarded) or a
184 // valid SectionChunk for that section.
// Sentinel pointer built from the integer value 1. In this file it is only
// stored into sparseChunks and compared for equality (== pendingComdat).
185 static SectionChunk
*const pendingComdat
= reinterpret_cast<SectionChunk
*>(1);
// Populates sparseChunks, which is indexed by section number.
187 void ObjFile::initializeChunks() {
188 uint32_t numSections
= coffObj
->getNumberOfSections();
// One extra slot: the loop below starts at index 1, so slot 0 is not assigned
// here.
189 sparseChunks
.resize(numSections
+ 1);
190 for (uint32_t i
= 1; i
< numSections
+ 1; ++i
) {
191 const coff_section
*sec
= getSection(i
);
// Sections flagged IMAGE_SCN_LNK_COMDAT are marked with the pendingComdat
// sentinel.
192 if (sec
->Characteristics
& IMAGE_SCN_LNK_COMDAT
)
193 sparseChunks
[i
] = pendingComdat
;
// Read with no aux section definition (nullptr) and an empty leader name.
195 sparseChunks
[i
] = readSection(i
, nullptr, "");
// Creates the SectionChunk for one section and files it in the list that
// matches the section's name.
// Parameters:
//   sectionNumber - section number, resolved through getSection().
//   def           - aux section definition whose CheckSum is copied into the
//                   chunk; initializeChunks passes nullptr for it.
//   leaderName    - name of the comdat leader symbol; used for the "??_C@"
//                   string-literal test below. Some callers pass "".
// Returns a SectionChunk pointer (callers store it in sparseChunks).
199 SectionChunk
*ObjFile::readSection(uint32_t sectionNumber
,
200 const coff_aux_section_definition
*def
,
201 StringRef leaderName
) {
202 const coff_section
*sec
= getSection(sectionNumber
);
// Resolve the section name; an error from getSectionName is fatal and names
// the section number.
205 if (Expected
<StringRef
> e
= coffObj
->getSectionName(sec
))
208 fatal("getSectionName failed: #" + Twine(sectionNumber
) + ": " +
209 toString(e
.takeError()));
// .drectve: the raw section contents are kept as `directives`.
// cantFail() is used, so a read error is not expected here.
211 if (name
== ".drectve") {
212 ArrayRef
<uint8_t> data
;
213 cantFail(coffObj
->getSectionContents(sec
, data
));
214 directives
= StringRef((const char *)data
.data(), data
.size());
// Two LLVM-specific section names get dedicated handling.
218 if (name
== ".llvm_addrsig") {
223 if (name
== ".llvm.call-graph-profile") {
228 // Object files may have DWARF debug info or MS CodeView debug info
231 // DWARF sections don't need any special handling from the perspective
232 // of the linker; they are just a data section containing relocations.
233 // We can just link them to complete debug info.
235 // CodeView needs linker support. We need to interpret debug info,
236 // and then write it to a separate .pdb file.
238 // Ignore DWARF debug info unless /debug is given.
239 if (!config
->debug
&& name
.startswith(".debug_"))
// Test for sections flagged IMAGE_SCN_LNK_REMOVE.
242 if (sec
->Characteristics
& llvm::COFF::IMAGE_SCN_LNK_REMOVE
)
// The chunk is allocated through make<SectionChunk>().
244 auto *c
= make
<SectionChunk
>(this, sec
);
246 c
->checksum
= def
->CheckSum
;
248 // CodeView sections are stored to a different vector because they are not
249 // linked in the regular manner.
// Dispatch on the section name: each recognised name has its own vector.
251 debugChunks
.push_back(c
);
252 else if (name
== ".gfids$y")
253 guardFidChunks
.push_back(c
);
254 else if (name
== ".giats$y")
255 guardIATChunks
.push_back(c
);
256 else if (name
== ".gljmp$y")
257 guardLJmpChunks
.push_back(c
);
258 else if (name
== ".gehcont$y")
259 guardEHContChunks
.push_back(c
);
260 else if (name
== ".sxdata")
261 sxDataChunks
.push_back(c
);
262 else if (config
->tailMerge
&& sec
->NumberOfRelocations
== 0 &&
263 name
== ".rdata" && leaderName
.startswith("??_C@"))
264 // COFF sections that look like string literal sections (i.e. no
265 // relocations, in .rdata, leader symbol name matches the MSVC name mangling
266 // for string literals) are subject to string tail merging.
267 MergeChunk::addSection(ctx
, c
);
// Resource sections: ".rsrc" itself or any ".rsrc$"-prefixed name.
268 else if (name
== ".rsrc" || name
.startswith(".rsrc$"))
269 resourceChunks
.push_back(c
);
// Appends every element of resourceChunks to the end of chunks.
// resourceChunks itself is left unmodified.
276 void ObjFile::includeResourceChunks() {
277 chunks
.insert(chunks
.end(), resourceChunks
.begin(), resourceChunks
.end());
// Convenience overload: takes the parent section index from the aux section
// definition (def->getNumber, which is told whether the object is a bigobj)
// and forwards to the three-argument overload.
280 void ObjFile::readAssociativeDefinition(
281 COFFSymbolRef sym
, const coff_aux_section_definition
*def
) {
282 readAssociativeDefinition(sym
, def
, def
->getNumber(sym
.isBigObj()));
// Resolves an associative comdat section against its parent section.
// Parameters:
//   sym         - section definition symbol of the associative section.
//   def         - its aux section definition (forwarded to readSection).
//   parentIndex - section number of the parent; used to index sparseChunks.
// The outcome is written to sparseChunks[sectionNumber]: either the chunk that
// was read, or nullptr.
285 void ObjFile::readAssociativeDefinition(COFFSymbolRef sym
,
286 const coff_aux_section_definition
*def
,
287 uint32_t parentIndex
) {
288 SectionChunk
*parent
= sparseChunks
[parentIndex
];
289 int32_t sectionNumber
= sym
.getSectionNumber();
// The symbol and parent section names are looked up for the diagnostic below.
292 StringRef name
= check(coffObj
->getSymbolName(sym
));
294 StringRef parentName
;
295 const coff_section
*parentSec
= getSection(parentIndex
);
296 if (Expected
<StringRef
> e
= coffObj
->getSectionName(parentSec
))
// Non-fatal error(): names both sections and their numbers.
298 error(toString(this) + ": associative comdat " + name
+ " (sec " +
299 Twine(sectionNumber
) + ") has invalid reference to section " +
300 parentName
+ " (sec " + Twine(parentIndex
) + ")");
// Parent still carries the pendingComdat sentinel: it has not been resolved.
303 if (parent
== pendingComdat
) {
304 // This can happen if an associative comdat refers to another associative
305 // comdat that appears after it (invalid per COFF spec) or to a section
306 // without any symbols.
311 // Check whether the parent is prevailing. If it is, so are we, and we read
312 // the section; otherwise mark it as discarded.
// Read with an empty leader name, then record the chunk under this section.
314 SectionChunk
*c
= readSection(sectionNumber
, def
, "");
315 sparseChunks
[sectionNumber
] = c
;
// Tag the chunk as associative and attach it to its parent.
317 c
->selection
= IMAGE_COMDAT_SELECT_ASSOCIATIVE
;
318 parent
->addAssociative(c
);
// nullptr in sparseChunks marks the section as discarded.
321 sparseChunks
[sectionNumber
] = nullptr;
// Records, for MinGW, which section number belongs to a given function-name
// suffix.
// Parameters:
//   sym                  - symbol whose section number is recorded.
//   prevailingSectionMap - out-parameter; maps the text after the first '$'
//                          of the section name to the section number.
325 void ObjFile::recordPrevailingSymbolForMingw(
326 COFFSymbolRef sym
, DenseMap
<StringRef
, uint32_t> &prevailingSectionMap
) {
327 // For comdat symbols in executable sections, where this is the copy
328 // of the section chunk we actually include instead of discarding it,
329 // add the symbol to a map to allow using it for implicitly
330 // associating .[px]data$<func> sections to it.
331 // Use the suffix from the .text$<func> instead of the leader symbol
332 // name, for cases where the names differ (i386 mangling/decorations,
333 // cases where the leader is a weak symbol named .weak.func.default*).
334 int32_t sectionNumber
= sym
.getSectionNumber();
335 SectionChunk
*sc
= sparseChunks
[sectionNumber
];
// Only non-null chunks whose output characteristics include
// IMAGE_SCN_MEM_EXECUTE are recorded.
336 if (sc
&& sc
->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE
) {
// split('$').second is the part of the section name after the first '$'.
337 StringRef name
= sc
->getSectionName().split('$').second
;
// operator[] overwrites an existing entry for the same suffix.
338 prevailingSectionMap
[name
] = sectionNumber
;
// For MinGW: associates a .pdata$/.xdata$/.eh_frame$ section with the section
// recorded for the same suffix in prevailingSectionMap, if there is one.
// Parameters:
//   sym                  - section definition symbol being examined.
//   def                  - its aux section definition.
//   prevailingSectionMap - suffix -> section number map filled by
//                          recordPrevailingSymbolForMingw.
342 void ObjFile::maybeAssociateSEHForMingw(
343 COFFSymbolRef sym
, const coff_aux_section_definition
*def
,
344 const DenseMap
<StringRef
, uint32_t> &prevailingSectionMap
) {
345 StringRef name
= check(coffObj
->getSymbolName(sym
));
// consume_front strips the matched prefix in place, so after a match `name`
// holds only the <func> suffix.
346 if (name
.consume_front(".pdata$") || name
.consume_front(".xdata$") ||
347 name
.consume_front(".eh_frame$")) {
348 // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
349 // associative to the symbol <func>.
350 auto parentSym
= prevailingSectionMap
.find(name
);
// Only suffixes found in the map lead to an association; the mapped section
// number is used as the parent index.
351 if (parentSym
!= prevailingSectionMap
.end())
352 readAssociativeDefinition(sym
, def
, parentSym
->second
);
// Creates the Symbol for a COFF symbol that lives in a section.
// The section chunk is looked up in sparseChunks by the symbol's section
// number. External symbols go through the symbol table; the final visible
// return builds a nameless, non-external, non-COMDAT DefinedRegular directly.
356 Symbol
*ObjFile::createRegular(COFFSymbolRef sym
) {
357 SectionChunk
*sc
= sparseChunks
[sym
.getSectionNumber()];
358 if (sym
.isExternal()) {
359 StringRef name
= check(coffObj
->getSymbolName(sym
));
// Registered under its name together with the chunk `sc`.
361 return ctx
.symtab
.addRegular(this, name
, sym
.getGeneric(), sc
,
363 // For MinGW symbols named .weak.* that point to a discarded section,
364 // don't create an Undefined symbol. If nothing ever refers to the symbol,
365 // everything should be fine. If something actually refers to the symbol
366 // (e.g. the undefined weak alias), linking will fail due to undefined
367 // references at the end.
368 if (config
->mingw
&& name
.startswith(".weak."))
// Third argument `false`: compare createUndefined, which passes
// sym.isWeakExternal() in that position.
370 return ctx
.symtab
.addUndefined(name
, this, false);
// Not added to the symbol table: constructed directly with an empty name.
373 return make
<DefinedRegular
>(this, /*Name*/ "", /*IsCOMDAT*/ false,
374 /*IsExternal*/ false, sym
.getGeneric(), sc
);
// Builds the `symbols` vector (one slot per COFF symbol table index).
// The visible code has three loops:
//   1. a walk over the symbol table that creates undefined, weak external and
//      defined symbols, collecting weak aliases and deferred indexes;
//   2. a walk over pendingIndexes that resolves deferred section symbols;
//   3. a walk over weakAliases that wires each alias to its target symbol.
// Afterwards the storage of sparseChunks is released.
378 void ObjFile::initializeSymbols() {
379 uint32_t numSymbols
= coffObj
->getNumberOfSymbols();
380 symbols
.resize(numSymbols
);
// (symbol, TagIndex) pairs for weak externals; resolved in the last loop.
382 SmallVector
<std::pair
<Symbol
*, uint32_t>, 8> weakAliases
;
// Symbol table indexes whose handling is postponed to the second loop.
383 std::vector
<uint32_t> pendingIndexes
;
384 pendingIndexes
.reserve(numSymbols
);
386 DenseMap
<StringRef
, uint32_t> prevailingSectionMap
;
// Indexed by section number, hence the + 1 (createDefined indexes it with
// the symbol's section number).
387 std::vector
<const coff_aux_section_definition
*> comdatDefs(
388 coffObj
->getNumberOfSections() + 1);
390 for (uint32_t i
= 0; i
< numSymbols
; ++i
) {
391 COFFSymbolRef coffSym
= check(coffObj
->getSymbol(i
));
// Declared without an initializer; it is passed to createDefined below and
// read afterwards.
392 bool prevailingComdat
;
393 if (coffSym
.isUndefined()) {
394 symbols
[i
] = createUndefined(coffSym
);
395 } else if (coffSym
.isWeakExternal()) {
// A weak external starts out as an undefined symbol; its aux record's
// TagIndex names the symbol table index of the alias target.
396 symbols
[i
] = createUndefined(coffSym
);
397 uint32_t tagIndex
= coffSym
.getAux
<coff_aux_weak_external
>()->TagIndex
;
398 weakAliases
.emplace_back(symbols
[i
], tagIndex
);
399 } else if (Optional
<Symbol
*> optSym
=
400 createDefined(coffSym
, comdatDefs
, prevailingComdat
)) {
401 symbols
[i
] = *optSym
;
402 if (config
->mingw
&& prevailingComdat
)
403 recordPrevailingSymbolForMingw(coffSym
, prevailingSectionMap
);
405 // createDefined() returns None if a symbol belongs to a section that
406 // was pending at the point when the symbol was read. This can happen in
408 // 1) section definition symbol for a comdat leader;
409 // 2) symbol belongs to a comdat section associated with another section.
410 // In both of these cases, we can expect the section to be resolved by
411 // the time we finish visiting the remaining symbols in the symbol
412 // table. So we postpone the handling of this symbol until that time.
413 pendingIndexes
.push_back(i
);
// Advance past the auxiliary records that follow this symbol.
415 i
+= coffSym
.getNumberOfAuxSymbols();
// Second loop: symbols deferred above.
418 for (uint32_t i
: pendingIndexes
) {
419 COFFSymbolRef sym
= check(coffObj
->getSymbol(i
));
420 if (const coff_aux_section_definition
*def
= sym
.getSectionDefinition()) {
// Explicitly associative sections are resolved against their parent; for
// MinGW, other section definitions get the implicit SEH association check.
421 if (def
->Selection
== IMAGE_COMDAT_SELECT_ASSOCIATIVE
)
422 readAssociativeDefinition(sym
, def
);
423 else if (config
->mingw
)
424 maybeAssociateSEHForMingw(sym
, def
, prevailingSectionMap
);
// Still marked pendingComdat after the steps above: logged (not an error).
426 if (sparseChunks
[sym
.getSectionNumber()] == pendingComdat
) {
427 StringRef name
= check(coffObj
->getSymbolName(sym
));
428 log("comdat section " + name
+
429 " without leader and unassociated, discarding");
432 symbols
[i
] = createRegular(sym
);
// Third loop: `idx` is the TagIndex stored earlier and indexes `symbols`.
435 for (auto &kv
: weakAliases
) {
436 Symbol
*sym
= kv
.first
;
437 uint32_t idx
= kv
.second
;
438 checkAndSetWeakAlias(&ctx
.symtab
, this, sym
, symbols
[idx
]);
441 // Free the memory used by sparseChunks now that symbol loading is finished.
// Swapping with a default-constructed temporary of the same type leaves
// sparseChunks empty; the old contents are destroyed with the temporary.
442 decltype(sparseChunks
)().swap(sparseChunks
);
// Registers `sym` by name as an undefined symbol in the symbol table and
// returns the resulting Symbol. The third argument passed to addUndefined is
// whether the COFF symbol is a weak external.
445 Symbol
*ObjFile::createUndefined(COFFSymbolRef sym
) {
446 StringRef name
= check(coffObj
->getSymbolName(sym
));
447 return ctx
.symtab
.addUndefined(name
, this, sym
.isWeakExternal());
// Searches the symbol table of `obj` for a symbol that belongs to `section`
// and carries an aux section definition. This is a linear scan over all
// symbols of the object.
450 static const coff_aux_section_definition
*findSectionDef(COFFObjectFile
*obj
,
452 uint32_t numSymbols
= obj
->getNumberOfSymbols();
453 for (uint32_t i
= 0; i
< numSymbols
; ++i
) {
454 COFFSymbolRef sym
= check(obj
->getSymbol(i
));
// Filter on the section number.
455 if (sym
.getSectionNumber() != section
)
// getSectionDefinition() yields a null pointer when the symbol has no aux
// section definition, so the condition doubles as the test.
457 if (const coff_aux_section_definition
*def
= sym
.getSectionDefinition())
// Decides how a newly seen comdat symbol interacts with an existing leader.
// Parameters:
//   sym        - the new comdat symbol from this object file.
//   selection  - in/out: selection type of the new symbol; may be rewritten
//                when two different selection types are merged.
//   prevailing - in/out flag passed by reference from createDefined.
//   leader     - the existing comdat symbol for the same name.
//   def        - aux section definition of the new symbol's section.
// Conflicts are reported through ctx.symtab.reportDuplicate.
463 void ObjFile::handleComdatSelection(
464 COFFSymbolRef sym
, COMDATType
&selection
, bool &prevailing
,
465 DefinedRegular
*leader
,
466 const llvm::object::coff_aux_section_definition
*def
) {
469 // There's already an existing comdat for this symbol: `Leader`.
470 // Use the comdats's selection field to determine if the new
471 // symbol in `Sym` should be discarded, produce a duplicate symbol
474 SectionChunk
*leaderChunk
= leader
->getChunk();
475 COMDATType leaderSelection
= leaderChunk
->selection
;
477 assert(leader
->data
&& "Comdat leader without SectionChunk?");
478 if (isa
<BitcodeFile
>(leader
->file
)) {
479 // If the leader is only a LTO symbol, we don't know e.g. its final size
480 // yet, so we can't do the full strict comdat selection checking yet.
481 selection
= leaderSelection
= IMAGE_COMDAT_SELECT_ANY
;
// Merge rule 1: "any" paired with "largest" (in either order).
484 if ((selection
== IMAGE_COMDAT_SELECT_ANY
&&
485 leaderSelection
== IMAGE_COMDAT_SELECT_LARGEST
) ||
486 (selection
== IMAGE_COMDAT_SELECT_LARGEST
&&
487 leaderSelection
== IMAGE_COMDAT_SELECT_ANY
)) {
488 // cl.exe picks "any" for vftables when building with /GR- and
489 // "largest" when building with /GR. To be able to link object files
490 // compiled with each flag, "any" and "largest" are merged as "largest".
491 leaderSelection
= selection
= IMAGE_COMDAT_SELECT_LARGEST
;
// Merge rule 2 (MinGW only): "any" paired with "same size" (in either order).
494 // GCCs __declspec(selectany) doesn't actually pick "any" but "same size as".
495 // Clang on the other hand picks "any". To be able to link two object files
496 // with a __declspec(selectany) declaration, one compiled with gcc and the
497 // other with clang, we merge them as proper "same size as"
498 if (config
->mingw
&& ((selection
== IMAGE_COMDAT_SELECT_ANY
&&
499 leaderSelection
== IMAGE_COMDAT_SELECT_SAME_SIZE
) ||
500 (selection
== IMAGE_COMDAT_SELECT_SAME_SIZE
&&
501 leaderSelection
== IMAGE_COMDAT_SELECT_ANY
))) {
502 leaderSelection
= selection
= IMAGE_COMDAT_SELECT_SAME_SIZE
;
505 // Other than that, comdat selections must match. This is a bit more
506 // strict than link.exe which allows merging "any" and "largest" if "any"
507 // is the first symbol the linker sees, and it allows merging "largest"
508 // with everything (!) if "largest" is the first symbol the linker sees.
509 // Making this symmetric independent of which selection is seen first
510 // seems better though.
511 // (This behavior matches ModuleLinker::getComdatResult().)
// A mismatch is logged with both selection values (as integers) and both
// file names, then reported as a duplicate.
512 if (selection
!= leaderSelection
) {
513 log(("conflicting comdat type for " + toString(*leader
) + ": " +
514 Twine((int)leaderSelection
) + " in " + toString(leader
->getFile()) +
515 " and " + Twine((int)selection
) + " in " + toString(this))
517 ctx
.symtab
.reportDuplicate(leader
, this);
// Per-selection handling.
// NODUPLICATES: a second definition is always reported.
522 case IMAGE_COMDAT_SELECT_NODUPLICATES
:
523 ctx
.symtab
.reportDuplicate(leader
, this);
526 case IMAGE_COMDAT_SELECT_ANY
:
// SAME_SIZE: compares the leader chunk's size with the new section's
// SizeOfRawData.
530 case IMAGE_COMDAT_SELECT_SAME_SIZE
:
531 if (leaderChunk
->getSize() != getSection(sym
)->SizeOfRawData
) {
532 if (!config
->mingw
) {
533 ctx
.symtab
.reportDuplicate(leader
, this);
// Second comparison: the Length fields of the two aux section definitions.
// The leader's definition is looked up from the leader chunk's file.
535 const coff_aux_section_definition
*leaderDef
= nullptr;
536 if (leaderChunk
->file
)
537 leaderDef
= findSectionDef(leaderChunk
->file
->getCOFFObj(),
538 leaderChunk
->getSectionNumber());
// A missing leader definition is treated like a length mismatch.
539 if (!leaderDef
|| leaderDef
->Length
!= def
->Length
)
540 ctx
.symtab
.reportDuplicate(leader
, this);
// EXACT_MATCH: a temporary chunk (local object, not allocated with make<>) is
// built for the new section so that the contents can be compared.
545 case IMAGE_COMDAT_SELECT_EXACT_MATCH
: {
546 SectionChunk
newChunk(this, getSection(sym
));
547 // link.exe only compares section contents here and doesn't complain
548 // if the two comdat sections have e.g. different alignment.
550 if (leaderChunk
->getContents() != newChunk
.getContents())
551 ctx
.symtab
.reportDuplicate(leader
, this, &newChunk
, sym
.getValue());
555 case IMAGE_COMDAT_SELECT_ASSOCIATIVE
:
556 // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
557 // (This means lld-link doesn't produce duplicate symbol errors for
558 // associative comdats while link.exe does, but associate comdats
559 // are never extern in practice.)
560 llvm_unreachable("createDefined not called for associative comdats");
// LARGEST: the new definition replaces the leader only when the leader's chunk
// is strictly smaller than the new section's SizeOfRawData.
562 case IMAGE_COMDAT_SELECT_LARGEST
:
563 if (leaderChunk
->getSize() < getSection(sym
)->SizeOfRawData
) {
564 // Replace the existing comdat symbol with the new one.
565 StringRef name
= check(coffObj
->getSymbolName(sym
));
566 // FIXME: This is incorrect: With /opt:noref, the previous sections
567 // make it into the final executable as well. Correct handling would
568 // be to undo reading of the whole old section that's being replaced,
569 // or doing one pass that determines what the final largest comdat
570 // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
571 // only the largest one.
572 replaceSymbol
<DefinedRegular
>(leader
, this, name
, /*IsCOMDAT*/ true,
573 /*IsExternal*/ true, sym
.getGeneric(),
// NEWEST: createDefined rejects selection values above LARGEST with fatal()
// before calling this function.
579 case IMAGE_COMDAT_SELECT_NEWEST
:
580 llvm_unreachable("should have been rejected earlier");
// Creates the Symbol for a defined COFF symbol, or yields no symbol when the
// decision has to be postponed (see the comment block on comdat pairs below).
// `comdatDefs` is indexed by section number and carries state between the two
// symbol table entries that make up a comdat symbol.
// Visible cases, in order: common symbols, absolute symbols, debug/reserved/
// out-of-range section numbers, comdat leaders, section definitions of
// pending comdats, and finally regular symbols (createRegular).
584 Optional
<Symbol
*> ObjFile::createDefined(
586 std::vector
<const coff_aux_section_definition
*> &comdatDefs
,
// Lazy name lookup: the symbol name is only fetched by the branches that
// need it.
589 auto getName
= [&]() { return check(coffObj
->getSymbolName(sym
)); };
// Common symbols get a CommonChunk and are added through addCommon; the
// symbol's value is passed as the second-to-last scalar argument.
591 if (sym
.isCommon()) {
592 auto *c
= make
<CommonChunk
>(sym
);
594 return ctx
.symtab
.addCommon(this, getName(), sym
.getValue(),
595 sym
.getGeneric(), c
);
598 if (sym
.isAbsolute()) {
599 StringRef name
= getName();
// The value of the "@feat.00" symbol is captured into feat00Flags before the
// special names are filtered.
601 if (name
== "@feat.00")
602 feat00Flags
= sym
.getValue();
603 // Skip special symbols.
604 if (ignoredSymbolName(name
))
// External absolutes go through the symbol table; others are created
// directly.
607 if (sym
.isExternal())
608 return ctx
.symtab
.addAbsolute(name
, sym
);
609 return make
<DefinedAbsolute
>(name
, sym
);
612 int32_t sectionNumber
= sym
.getSectionNumber();
613 if (sectionNumber
== llvm::COFF::IMAGE_SYM_DEBUG
)
// Validation: reserved section numbers and numbers beyond sparseChunks are
// fatal errors naming the file, the symbol and the section number.
616 if (llvm::COFF::isReservedSectionNumber(sectionNumber
))
617 fatal(toString(this) + ": " + getName() +
618 " should not refer to special section " + Twine(sectionNumber
));
620 if ((uint32_t)sectionNumber
>= sparseChunks
.size())
621 fatal(toString(this) + ": " + getName() +
622 " should not refer to non-existent section " + Twine(sectionNumber
));
625 // A comdat symbol consists of two symbol table entries.
626 // The first symbol entry has the name of the section (e.g. .text), fixed
627 // values for the other fields, and one auxiliary record.
628 // The second symbol entry has the name of the comdat symbol, called the
630 // When this function is called for the first symbol entry of a comdat,
631 // it sets comdatDefs and returns None, and when it's called for the second
632 // symbol entry it reads comdatDefs and then sets it back to nullptr.
634 // Handle comdat leader.
635 if (const coff_aux_section_definition
*def
= comdatDefs
[sectionNumber
]) {
// Consume the stored definition so the next symbol in this section is not
// treated as a leader again.
636 comdatDefs
[sectionNumber
] = nullptr;
637 DefinedRegular
*leader
;
// External leaders are resolved through the symbol table, which also sets
// `prevailing`; otherwise a nameless DefinedRegular is created directly.
639 if (sym
.isExternal()) {
640 std::tie(leader
, prevailing
) =
641 ctx
.symtab
.addComdat(this, getName(), sym
.getGeneric());
643 leader
= make
<DefinedRegular
>(this, /*Name*/ "", /*IsCOMDAT*/ false,
644 /*IsExternal*/ false, sym
.getGeneric());
// Accepted selection values are the closed range NODUPLICATES..LARGEST;
// anything else is a fatal error.
648 if (def
->Selection
< (int)IMAGE_COMDAT_SELECT_NODUPLICATES
||
649 // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
650 // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
651 def
->Selection
> (int)IMAGE_COMDAT_SELECT_LARGEST
) {
652 fatal("unknown comdat type " + std::to_string((int)def
->Selection
) +
653 " for " + getName() + " in " + toString(this));
655 COMDATType selection
= (COMDATType
)def
->Selection
;
// Selection checking applies when the leader is flagged as a COMDAT symbol.
657 if (leader
->isCOMDAT
)
658 handleComdatSelection(sym
, selection
, prevailing
, leader
, def
);
// Read the section with the leader's name and link chunk and leader to each
// other (c->sym and leader->data).
661 SectionChunk
*c
= readSection(sectionNumber
, def
, getName());
662 sparseChunks
[sectionNumber
] = c
;
663 c
->sym
= cast
<DefinedRegular
>(leader
);
664 c
->selection
= selection
;
665 cast
<DefinedRegular
>(leader
)->data
= &c
->repl
;
// nullptr in sparseChunks marks the section as discarded.
667 sparseChunks
[sectionNumber
] = nullptr;
672 // Prepare to handle the comdat leader symbol by setting the section's
673 // ComdatDefs pointer if we encounter a non-associative comdat.
674 if (sparseChunks
[sectionNumber
] == pendingComdat
) {
675 if (const coff_aux_section_definition
*def
= sym
.getSectionDefinition()) {
676 if (def
->Selection
!= IMAGE_COMDAT_SELECT_ASSOCIATIVE
)
677 comdatDefs
[sectionNumber
] = def
;
// Everything else is an ordinary symbol in a section.
682 return createRegular(sym
);
// Reports the machine type of this object file. The value comes from
// coffObj->getMachine(), converted to MachineTypes;
// IMAGE_FILE_MACHINE_UNKNOWN is the other possible result.
685 MachineTypes
ObjFile::getMachineType() {
687 return static_cast<MachineTypes
>(coffObj
->getMachine());
688 return IMAGE_FILE_MACHINE_UNKNOWN
;
// Looks up a section named `secName` among debugChunks and, when one is found,
// returns the result of its consumeDebugMagic() call.
691 ArrayRef
<uint8_t> ObjFile::getDebugSection(StringRef secName
) {
// findByName yields a null pointer when no chunk matches.
692 if (SectionChunk
*sec
= SectionChunk::findByName(debugChunks
, secName
))
693 return sec
->consumeDebugMagic();
697 // OBJ files systematically store critical information in a .debug$S stream,
698 // even if the TU was compiled with no debug info. At least two records are
699 // always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
700 // PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
701 // currently used to initialize the hotPatchable member.
// Reads the .debug$S section and inspects the leading symbol records of its
// Symbols subsections. S_COMPILE3 supplies the HotPatch flag test and
// S_OBJNAME supplies the value stored in pchSignature.
702 void ObjFile::initializeFlags() {
703 ArrayRef
<uint8_t> data
= getDebugSection(".debug$S");
707 DebugSubsectionArray subsections
;
// The section is decoded as little-endian; a malformed subsection array
// terminates through ExitOnError.
709 BinaryStreamReader
reader(data
, support::little
);
710 ExitOnError exitOnErr
;
711 exitOnErr(reader
.readArray(subsections
, data
.size()));
713 for (const DebugSubsectionRecord
&ss
: subsections
) {
// Filter on the subsection kind: only Symbols subsections are of interest.
714 if (ss
.kind() != DebugSubsectionKind::Symbols
)
719 // Only parse the first two records. We are only looking for S_OBJNAME
720 // and S_COMPILE3, and they usually appear at the beginning of the
722 for (unsigned i
= 0; i
< 2; ++i
) {
723 Expected
<CVSymbol
> sym
= readSymbolFromStream(ss
.getRecordData(), offset
);
// A read failure is swallowed deliberately: the error is consumed rather
// than reported.
725 consumeError(sym
.takeError());
728 if (sym
->kind() == SymbolKind::S_COMPILE3
) {
730 cantFail(SymbolDeserializer::deserializeAs
<Compile3Sym
>(sym
.get()));
// True when the HotPatch bit is set in the record's flags.
732 (cs
.Flags
& CompileSym3Flags::HotPatch
) != CompileSym3Flags::None
;
734 if (sym
->kind() == SymbolKind::S_OBJNAME
) {
735 auto objName
= cantFail(SymbolDeserializer::deserializeAs
<ObjNameSym
>(
737 pchSignature
= objName
.Signature
;
// Step to the next record in the stream.
739 offset
+= sym
->length();
744 // Depending on the compilation flags, OBJs can refer to external files,
745 // necessary to merge this OBJ into the final PDB. We currently support two
746 // types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
747 // And PDB type servers, when compiling with /Zi. This function extracts these
748 // dependencies and makes them available as a TpiSource interface (see
749 // DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
750 // output even with /Yc and /Yu and with /Zi.
// Chooses the TpiSource flavour stored in debugTypesObj, based on the debug
// type sections of this object and on the kind of the first type record:
//   - makePrecompSource       (this object is a PCH object)
//   - makeUseTypeServerSource (first record is LF_TYPESERVER2)
//   - makeUsePrecompSource    (first record is LF_PRECOMP)
//   - makeTpiSource           (plain object)
751 void ObjFile::initializeDependencies() {
// .debug$P is looked up first, then .debug$T.
757 ArrayRef
<uint8_t> data
= getDebugSection(".debug$P");
761 data
= getDebugSection(".debug$T");
763 // symbols but no types, make a plain, empty TpiSource anyway, because it
764 // simplifies adding the symbols later.
766 if (!debugChunks
.empty())
767 debugTypesObj
= makeTpiSource(ctx
, this);
771 // Get the first type record. It will indicate if this object uses a type
772 // server (/Zi) or a PCH file (/Yu).
// The whole section is decoded as a little-endian array of type records;
// cantFail() means a decode error is not expected here.
774 BinaryStreamReader
reader(data
, support::little
);
775 cantFail(reader
.readArray(types
, reader
.getLength()));
776 CVTypeArray::Iterator firstType
= types
.begin();
// Guard against an empty type stream before firstType is dereferenced.
777 if (firstType
== types
.end())
780 // Remember the .debug$T or .debug$P section.
783 // This object file is a PCH file that others will depend on.
785 debugTypesObj
= makePrecompSource(ctx
, this);
789 // This object file was compiled with /Zi. Enqueue the PDB dependency.
790 if (firstType
->kind() == LF_TYPESERVER2
) {
791 TypeServer2Record ts
= cantFail(
792 TypeDeserializer::deserializeAs
<TypeServer2Record
>(firstType
->data()));
793 debugTypesObj
= makeUseTypeServerSource(ctx
, this, ts
);
// The PDB named by the record is scheduled for loading.
794 enqueuePdbFile(ts
.getName(), this);
798 // This object was compiled with /Yu. It uses types from another object file
799 // with a matching signature.
800 if (firstType
->kind() == LF_PRECOMP
) {
801 PrecompRecord precomp
= cantFail(
802 TypeDeserializer::deserializeAs
<PrecompRecord
>(firstType
->data()));
803 debugTypesObj
= makeUsePrecompSource(ctx
, this, precomp
);
804 // Drop the LF_PRECOMP record from the input stream.
805 debugTypes
= debugTypes
.drop_front(firstType
->RecordData
.size());
809 // This is a plain old object file.
810 debugTypesObj
= makeTpiSource(ctx
, this);
813 // Make a PDB path assuming the PDB is in the same folder as the OBJ
// Parameters:
//   file   - object file that references the PDB.
//   tSPath - PDB path as recorded in the object; only its file name is used.
// Returns the directory of the object (or of its parent, if it has one) with
// the PDB's file name appended.
814 static std::string
getPdbBaseName(ObjFile
*file
, StringRef tSPath
) {
// Prefer the parent name when it is set; otherwise the file's own name.
815 StringRef localPath
=
816 !file
->parentName
.empty() ? file
->parentName
: file
->getName();
817 SmallString
<128> path
= sys::path::parent_path(localPath
);
819 // Currently, type server PDBs are only created by MSVC cl, which only runs
820 // on Windows, so we can assume type server paths are Windows style.
821 sys::path::append(path
,
822 sys::path::filename(tSPath
, sys::path::Style::windows
));
823 return std::string(path
.str());
826 // The casing of the PDB path stamped in the OBJ can differ from the actual path
827 // on disk. With this, we ensure to always use lowercase as a key for the
828 // pdbInputFileInstances map, at least on Windows.
// Produces the string used as the key form of a PDB path. The visible return
// copies `path` unchanged into a std::string.
829 static std::string
normalizePdbPath(StringRef path
) {
833 return std::string(path
);
837 // If existing, return the actual PDB path on disk.
// Parameters:
//   pdbPath       - PDB path as recorded in the object file.
//   dependentFile - object file that references the PDB; used to derive a
//                   second candidate location next to it.
// Tries `pdbPath` as given first, then the path built by getPdbBaseName.
// The first candidate that exists on disk is returned in normalized form.
838 static Optional
<std::string
> findPdbPath(StringRef pdbPath
,
839 ObjFile
*dependentFile
) {
840 // Ensure the file exists before anything else. In some cases, if the path
841 // points to a removable device, Driver::enqueuePath() would fail with an
842 // error (EAGAIN, "resource unavailable try again") which we want to skip
844 if (llvm::sys::fs::exists(pdbPath
))
845 return normalizePdbPath(pdbPath
);
// Fallback candidate: same file name, in the directory of the dependent file.
846 std::string ret
= getPdbBaseName(dependentFile
, pdbPath
);
847 if (llvm::sys::fs::exists(ret
))
848 return normalizePdbPath(ret
);
// Constructs a PDB input file. The context and buffer are forwarded to the
// InputFile base together with the PDBKind tag; the body is empty, so the PDB
// is not opened here (see PDBInputFile::parse).
852 PDBInputFile::PDBInputFile(COFFLinkerContext
&ctx
, MemoryBufferRef m
)
853 : InputFile(ctx
, PDBKind
, m
) {}
// Defaulted destructor, defined out of line in this translation unit.
855 PDBInputFile::~PDBInputFile() = default;
// Looks up an already registered PDBInputFile for a PDB path taken from a
// debug record. The path is first resolved with findPdbPath (relative to
// `fromFile`), and the resolved string is the key used in
// ctx.pdbInputFileInstances.
857 PDBInputFile
*PDBInputFile::findFromRecordPath(const COFFLinkerContext
&ctx
,
860 auto p
= findPdbPath(path
.str(), fromFile
);
// `p` is dereferenced here, so it must hold a value at this point.
863 auto it
= ctx
.pdbInputFileInstances
.find(*p
);
864 if (it
!= ctx
.pdbInputFileInstances
.end())
// Opens the buffer as a PDB through pdb::NativeSession and, on success,
// creates the type-server TpiSource for it. Failures are stored in loadErr
// instead of being reported here.
869 void PDBInputFile::parse() {
// Register this instance under the buffer identifier.
870 ctx
.pdbInputFileInstances
[mb
.getBufferIdentifier().str()] = this;
872 std::unique_ptr
<pdb::IPDBSession
> thisSession
;
// The Error returned by createFromPdb is stored in loadErr.
873 loadErr
.emplace(pdb::NativeSession::createFromPdb(
874 MemoryBuffer::getMemBuffer(mb
, false), thisSession
));
876 return; // fail silently at this point - the error will be handled later,
877 // when merging the debug type stream
// Ownership moves from the generic IPDBSession pointer to `session`; the
// static_cast relies on createFromPdb having produced a NativeSession.
879 session
.reset(static_cast<pdb::NativeSession
*>(thisSession
.release()));
881 pdb::PDBFile
&pdbFile
= session
->getPDBFile();
882 auto expectedInfo
= pdbFile
.getPDBInfoStream();
883 // All PDB Files should have an Info stream.
885 loadErr
.emplace(expectedInfo
.takeError());
888 debugTypesObj
= makeTypeServerSource(ctx
, this);
891 // Used only for DWARF debug info, which is not common (except in MinGW
892 // environments). This returns an optional pair of file name and line
893 // number for where the variable was defined.
// Parameter `var` is the variable's symbol name; on I386 one leading "_" is
// stripped before the DWARF lookup. The returned file name is interned with
// saver().save(), so the StringRef does not point into the local `ret`.
894 Optional
<std::pair
<StringRef
, uint32_t>>
895 ObjFile::getVariableLocation(StringRef var
) {
// The DWARF cache is built from this file's COFF object.
897 dwarf
= make
<DWARFCache
>(DWARFContext::create(*getCOFFObj()));
901 if (config
->machine
== I386
)
902 var
.consume_front("_");
903 Optional
<std::pair
<std::string
, unsigned>> ret
= dwarf
->getVariableLoc(var
);
906 return std::make_pair(saver().save(ret
->first
), ret
->second
);
909 // Used only for DWARF debug info, which is not common (except in MinGW environments).
// Looks up DWARF line information for `offset` within the section identified
// by `sectionIndex`. The query is answered by the DWARF cache built from this
// file's COFF object.
911 Optional
<DILineInfo
> ObjFile::getDILineInfo(uint32_t offset
,
912 uint32_t sectionIndex
) {
914 dwarf
= make
<DWARFCache
>(DWARFContext::create(*getCOFFObj()));
919 return dwarf
->getDILineInfo(offset
, sectionIndex
);
// Schedules the PDB referenced by `path` for loading, at most once per
// resolved path. The path is resolved with findPdbPath relative to
// `fromFile`.
922 void ObjFile::enqueuePdbFile(StringRef path
, ObjFile
*fromFile
) {
923 auto p
= findPdbPath(path
.str(), fromFile
);
// emplace() inserts a nullptr placeholder for the resolved path and leaves an
// existing entry untouched.
926 auto it
= ctx
.pdbInputFileInstances
.emplace(*p
, nullptr);
928 return; // already scheduled for load
929 driver
->enqueuePDB(*p
);
// Parses a short import library member: a coff_import_header followed by two
// NUL-terminated strings (symbol name, then DLL name), as read below.
// Creates the "__imp_"-prefixed import data symbol and, depending on the
// header's type, an additional data symbol or an import thunk.
932 void ImportFile::parse() {
933 const char *buf
= mb
.getBufferStart();
934 const auto *hdr
= reinterpret_cast<const coff_import_header
*>(buf
);
936 // Check if the total size is valid.
937 if (mb
.getBufferSize() != sizeof(*hdr
) + hdr
->SizeOfData
)
938 fatal("broken import library");
940 // Read names and create an __imp_ symbol.
// The name starts right after the header and ends at its NUL terminator; both
// strings are interned with saver().save().
941 StringRef name
= saver().save(StringRef(buf
+ sizeof(*hdr
)));
942 StringRef impName
= saver().save("__imp_" + name
);
// The DLL name follows the symbol name and its NUL terminator (the + 1).
943 const char *nameStart
= buf
+ sizeof(coff_import_header
) + name
.size() + 1;
944 dllName
= std::string(StringRef(nameStart
));
// The header's name type selects how the external name is derived.
946 switch (hdr
->getNameType()) {
// NOPREFIX: ltrim1 is applied with the character set "?@_".
953 case IMPORT_NAME_NOPREFIX
:
954 extName
= ltrim1(name
, "?@_");
// UNDECORATE: same trimming, then the name is cut at the first '@'
// (the whole string is kept if there is none).
956 case IMPORT_NAME_UNDECORATE
:
957 extName
= ltrim1(name
, "?@_");
958 extName
= extName
.substr(0, extName
.find('@'));
963 externalName
= extName
;
965 impSym
= ctx
.symtab
.addImportData(impName
, this);
966 // If this was a duplicate, we logged an error but may continue;
967 // in this case, impSym is nullptr.
// IMPORT_CONST: the undecorated name is also registered as import data; the
// result is discarded on purpose (static_cast<void>).
971 if (hdr
->getType() == llvm::COFF::IMPORT_CONST
)
972 static_cast<void>(ctx
.symtab
.addImportData(name
, this));
974 // If type is function, we need to create a thunk which jump to an
975 // address pointed by the __imp_ symbol. (This allows you to call
976 // DLL functions just like regular non-DLL functions.)
// cast_or_null tolerates impSym being nullptr.
977 if (hdr
->getType() == llvm::COFF::IMPORT_CODE
)
978 thunkSym
= ctx
.symtab
.addImportThunk(
979 name
, cast_or_null
<DefinedImportData
>(impSym
), hdr
->Machine
);
// Constructs a bitcode input file and creates its lto::InputFile in `obj`.
// `path` is the buffer identifier, or its suffix-replaced form (see
// replaceThinLTOSuffix) when config->thinLTOIndexOnly is set. The buffer
// contents are re-wrapped in a MemoryBufferRef whose name is built from
// archiveName, the file name of `path` and offsetInArchive.
// NOTE(review): the declaration of the `lazy` parameter and part of the name
// expression are not visible in this listing.
982 BitcodeFile::BitcodeFile(COFFLinkerContext
&ctx
, MemoryBufferRef mb
,
983 StringRef archiveName
, uint64_t offsetInArchive
,
985 : InputFile(ctx
, BitcodeKind
, mb
, lazy
) {
986 std::string path
= mb
.getBufferIdentifier().str();
987 if (config
->thinLTOIndexOnly
)
988 path
= replaceThinLTOSuffix(mb
.getBufferIdentifier());
990 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
991 // name. If two archives define two members with the same name, this
992 // causes a collision which result in only one of the objects being taken
993 // into consideration at LTO time (which very likely causes undefined
994 // symbols later in the link stage). So we append file offset to make
996 MemoryBufferRef
mbref(mb
.getBuffer(),
997 saver().save(archiveName
.empty()
1000 sys::path::filename(path
) +
1001 utostr(offsetInArchive
)));
// check() is applied to the Expected result of lto::InputFile::create.
1003 obj
= check(lto::InputFile::create(mbref
));
// Destructor defaulted out of line.
// NOTE(review): presumably defined in this file so that the type owned through
// `obj` is complete where the destructor is emitted - confirm against the
// header.
1006 BitcodeFile::~BitcodeFile() = default;
1009 // Convenience class for initializing a coff_section with specific flags.
// The constructor assigns only the Characteristics field of `section`.
1012 FakeSection(int c
) { section
.Characteristics
= c
; }
// The section header that carries the flags.
1014 coff_section section
;
1017 // Convenience class for initializing a SectionChunk with specific flags.
// The wrapped chunk is constructed with a null first argument and the given
// section header, and its comdat selection is set to IMAGE_COMDAT_SELECT_ANY.
1018 class FakeSectionChunk
{
1020 FakeSectionChunk(const coff_section
*section
) : chunk(nullptr, section
) {
1021 // Comdats from LTO files can't be fully treated as regular comdats
1022 // at this point; we don't know what size or contents they are going to
1023 // have, so we can't do proper checking of such aspects of them.
1024 chunk
.selection
= IMAGE_COMDAT_SELECT_ANY
;
1030 FakeSection
ltoTextSection(IMAGE_SCN_MEM_EXECUTE
);
1031 FakeSection
ltoDataSection(IMAGE_SCN_CNT_INITIALIZED_DATA
);
1032 FakeSectionChunk
ltoTextSectionChunk(<oTextSection
.section
);
1033 FakeSectionChunk
ltoDataSectionChunk(<oDataSection
.section
);
// Registers the symbols of this bitcode file with the symbol table.
// First addComdat is called once per entry of obj->getComdatTable(). Then
// every LTO symbol is classified (undefined, common, weak with an indirect
// fallback, comdat member, or regular definition) and added; the resulting
// Symbol is appended to `symbols`, and also to config->gcroot when the LTO
// symbol is marked as used. Finally the linker directives are taken from
// obj->getCOFFLinkerOpts().
// NOTE(review): `<oTextSectionChunk` and `<oDataSectionChunk` below look like
// mangled `&ltoTextSectionChunk` / `&ltoDataSectionChunk` (address-of the
// file-local placeholder chunks) - confirm and repair.
// NOTE(review): the declaration of `sym`, the assignment into `comdat[i]` and
// several `else` lines are not visible in this listing.
1036 void BitcodeFile::parse() {
1037 llvm::StringSaver
&saver
= lld::saver();
// One (Symbol *, bool) slot per comdat table entry.
1038 std::vector
<std::pair
<Symbol
*, bool>> comdat(obj
->getComdatTable().size());
1039 for (size_t i
= 0; i
!= obj
->getComdatTable().size(); ++i
)
1040 // FIXME: Check nodeduplicate
1042 ctx
.symtab
.addComdat(this, saver
.save(obj
->getComdatTable()[i
].first
));
1043 for (const lto::InputFile::Symbol
&objSym
: obj
->symbols()) {
1044 StringRef symName
= saver
.save(objSym
.getName());
1045 int comdatIndex
= objSym
.getComdatIndex();
// Pick a placeholder chunk depending on whether the symbol is executable.
1047 SectionChunk
*fakeSC
= nullptr;
1048 if (objSym
.isExecutable())
1049 fakeSC
= <oTextSectionChunk
.chunk
;
1051 fakeSC
= <oDataSectionChunk
.chunk
;
1052 if (objSym
.isUndefined()) {
1053 sym
= ctx
.symtab
.addUndefined(symName
, this, false);
1054 } else if (objSym
.isCommon()) {
1055 sym
= ctx
.symtab
.addCommon(this, symName
, objSym
.getCommonSize());
1056 } else if (objSym
.isWeak() && objSym
.isIndirect()) {
// Weak symbol with a fallback: add it as undefined, add the fallback name as
// undefined too, and link the two through checkAndSetWeakAlias.
1058 sym
= ctx
.symtab
.addUndefined(symName
, this, true);
1059 std::string fallback
= std::string(objSym
.getCOFFWeakExternalFallback());
1060 Symbol
*alias
= ctx
.symtab
.addUndefined(saver
.save(fallback
));
1061 checkAndSetWeakAlias(&ctx
.symtab
, this, sym
, alias
);
1062 } else if (comdatIndex
!= -1) {
// A symbol named like its comdat reuses the Symbol stored in the comdat slot
// and gets the placeholder chunk if it has no data yet.
1063 if (symName
== obj
->getComdatTable()[comdatIndex
].first
) {
1064 sym
= comdat
[comdatIndex
].first
;
1065 if (cast
<DefinedRegular
>(sym
)->data
== nullptr)
1066 cast
<DefinedRegular
>(sym
)->data
= &fakeSC
->repl
;
1067 } else if (comdat
[comdatIndex
].second
) {
1068 sym
= ctx
.symtab
.addRegular(this, symName
, nullptr, fakeSC
);
1070 sym
= ctx
.symtab
.addUndefined(symName
, this, false);
1073 sym
= ctx
.symtab
.addRegular(this, symName
, nullptr, fakeSC
);
1075 symbols
.push_back(sym
);
// Symbols marked as used are recorded as GC roots.
1076 if (objSym
.isUsed())
1077 config
->gcroot
.push_back(sym
);
1079 directives
= obj
->getCOFFLinkerOpts();
1082 void BitcodeFile::parseLazy() {
1083 for (const lto::InputFile::Symbol
&sym
: obj
->symbols())
1084 if (!sym
.isUndefined())
1085 ctx
.symtab
.addLazyObject(this, sym
.getName());
// Selects a COFF machine type from the architecture of the module's target
// triple; the visible fallback returns IMAGE_FILE_MACHINE_UNKNOWN.
// NOTE(review): the return statements of the listed cases (and any further
// cases) are not visible in this listing.
1088 MachineTypes
BitcodeFile::getMachineType() {
1089 switch (Triple(obj
->getTargetTriple()).getArch()) {
1090 case Triple::x86_64
:
1096 case Triple::aarch64
:
1099 return IMAGE_FILE_MACHINE_UNKNOWN
;
1103 std::string
lld::coff::replaceThinLTOSuffix(StringRef path
) {
1104 StringRef suffix
= config
->thinLTOObjectSuffixReplace
.first
;
1105 StringRef repl
= config
->thinLTOObjectSuffixReplace
.second
;
1107 if (path
.consume_back(suffix
))
1108 return (path
+ repl
).str();
1109 return std::string(path
);
1112 static bool isRVACode(COFFObjectFile
*coffObj
, uint64_t rva
, InputFile
*file
) {
1113 for (size_t i
= 1, e
= coffObj
->getNumberOfSections(); i
<= e
; i
++) {
1114 const coff_section
*sec
= CHECK(coffObj
->getSection(i
), file
);
1115 if (rva
>= sec
->VirtualAddress
&&
1116 rva
<= sec
->VirtualAddress
+ sec
->VirtualSize
) {
1117 return (sec
->Characteristics
& COFF::IMAGE_SCN_CNT_CODE
) != 0;
// Parses the buffer as a PE-COFF image and registers lazy DLL symbols for its
// exports. An error is reported when the buffer is not a COFF file or has
// neither a PE32 nor a PE32+ header. For each export a Symbol record is
// filled with the DLL name, the symbol name, the import type (code or data,
// decided by isRVACode on the export RVA) and the name type. On I386 the
// symbol name gets a "_" prefix and the name type becomes
// IMPORT_NAME_NOPREFIX. addLazyDLLSymbol is called with "__imp_" + name and
// with the plain name.
// NOTE(review): several control-flow lines are not visible in this listing
// (the assignment of coffObj, the early returns after error(), the handling of
// empty symbol names, the declaration of exportRVA, and whatever guards the
// second addLazyDLLSymbol call) - confirm against the complete file.
1123 void DLLFile::parse() {
1124 // Parse a memory buffer as a PE-COFF executable.
1125 std::unique_ptr
<Binary
> bin
= CHECK(createBinary(mb
), this);
1127 if (auto *obj
= dyn_cast
<COFFObjectFile
>(bin
.get())) {
1131 error(toString(this) + " is not a COFF file");
1135 if (!coffObj
->getPE32Header() && !coffObj
->getPE32PlusHeader()) {
1136 error(toString(this) + " is not a PE-COFF executable");
1140 for (const auto &exp
: coffObj
->export_directories()) {
1141 StringRef dllName
, symbolName
;
1143 checkError(exp
.getDllName(dllName
));
1144 checkError(exp
.getSymbolName(symbolName
));
1145 checkError(exp
.getExportRVA(exportRVA
));
1147 if (symbolName
.empty())
// Classify the export as code or data by the section containing its RVA.
1150 bool code
= isRVACode(coffObj
.get(), exportRVA
, this);
1152 Symbol
*s
= make
<Symbol
>();
1153 s
->dllName
= dllName
;
1154 s
->symbolName
= symbolName
;
1155 s
->importType
= code
? ImportType::IMPORT_CODE
: ImportType::IMPORT_DATA
;
1156 s
->nameType
= ImportNameType::IMPORT_NAME
;
// On I386 the linker-visible name carries a leading "_"; the record is marked
// NOPREFIX accordingly.
1158 if (coffObj
->getMachine() == I386
) {
1159 s
->symbolName
= symbolName
= saver().save("_" + symbolName
);
1160 s
->nameType
= ImportNameType::IMPORT_NAME_NOPREFIX
;
1163 StringRef impName
= saver().save("__imp_" + symbolName
);
1164 ctx
.symtab
.addLazyDLLSymbol(this, s
, impName
);
1166 ctx
.symtab
.addLazyDLLSymbol(this, s
, symbolName
);
// Returns the machine type reported by the parsed COFF object, with
// IMAGE_FILE_MACHINE_UNKNOWN as the fallback.
// NOTE(review): the condition guarding the first return is not visible in this
// listing; presumably it tests whether coffObj is set - confirm.
1170 MachineTypes
DLLFile::getMachineType() {
1172 return static_cast<MachineTypes
>(coffObj
->getMachine());
1173 return IMAGE_FILE_MACHINE_UNKNOWN
;
// Synthesizes an in-memory short import library member for `s` and adds it to
// the symbol table as an ImportFile. The buffer consists of a
// coff_import_header followed by the symbol name and the DLL name, each
// NUL-terminated. `seen` is keyed by symbol name.
// NOTE(review): the statement under the `seen` check (presumably an early
// return for names already processed), the declaration and first advancement
// of `p`, and possibly further header field writes are not visible in this
// listing - confirm against the complete file.
1176 void DLLFile::makeImport(DLLFile::Symbol
*s
) {
1177 if (!seen
.insert(s
->symbolName
).second
)
1180 size_t impSize
= s
->dllName
.size() + s
->symbolName
.size() + 2; // +2 for NULs
1181 size_t size
= sizeof(coff_import_header
) + impSize
;
1182 char *buf
= bAlloc().Allocate
<char>(size
);
// Zero-fill first: the memcpy calls below copy the names without their
// terminators, so the NULs come from this memset.
1183 memset(buf
, 0, size
);
1185 auto *imp
= reinterpret_cast<coff_import_header
*>(p
);
1188 imp
->Machine
= coffObj
->getMachine();
1189 imp
->SizeOfData
= impSize
;
1190 imp
->OrdinalHint
= 0; // Only linking by name
// TypeInfo packs the name type above the two low bits that hold the import
// type.
1191 imp
->TypeInfo
= (s
->nameType
<< 2) | s
->importType
;
1193 // Write symbol name and DLL name.
1194 memcpy(p
, s
->symbolName
.data(), s
->symbolName
.size());
1195 p
+= s
->symbolName
.size() + 1;
1196 memcpy(p
, s
->dllName
.data(), s
->dllName
.size());
// The synthesized buffer is named after the DLL.
1197 MemoryBufferRef mbref
= MemoryBufferRef(StringRef(buf
, size
), s
->dllName
);
1198 ImportFile
*impFile
= make
<ImportFile
>(ctx
, mbref
);
1199 ctx
.symtab
.addFile(impFile
);