[LLD][COFF] Fix TypeServerSource matcher with more than one collision
[llvm-project.git] / lld / COFF / InputFiles.cpp
blob0f3f5e0ffe7c56635426bce57e66a84f6ae19b73
1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "InputFiles.h"
10 #include "COFFLinkerContext.h"
11 #include "Chunks.h"
12 #include "Config.h"
13 #include "DebugTypes.h"
14 #include "Driver.h"
15 #include "SymbolTable.h"
16 #include "Symbols.h"
17 #include "lld/Common/DWARF.h"
18 #include "llvm-c/lto.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/BinaryFormat/COFF.h"
23 #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
24 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
25 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
26 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
27 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
28 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
29 #include "llvm/LTO/LTO.h"
30 #include "llvm/Object/Binary.h"
31 #include "llvm/Object/COFF.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/Endian.h"
34 #include "llvm/Support/Error.h"
35 #include "llvm/Support/ErrorOr.h"
36 #include "llvm/Support/FileSystem.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Target/TargetOptions.h"
39 #include <cstring>
40 #include <system_error>
41 #include <utility>
43 using namespace llvm;
44 using namespace llvm::COFF;
45 using namespace llvm::codeview;
46 using namespace llvm::object;
47 using namespace llvm::support::endian;
48 using namespace lld;
49 using namespace lld::coff;
51 using llvm::Triple;
52 using llvm::support::ulittle32_t;
54 // Returns the last element of a path, which is supposed to be a filename.
55 static StringRef getBasename(StringRef path) {
56 return sys::path::filename(path, sys::path::Style::windows);
59 // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
60 std::string lld::toString(const coff::InputFile *file) {
61 if (!file)
62 return "<internal>";
63 if (file->parentName.empty() || file->kind() == coff::InputFile::ImportKind)
64 return std::string(file->getName());
66 return (getBasename(file->parentName) + "(" + getBasename(file->getName()) +
67 ")")
68 .str();
71 /// Checks that Source is compatible with being a weak alias to Target.
72 /// If Source is Undefined and has no weak alias set, makes it a weak
73 /// alias to Target.
74 static void checkAndSetWeakAlias(SymbolTable *symtab, InputFile *f,
75 Symbol *source, Symbol *target) {
76 if (auto *u = dyn_cast<Undefined>(source)) {
77 if (u->weakAlias && u->weakAlias != target) {
78 // Weak aliases as produced by GCC are named in the form
79 // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
80 // of another symbol emitted near the weak symbol.
81 // Just use the definition from the first object file that defined
82 // this weak symbol.
83 if (config->mingw)
84 return;
85 symtab->reportDuplicate(source, f);
87 u->weakAlias = target;
91 static bool ignoredSymbolName(StringRef name) {
92 return name == "@feat.00" || name == "@comp.id";
95 ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m)
96 : InputFile(ctx, ArchiveKind, m) {}
98 void ArchiveFile::parse() {
99 // Parse a MemoryBufferRef as an archive file.
100 file = CHECK(Archive::create(mb), this);
102 // Read the symbol table to construct Lazy objects.
103 for (const Archive::Symbol &sym : file->symbols())
104 ctx.symtab.addLazyArchive(this, sym);
107 // Returns a buffer pointing to a member file containing a given symbol.
108 void ArchiveFile::addMember(const Archive::Symbol &sym) {
109 const Archive::Child &c =
110 CHECK(sym.getMember(),
111 "could not get the member for symbol " + toCOFFString(sym));
113 // Return an empty buffer if we have already returned the same buffer.
114 if (!seen.insert(c.getChildOffset()).second)
115 return;
117 driver->enqueueArchiveMember(c, sym, getName());
120 std::vector<MemoryBufferRef> lld::coff::getArchiveMembers(Archive *file) {
121 std::vector<MemoryBufferRef> v;
122 Error err = Error::success();
123 for (const Archive::Child &c : file->children(err)) {
124 MemoryBufferRef mbref =
125 CHECK(c.getMemoryBufferRef(),
126 file->getFileName() +
127 ": could not get the buffer for a child of the archive");
128 v.push_back(mbref);
130 if (err)
131 fatal(file->getFileName() +
132 ": Archive::children failed: " + toString(std::move(err)));
133 return v;
136 void ObjFile::parseLazy() {
137 // Native object file.
138 std::unique_ptr<Binary> coffObjPtr = CHECK(createBinary(mb), this);
139 COFFObjectFile *coffObj = cast<COFFObjectFile>(coffObjPtr.get());
140 uint32_t numSymbols = coffObj->getNumberOfSymbols();
141 for (uint32_t i = 0; i < numSymbols; ++i) {
142 COFFSymbolRef coffSym = check(coffObj->getSymbol(i));
143 if (coffSym.isUndefined() || !coffSym.isExternal() ||
144 coffSym.isWeakExternal())
145 continue;
146 StringRef name = check(coffObj->getSymbolName(coffSym));
147 if (coffSym.isAbsolute() && ignoredSymbolName(name))
148 continue;
149 ctx.symtab.addLazyObject(this, name);
150 i += coffSym.getNumberOfAuxSymbols();
154 void ObjFile::parse() {
155 // Parse a memory buffer as a COFF file.
156 std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
158 if (auto *obj = dyn_cast<COFFObjectFile>(bin.get())) {
159 bin.release();
160 coffObj.reset(obj);
161 } else {
162 fatal(toString(this) + " is not a COFF file");
165 // Read section and symbol tables.
166 initializeChunks();
167 initializeSymbols();
168 initializeFlags();
169 initializeDependencies();
172 const coff_section *ObjFile::getSection(uint32_t i) {
173 auto sec = coffObj->getSection(i);
174 if (!sec)
175 fatal("getSection failed: #" + Twine(i) + ": " + toString(sec.takeError()));
176 return *sec;
179 // We set SectionChunk pointers in the SparseChunks vector to this value
180 // temporarily to mark comdat sections as having an unknown resolution. As we
181 // walk the object file's symbol table, once we visit either a leader symbol or
182 // an associative section definition together with the parent comdat's leader,
183 // we set the pointer to either nullptr (to mark the section as discarded) or a
184 // valid SectionChunk for that section.
185 static SectionChunk *const pendingComdat = reinterpret_cast<SectionChunk *>(1);
187 void ObjFile::initializeChunks() {
188 uint32_t numSections = coffObj->getNumberOfSections();
189 sparseChunks.resize(numSections + 1);
190 for (uint32_t i = 1; i < numSections + 1; ++i) {
191 const coff_section *sec = getSection(i);
192 if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
193 sparseChunks[i] = pendingComdat;
194 else
195 sparseChunks[i] = readSection(i, nullptr, "");
199 SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
200 const coff_aux_section_definition *def,
201 StringRef leaderName) {
202 const coff_section *sec = getSection(sectionNumber);
204 StringRef name;
205 if (Expected<StringRef> e = coffObj->getSectionName(sec))
206 name = *e;
207 else
208 fatal("getSectionName failed: #" + Twine(sectionNumber) + ": " +
209 toString(e.takeError()));
211 if (name == ".drectve") {
212 ArrayRef<uint8_t> data;
213 cantFail(coffObj->getSectionContents(sec, data));
214 directives = StringRef((const char *)data.data(), data.size());
215 return nullptr;
218 if (name == ".llvm_addrsig") {
219 addrsigSec = sec;
220 return nullptr;
223 if (name == ".llvm.call-graph-profile") {
224 callgraphSec = sec;
225 return nullptr;
228 // Object files may have DWARF debug info or MS CodeView debug info
229 // (or both).
231 // DWARF sections don't need any special handling from the perspective
232 // of the linker; they are just a data section containing relocations.
233 // We can just link them to complete debug info.
235 // CodeView needs linker support. We need to interpret debug info,
236 // and then write it to a separate .pdb file.
238 // Ignore DWARF debug info unless /debug is given.
239 if (!config->debug && name.startswith(".debug_"))
240 return nullptr;
242 if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
243 return nullptr;
244 auto *c = make<SectionChunk>(this, sec);
245 if (def)
246 c->checksum = def->CheckSum;
248 // CodeView sections are stored to a different vector because they are not
249 // linked in the regular manner.
250 if (c->isCodeView())
251 debugChunks.push_back(c);
252 else if (name == ".gfids$y")
253 guardFidChunks.push_back(c);
254 else if (name == ".giats$y")
255 guardIATChunks.push_back(c);
256 else if (name == ".gljmp$y")
257 guardLJmpChunks.push_back(c);
258 else if (name == ".gehcont$y")
259 guardEHContChunks.push_back(c);
260 else if (name == ".sxdata")
261 sxDataChunks.push_back(c);
262 else if (config->tailMerge && sec->NumberOfRelocations == 0 &&
263 name == ".rdata" && leaderName.startswith("??_C@"))
264 // COFF sections that look like string literal sections (i.e. no
265 // relocations, in .rdata, leader symbol name matches the MSVC name mangling
266 // for string literals) are subject to string tail merging.
267 MergeChunk::addSection(ctx, c);
268 else if (name == ".rsrc" || name.startswith(".rsrc$"))
269 resourceChunks.push_back(c);
270 else
271 chunks.push_back(c);
273 return c;
276 void ObjFile::includeResourceChunks() {
277 chunks.insert(chunks.end(), resourceChunks.begin(), resourceChunks.end());
280 void ObjFile::readAssociativeDefinition(
281 COFFSymbolRef sym, const coff_aux_section_definition *def) {
282 readAssociativeDefinition(sym, def, def->getNumber(sym.isBigObj()));
285 void ObjFile::readAssociativeDefinition(COFFSymbolRef sym,
286 const coff_aux_section_definition *def,
287 uint32_t parentIndex) {
288 SectionChunk *parent = sparseChunks[parentIndex];
289 int32_t sectionNumber = sym.getSectionNumber();
291 auto diag = [&]() {
292 StringRef name = check(coffObj->getSymbolName(sym));
294 StringRef parentName;
295 const coff_section *parentSec = getSection(parentIndex);
296 if (Expected<StringRef> e = coffObj->getSectionName(parentSec))
297 parentName = *e;
298 error(toString(this) + ": associative comdat " + name + " (sec " +
299 Twine(sectionNumber) + ") has invalid reference to section " +
300 parentName + " (sec " + Twine(parentIndex) + ")");
303 if (parent == pendingComdat) {
304 // This can happen if an associative comdat refers to another associative
305 // comdat that appears after it (invalid per COFF spec) or to a section
306 // without any symbols.
307 diag();
308 return;
311 // Check whether the parent is prevailing. If it is, so are we, and we read
312 // the section; otherwise mark it as discarded.
313 if (parent) {
314 SectionChunk *c = readSection(sectionNumber, def, "");
315 sparseChunks[sectionNumber] = c;
316 if (c) {
317 c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE;
318 parent->addAssociative(c);
320 } else {
321 sparseChunks[sectionNumber] = nullptr;
325 void ObjFile::recordPrevailingSymbolForMingw(
326 COFFSymbolRef sym, DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
327 // For comdat symbols in executable sections, where this is the copy
328 // of the section chunk we actually include instead of discarding it,
329 // add the symbol to a map to allow using it for implicitly
330 // associating .[px]data$<func> sections to it.
331 // Use the suffix from the .text$<func> instead of the leader symbol
332 // name, for cases where the names differ (i386 mangling/decorations,
333 // cases where the leader is a weak symbol named .weak.func.default*).
334 int32_t sectionNumber = sym.getSectionNumber();
335 SectionChunk *sc = sparseChunks[sectionNumber];
336 if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
337 StringRef name = sc->getSectionName().split('$').second;
338 prevailingSectionMap[name] = sectionNumber;
342 void ObjFile::maybeAssociateSEHForMingw(
343 COFFSymbolRef sym, const coff_aux_section_definition *def,
344 const DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
345 StringRef name = check(coffObj->getSymbolName(sym));
346 if (name.consume_front(".pdata$") || name.consume_front(".xdata$") ||
347 name.consume_front(".eh_frame$")) {
348 // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
349 // associative to the symbol <func>.
350 auto parentSym = prevailingSectionMap.find(name);
351 if (parentSym != prevailingSectionMap.end())
352 readAssociativeDefinition(sym, def, parentSym->second);
356 Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
357 SectionChunk *sc = sparseChunks[sym.getSectionNumber()];
358 if (sym.isExternal()) {
359 StringRef name = check(coffObj->getSymbolName(sym));
360 if (sc)
361 return ctx.symtab.addRegular(this, name, sym.getGeneric(), sc,
362 sym.getValue());
363 // For MinGW symbols named .weak.* that point to a discarded section,
364 // don't create an Undefined symbol. If nothing ever refers to the symbol,
365 // everything should be fine. If something actually refers to the symbol
366 // (e.g. the undefined weak alias), linking will fail due to undefined
367 // references at the end.
368 if (config->mingw && name.startswith(".weak."))
369 return nullptr;
370 return ctx.symtab.addUndefined(name, this, false);
372 if (sc)
373 return make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
374 /*IsExternal*/ false, sym.getGeneric(), sc);
375 return nullptr;
378 void ObjFile::initializeSymbols() {
379 uint32_t numSymbols = coffObj->getNumberOfSymbols();
380 symbols.resize(numSymbols);
382 SmallVector<std::pair<Symbol *, uint32_t>, 8> weakAliases;
383 std::vector<uint32_t> pendingIndexes;
384 pendingIndexes.reserve(numSymbols);
386 DenseMap<StringRef, uint32_t> prevailingSectionMap;
387 std::vector<const coff_aux_section_definition *> comdatDefs(
388 coffObj->getNumberOfSections() + 1);
390 for (uint32_t i = 0; i < numSymbols; ++i) {
391 COFFSymbolRef coffSym = check(coffObj->getSymbol(i));
392 bool prevailingComdat;
393 if (coffSym.isUndefined()) {
394 symbols[i] = createUndefined(coffSym);
395 } else if (coffSym.isWeakExternal()) {
396 symbols[i] = createUndefined(coffSym);
397 uint32_t tagIndex = coffSym.getAux<coff_aux_weak_external>()->TagIndex;
398 weakAliases.emplace_back(symbols[i], tagIndex);
399 } else if (Optional<Symbol *> optSym =
400 createDefined(coffSym, comdatDefs, prevailingComdat)) {
401 symbols[i] = *optSym;
402 if (config->mingw && prevailingComdat)
403 recordPrevailingSymbolForMingw(coffSym, prevailingSectionMap);
404 } else {
405 // createDefined() returns None if a symbol belongs to a section that
406 // was pending at the point when the symbol was read. This can happen in
407 // two cases:
408 // 1) section definition symbol for a comdat leader;
409 // 2) symbol belongs to a comdat section associated with another section.
410 // In both of these cases, we can expect the section to be resolved by
411 // the time we finish visiting the remaining symbols in the symbol
412 // table. So we postpone the handling of this symbol until that time.
413 pendingIndexes.push_back(i);
415 i += coffSym.getNumberOfAuxSymbols();
418 for (uint32_t i : pendingIndexes) {
419 COFFSymbolRef sym = check(coffObj->getSymbol(i));
420 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
421 if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
422 readAssociativeDefinition(sym, def);
423 else if (config->mingw)
424 maybeAssociateSEHForMingw(sym, def, prevailingSectionMap);
426 if (sparseChunks[sym.getSectionNumber()] == pendingComdat) {
427 StringRef name = check(coffObj->getSymbolName(sym));
428 log("comdat section " + name +
429 " without leader and unassociated, discarding");
430 continue;
432 symbols[i] = createRegular(sym);
435 for (auto &kv : weakAliases) {
436 Symbol *sym = kv.first;
437 uint32_t idx = kv.second;
438 checkAndSetWeakAlias(&ctx.symtab, this, sym, symbols[idx]);
441 // Free the memory used by sparseChunks now that symbol loading is finished.
442 decltype(sparseChunks)().swap(sparseChunks);
445 Symbol *ObjFile::createUndefined(COFFSymbolRef sym) {
446 StringRef name = check(coffObj->getSymbolName(sym));
447 return ctx.symtab.addUndefined(name, this, sym.isWeakExternal());
450 static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj,
451 int32_t section) {
452 uint32_t numSymbols = obj->getNumberOfSymbols();
453 for (uint32_t i = 0; i < numSymbols; ++i) {
454 COFFSymbolRef sym = check(obj->getSymbol(i));
455 if (sym.getSectionNumber() != section)
456 continue;
457 if (const coff_aux_section_definition *def = sym.getSectionDefinition())
458 return def;
460 return nullptr;
463 void ObjFile::handleComdatSelection(
464 COFFSymbolRef sym, COMDATType &selection, bool &prevailing,
465 DefinedRegular *leader,
466 const llvm::object::coff_aux_section_definition *def) {
467 if (prevailing)
468 return;
469 // There's already an existing comdat for this symbol: `Leader`.
470 // Use the comdats's selection field to determine if the new
471 // symbol in `Sym` should be discarded, produce a duplicate symbol
472 // error, etc.
474 SectionChunk *leaderChunk = leader->getChunk();
475 COMDATType leaderSelection = leaderChunk->selection;
477 assert(leader->data && "Comdat leader without SectionChunk?");
478 if (isa<BitcodeFile>(leader->file)) {
479 // If the leader is only a LTO symbol, we don't know e.g. its final size
480 // yet, so we can't do the full strict comdat selection checking yet.
481 selection = leaderSelection = IMAGE_COMDAT_SELECT_ANY;
484 if ((selection == IMAGE_COMDAT_SELECT_ANY &&
485 leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) ||
486 (selection == IMAGE_COMDAT_SELECT_LARGEST &&
487 leaderSelection == IMAGE_COMDAT_SELECT_ANY)) {
488 // cl.exe picks "any" for vftables when building with /GR- and
489 // "largest" when building with /GR. To be able to link object files
490 // compiled with each flag, "any" and "largest" are merged as "largest".
491 leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST;
494 // GCCs __declspec(selectany) doesn't actually pick "any" but "same size as".
495 // Clang on the other hand picks "any". To be able to link two object files
496 // with a __declspec(selectany) declaration, one compiled with gcc and the
497 // other with clang, we merge them as proper "same size as"
498 if (config->mingw && ((selection == IMAGE_COMDAT_SELECT_ANY &&
499 leaderSelection == IMAGE_COMDAT_SELECT_SAME_SIZE) ||
500 (selection == IMAGE_COMDAT_SELECT_SAME_SIZE &&
501 leaderSelection == IMAGE_COMDAT_SELECT_ANY))) {
502 leaderSelection = selection = IMAGE_COMDAT_SELECT_SAME_SIZE;
505 // Other than that, comdat selections must match. This is a bit more
506 // strict than link.exe which allows merging "any" and "largest" if "any"
507 // is the first symbol the linker sees, and it allows merging "largest"
508 // with everything (!) if "largest" is the first symbol the linker sees.
509 // Making this symmetric independent of which selection is seen first
510 // seems better though.
511 // (This behavior matches ModuleLinker::getComdatResult().)
512 if (selection != leaderSelection) {
513 log(("conflicting comdat type for " + toString(*leader) + ": " +
514 Twine((int)leaderSelection) + " in " + toString(leader->getFile()) +
515 " and " + Twine((int)selection) + " in " + toString(this))
516 .str());
517 ctx.symtab.reportDuplicate(leader, this);
518 return;
521 switch (selection) {
522 case IMAGE_COMDAT_SELECT_NODUPLICATES:
523 ctx.symtab.reportDuplicate(leader, this);
524 break;
526 case IMAGE_COMDAT_SELECT_ANY:
527 // Nothing to do.
528 break;
530 case IMAGE_COMDAT_SELECT_SAME_SIZE:
531 if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) {
532 if (!config->mingw) {
533 ctx.symtab.reportDuplicate(leader, this);
534 } else {
535 const coff_aux_section_definition *leaderDef = nullptr;
536 if (leaderChunk->file)
537 leaderDef = findSectionDef(leaderChunk->file->getCOFFObj(),
538 leaderChunk->getSectionNumber());
539 if (!leaderDef || leaderDef->Length != def->Length)
540 ctx.symtab.reportDuplicate(leader, this);
543 break;
545 case IMAGE_COMDAT_SELECT_EXACT_MATCH: {
546 SectionChunk newChunk(this, getSection(sym));
547 // link.exe only compares section contents here and doesn't complain
548 // if the two comdat sections have e.g. different alignment.
549 // Match that.
550 if (leaderChunk->getContents() != newChunk.getContents())
551 ctx.symtab.reportDuplicate(leader, this, &newChunk, sym.getValue());
552 break;
555 case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
556 // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
557 // (This means lld-link doesn't produce duplicate symbol errors for
558 // associative comdats while link.exe does, but associate comdats
559 // are never extern in practice.)
560 llvm_unreachable("createDefined not called for associative comdats");
562 case IMAGE_COMDAT_SELECT_LARGEST:
563 if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) {
564 // Replace the existing comdat symbol with the new one.
565 StringRef name = check(coffObj->getSymbolName(sym));
566 // FIXME: This is incorrect: With /opt:noref, the previous sections
567 // make it into the final executable as well. Correct handling would
568 // be to undo reading of the whole old section that's being replaced,
569 // or doing one pass that determines what the final largest comdat
570 // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
571 // only the largest one.
572 replaceSymbol<DefinedRegular>(leader, this, name, /*IsCOMDAT*/ true,
573 /*IsExternal*/ true, sym.getGeneric(),
574 nullptr);
575 prevailing = true;
577 break;
579 case IMAGE_COMDAT_SELECT_NEWEST:
580 llvm_unreachable("should have been rejected earlier");
584 Optional<Symbol *> ObjFile::createDefined(
585 COFFSymbolRef sym,
586 std::vector<const coff_aux_section_definition *> &comdatDefs,
587 bool &prevailing) {
588 prevailing = false;
589 auto getName = [&]() { return check(coffObj->getSymbolName(sym)); };
591 if (sym.isCommon()) {
592 auto *c = make<CommonChunk>(sym);
593 chunks.push_back(c);
594 return ctx.symtab.addCommon(this, getName(), sym.getValue(),
595 sym.getGeneric(), c);
598 if (sym.isAbsolute()) {
599 StringRef name = getName();
601 if (name == "@feat.00")
602 feat00Flags = sym.getValue();
603 // Skip special symbols.
604 if (ignoredSymbolName(name))
605 return nullptr;
607 if (sym.isExternal())
608 return ctx.symtab.addAbsolute(name, sym);
609 return make<DefinedAbsolute>(name, sym);
612 int32_t sectionNumber = sym.getSectionNumber();
613 if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
614 return nullptr;
616 if (llvm::COFF::isReservedSectionNumber(sectionNumber))
617 fatal(toString(this) + ": " + getName() +
618 " should not refer to special section " + Twine(sectionNumber));
620 if ((uint32_t)sectionNumber >= sparseChunks.size())
621 fatal(toString(this) + ": " + getName() +
622 " should not refer to non-existent section " + Twine(sectionNumber));
624 // Comdat handling.
625 // A comdat symbol consists of two symbol table entries.
626 // The first symbol entry has the name of the section (e.g. .text), fixed
627 // values for the other fields, and one auxiliary record.
628 // The second symbol entry has the name of the comdat symbol, called the
629 // "comdat leader".
630 // When this function is called for the first symbol entry of a comdat,
631 // it sets comdatDefs and returns None, and when it's called for the second
632 // symbol entry it reads comdatDefs and then sets it back to nullptr.
634 // Handle comdat leader.
635 if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) {
636 comdatDefs[sectionNumber] = nullptr;
637 DefinedRegular *leader;
639 if (sym.isExternal()) {
640 std::tie(leader, prevailing) =
641 ctx.symtab.addComdat(this, getName(), sym.getGeneric());
642 } else {
643 leader = make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
644 /*IsExternal*/ false, sym.getGeneric());
645 prevailing = true;
648 if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES ||
649 // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
650 // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
651 def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) {
652 fatal("unknown comdat type " + std::to_string((int)def->Selection) +
653 " for " + getName() + " in " + toString(this));
655 COMDATType selection = (COMDATType)def->Selection;
657 if (leader->isCOMDAT)
658 handleComdatSelection(sym, selection, prevailing, leader, def);
660 if (prevailing) {
661 SectionChunk *c = readSection(sectionNumber, def, getName());
662 sparseChunks[sectionNumber] = c;
663 c->sym = cast<DefinedRegular>(leader);
664 c->selection = selection;
665 cast<DefinedRegular>(leader)->data = &c->repl;
666 } else {
667 sparseChunks[sectionNumber] = nullptr;
669 return leader;
672 // Prepare to handle the comdat leader symbol by setting the section's
673 // ComdatDefs pointer if we encounter a non-associative comdat.
674 if (sparseChunks[sectionNumber] == pendingComdat) {
675 if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
676 if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE)
677 comdatDefs[sectionNumber] = def;
679 return None;
682 return createRegular(sym);
685 MachineTypes ObjFile::getMachineType() {
686 if (coffObj)
687 return static_cast<MachineTypes>(coffObj->getMachine());
688 return IMAGE_FILE_MACHINE_UNKNOWN;
691 ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
692 if (SectionChunk *sec = SectionChunk::findByName(debugChunks, secName))
693 return sec->consumeDebugMagic();
694 return {};
697 // OBJ files systematically store critical information in a .debug$S stream,
698 // even if the TU was compiled with no debug info. At least two records are
699 // always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
700 // PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
701 // currently used to initialize the hotPatchable member.
702 void ObjFile::initializeFlags() {
703 ArrayRef<uint8_t> data = getDebugSection(".debug$S");
704 if (data.empty())
705 return;
707 DebugSubsectionArray subsections;
709 BinaryStreamReader reader(data, support::little);
710 ExitOnError exitOnErr;
711 exitOnErr(reader.readArray(subsections, data.size()));
713 for (const DebugSubsectionRecord &ss : subsections) {
714 if (ss.kind() != DebugSubsectionKind::Symbols)
715 continue;
717 unsigned offset = 0;
719 // Only parse the first two records. We are only looking for S_OBJNAME
720 // and S_COMPILE3, and they usually appear at the beginning of the
721 // stream.
722 for (unsigned i = 0; i < 2; ++i) {
723 Expected<CVSymbol> sym = readSymbolFromStream(ss.getRecordData(), offset);
724 if (!sym) {
725 consumeError(sym.takeError());
726 return;
728 if (sym->kind() == SymbolKind::S_COMPILE3) {
729 auto cs =
730 cantFail(SymbolDeserializer::deserializeAs<Compile3Sym>(sym.get()));
731 hotPatchable =
732 (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None;
734 if (sym->kind() == SymbolKind::S_OBJNAME) {
735 auto objName = cantFail(SymbolDeserializer::deserializeAs<ObjNameSym>(
736 sym.get()));
737 pchSignature = objName.Signature;
739 offset += sym->length();
744 // Depending on the compilation flags, OBJs can refer to external files,
745 // necessary to merge this OBJ into the final PDB. We currently support two
746 // types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
747 // And PDB type servers, when compiling with /Zi. This function extracts these
748 // dependencies and makes them available as a TpiSource interface (see
749 // DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
750 // output even with /Yc and /Yu and with /Zi.
751 void ObjFile::initializeDependencies() {
752 if (!config->debug)
753 return;
755 bool isPCH = false;
757 ArrayRef<uint8_t> data = getDebugSection(".debug$P");
758 if (!data.empty())
759 isPCH = true;
760 else
761 data = getDebugSection(".debug$T");
763 // symbols but no types, make a plain, empty TpiSource anyway, because it
764 // simplifies adding the symbols later.
765 if (data.empty()) {
766 if (!debugChunks.empty())
767 debugTypesObj = makeTpiSource(ctx, this);
768 return;
771 // Get the first type record. It will indicate if this object uses a type
772 // server (/Zi) or a PCH file (/Yu).
773 CVTypeArray types;
774 BinaryStreamReader reader(data, support::little);
775 cantFail(reader.readArray(types, reader.getLength()));
776 CVTypeArray::Iterator firstType = types.begin();
777 if (firstType == types.end())
778 return;
780 // Remember the .debug$T or .debug$P section.
781 debugTypes = data;
783 // This object file is a PCH file that others will depend on.
784 if (isPCH) {
785 debugTypesObj = makePrecompSource(ctx, this);
786 return;
789 // This object file was compiled with /Zi. Enqueue the PDB dependency.
790 if (firstType->kind() == LF_TYPESERVER2) {
791 TypeServer2Record ts = cantFail(
792 TypeDeserializer::deserializeAs<TypeServer2Record>(firstType->data()));
793 debugTypesObj = makeUseTypeServerSource(ctx, this, ts);
794 enqueuePdbFile(ts.getName(), this);
795 return;
798 // This object was compiled with /Yu. It uses types from another object file
799 // with a matching signature.
800 if (firstType->kind() == LF_PRECOMP) {
801 PrecompRecord precomp = cantFail(
802 TypeDeserializer::deserializeAs<PrecompRecord>(firstType->data()));
803 debugTypesObj = makeUsePrecompSource(ctx, this, precomp);
804 // Drop the LF_PRECOMP record from the input stream.
805 debugTypes = debugTypes.drop_front(firstType->RecordData.size());
806 return;
809 // This is a plain old object file.
810 debugTypesObj = makeTpiSource(ctx, this);
813 // Make a PDB path assuming the PDB is in the same folder as the OBJ
814 static std::string getPdbBaseName(ObjFile *file, StringRef tSPath) {
815 StringRef localPath =
816 !file->parentName.empty() ? file->parentName : file->getName();
817 SmallString<128> path = sys::path::parent_path(localPath);
819 // Currently, type server PDBs are only created by MSVC cl, which only runs
820 // on Windows, so we can assume type server paths are Windows style.
821 sys::path::append(path,
822 sys::path::filename(tSPath, sys::path::Style::windows));
823 return std::string(path.str());
826 // The casing of the PDB path stamped in the OBJ can differ from the actual path
827 // on disk. With this, we ensure to always use lowercase as a key for the
828 // pdbInputFileInstances map, at least on Windows.
829 static std::string normalizePdbPath(StringRef path) {
830 #if defined(_WIN32)
831 return path.lower();
832 #else // LINUX
833 return std::string(path);
834 #endif
837 // If existing, return the actual PDB path on disk.
838 static Optional<std::string> findPdbPath(StringRef pdbPath,
839 ObjFile *dependentFile) {
840 // Ensure the file exists before anything else. In some cases, if the path
841 // points to a removable device, Driver::enqueuePath() would fail with an
842 // error (EAGAIN, "resource unavailable try again") which we want to skip
843 // silently.
844 if (llvm::sys::fs::exists(pdbPath))
845 return normalizePdbPath(pdbPath);
846 std::string ret = getPdbBaseName(dependentFile, pdbPath);
847 if (llvm::sys::fs::exists(ret))
848 return normalizePdbPath(ret);
849 return None;
852 PDBInputFile::PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m)
853 : InputFile(ctx, PDBKind, m) {}
855 PDBInputFile::~PDBInputFile() = default;
857 PDBInputFile *PDBInputFile::findFromRecordPath(const COFFLinkerContext &ctx,
858 StringRef path,
859 ObjFile *fromFile) {
860 auto p = findPdbPath(path.str(), fromFile);
861 if (!p)
862 return nullptr;
863 auto it = ctx.pdbInputFileInstances.find(*p);
864 if (it != ctx.pdbInputFileInstances.end())
865 return it->second;
866 return nullptr;
869 void PDBInputFile::parse() {
870 ctx.pdbInputFileInstances[mb.getBufferIdentifier().str()] = this;
872 std::unique_ptr<pdb::IPDBSession> thisSession;
873 loadErr.emplace(pdb::NativeSession::createFromPdb(
874 MemoryBuffer::getMemBuffer(mb, false), thisSession));
875 if (*loadErr)
876 return; // fail silently at this point - the error will be handled later,
877 // when merging the debug type stream
879 session.reset(static_cast<pdb::NativeSession *>(thisSession.release()));
881 pdb::PDBFile &pdbFile = session->getPDBFile();
882 auto expectedInfo = pdbFile.getPDBInfoStream();
883 // All PDB Files should have an Info stream.
884 if (!expectedInfo) {
885 loadErr.emplace(expectedInfo.takeError());
886 return;
888 debugTypesObj = makeTypeServerSource(ctx, this);
891 // Used only for DWARF debug info, which is not common (except in MinGW
892 // environments). This returns an optional pair of file name and line
893 // number for where the variable was defined.
894 Optional<std::pair<StringRef, uint32_t>>
895 ObjFile::getVariableLocation(StringRef var) {
896 if (!dwarf) {
897 dwarf = make<DWARFCache>(DWARFContext::create(*getCOFFObj()));
898 if (!dwarf)
899 return None;
901 if (config->machine == I386)
902 var.consume_front("_");
903 Optional<std::pair<std::string, unsigned>> ret = dwarf->getVariableLoc(var);
904 if (!ret)
905 return None;
906 return std::make_pair(saver().save(ret->first), ret->second);
909 // Used only for DWARF debug info, which is not common (except in MinGW
910 // environments).
911 Optional<DILineInfo> ObjFile::getDILineInfo(uint32_t offset,
912 uint32_t sectionIndex) {
913 if (!dwarf) {
914 dwarf = make<DWARFCache>(DWARFContext::create(*getCOFFObj()));
915 if (!dwarf)
916 return None;
919 return dwarf->getDILineInfo(offset, sectionIndex);
922 void ObjFile::enqueuePdbFile(StringRef path, ObjFile *fromFile) {
923 auto p = findPdbPath(path.str(), fromFile);
924 if (!p)
925 return;
926 auto it = ctx.pdbInputFileInstances.emplace(*p, nullptr);
927 if (!it.second)
928 return; // already scheduled for load
929 driver->enqueuePDB(*p);
932 void ImportFile::parse() {
933 const char *buf = mb.getBufferStart();
934 const auto *hdr = reinterpret_cast<const coff_import_header *>(buf);
936 // Check if the total size is valid.
937 if (mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData)
938 fatal("broken import library");
940 // Read names and create an __imp_ symbol.
941 StringRef name = saver().save(StringRef(buf + sizeof(*hdr)));
942 StringRef impName = saver().save("__imp_" + name);
943 const char *nameStart = buf + sizeof(coff_import_header) + name.size() + 1;
944 dllName = std::string(StringRef(nameStart));
945 StringRef extName;
946 switch (hdr->getNameType()) {
947 case IMPORT_ORDINAL:
948 extName = "";
949 break;
950 case IMPORT_NAME:
951 extName = name;
952 break;
953 case IMPORT_NAME_NOPREFIX:
954 extName = ltrim1(name, "?@_");
955 break;
956 case IMPORT_NAME_UNDECORATE:
957 extName = ltrim1(name, "?@_");
958 extName = extName.substr(0, extName.find('@'));
959 break;
962 this->hdr = hdr;
963 externalName = extName;
965 impSym = ctx.symtab.addImportData(impName, this);
966 // If this was a duplicate, we logged an error but may continue;
967 // in this case, impSym is nullptr.
968 if (!impSym)
969 return;
971 if (hdr->getType() == llvm::COFF::IMPORT_CONST)
972 static_cast<void>(ctx.symtab.addImportData(name, this));
974 // If type is function, we need to create a thunk which jump to an
975 // address pointed by the __imp_ symbol. (This allows you to call
976 // DLL functions just like regular non-DLL functions.)
977 if (hdr->getType() == llvm::COFF::IMPORT_CODE)
978 thunkSym = ctx.symtab.addImportThunk(
979 name, cast_or_null<DefinedImportData>(impSym), hdr->Machine);
982 BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
983 StringRef archiveName, uint64_t offsetInArchive,
984 bool lazy)
985 : InputFile(ctx, BitcodeKind, mb, lazy) {
986 std::string path = mb.getBufferIdentifier().str();
987 if (config->thinLTOIndexOnly)
988 path = replaceThinLTOSuffix(mb.getBufferIdentifier());
990 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
991 // name. If two archives define two members with the same name, this
992 // causes a collision which result in only one of the objects being taken
993 // into consideration at LTO time (which very likely causes undefined
994 // symbols later in the link stage). So we append file offset to make
995 // filename unique.
996 MemoryBufferRef mbref(mb.getBuffer(),
997 saver().save(archiveName.empty()
998 ? path
999 : archiveName +
1000 sys::path::filename(path) +
1001 utostr(offsetInArchive)));
1003 obj = check(lto::InputFile::create(mbref));
1006 BitcodeFile::~BitcodeFile() = default;
1008 namespace {
1009 // Convenience class for initializing a coff_section with specific flags.
1010 class FakeSection {
1011 public:
1012 FakeSection(int c) { section.Characteristics = c; }
1014 coff_section section;
1017 // Convenience class for initializing a SectionChunk with specific flags.
1018 class FakeSectionChunk {
1019 public:
1020 FakeSectionChunk(const coff_section *section) : chunk(nullptr, section) {
1021 // Comdats from LTO files can't be fully treated as regular comdats
1022 // at this point; we don't know what size or contents they are going to
1023 // have, so we can't do proper checking of such aspects of them.
1024 chunk.selection = IMAGE_COMDAT_SELECT_ANY;
1027 SectionChunk chunk;
1030 FakeSection ltoTextSection(IMAGE_SCN_MEM_EXECUTE);
1031 FakeSection ltoDataSection(IMAGE_SCN_CNT_INITIALIZED_DATA);
1032 FakeSectionChunk ltoTextSectionChunk(&ltoTextSection.section);
1033 FakeSectionChunk ltoDataSectionChunk(&ltoDataSection.section);
1034 } // namespace
1036 void BitcodeFile::parse() {
1037 llvm::StringSaver &saver = lld::saver();
1038 std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
1039 for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
1040 // FIXME: Check nodeduplicate
1041 comdat[i] =
1042 ctx.symtab.addComdat(this, saver.save(obj->getComdatTable()[i].first));
1043 for (const lto::InputFile::Symbol &objSym : obj->symbols()) {
1044 StringRef symName = saver.save(objSym.getName());
1045 int comdatIndex = objSym.getComdatIndex();
1046 Symbol *sym;
1047 SectionChunk *fakeSC = nullptr;
1048 if (objSym.isExecutable())
1049 fakeSC = &ltoTextSectionChunk.chunk;
1050 else
1051 fakeSC = &ltoDataSectionChunk.chunk;
1052 if (objSym.isUndefined()) {
1053 sym = ctx.symtab.addUndefined(symName, this, false);
1054 } else if (objSym.isCommon()) {
1055 sym = ctx.symtab.addCommon(this, symName, objSym.getCommonSize());
1056 } else if (objSym.isWeak() && objSym.isIndirect()) {
1057 // Weak external.
1058 sym = ctx.symtab.addUndefined(symName, this, true);
1059 std::string fallback = std::string(objSym.getCOFFWeakExternalFallback());
1060 Symbol *alias = ctx.symtab.addUndefined(saver.save(fallback));
1061 checkAndSetWeakAlias(&ctx.symtab, this, sym, alias);
1062 } else if (comdatIndex != -1) {
1063 if (symName == obj->getComdatTable()[comdatIndex].first) {
1064 sym = comdat[comdatIndex].first;
1065 if (cast<DefinedRegular>(sym)->data == nullptr)
1066 cast<DefinedRegular>(sym)->data = &fakeSC->repl;
1067 } else if (comdat[comdatIndex].second) {
1068 sym = ctx.symtab.addRegular(this, symName, nullptr, fakeSC);
1069 } else {
1070 sym = ctx.symtab.addUndefined(symName, this, false);
1072 } else {
1073 sym = ctx.symtab.addRegular(this, symName, nullptr, fakeSC);
1075 symbols.push_back(sym);
1076 if (objSym.isUsed())
1077 config->gcroot.push_back(sym);
1079 directives = obj->getCOFFLinkerOpts();
1082 void BitcodeFile::parseLazy() {
1083 for (const lto::InputFile::Symbol &sym : obj->symbols())
1084 if (!sym.isUndefined())
1085 ctx.symtab.addLazyObject(this, sym.getName());
1088 MachineTypes BitcodeFile::getMachineType() {
1089 switch (Triple(obj->getTargetTriple()).getArch()) {
1090 case Triple::x86_64:
1091 return AMD64;
1092 case Triple::x86:
1093 return I386;
1094 case Triple::arm:
1095 return ARMNT;
1096 case Triple::aarch64:
1097 return ARM64;
1098 default:
1099 return IMAGE_FILE_MACHINE_UNKNOWN;
1103 std::string lld::coff::replaceThinLTOSuffix(StringRef path) {
1104 StringRef suffix = config->thinLTOObjectSuffixReplace.first;
1105 StringRef repl = config->thinLTOObjectSuffixReplace.second;
1107 if (path.consume_back(suffix))
1108 return (path + repl).str();
1109 return std::string(path);
1112 static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) {
1113 for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) {
1114 const coff_section *sec = CHECK(coffObj->getSection(i), file);
1115 if (rva >= sec->VirtualAddress &&
1116 rva <= sec->VirtualAddress + sec->VirtualSize) {
1117 return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0;
1120 return false;
1123 void DLLFile::parse() {
1124 // Parse a memory buffer as a PE-COFF executable.
1125 std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
1127 if (auto *obj = dyn_cast<COFFObjectFile>(bin.get())) {
1128 bin.release();
1129 coffObj.reset(obj);
1130 } else {
1131 error(toString(this) + " is not a COFF file");
1132 return;
1135 if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) {
1136 error(toString(this) + " is not a PE-COFF executable");
1137 return;
1140 for (const auto &exp : coffObj->export_directories()) {
1141 StringRef dllName, symbolName;
1142 uint32_t exportRVA;
1143 checkError(exp.getDllName(dllName));
1144 checkError(exp.getSymbolName(symbolName));
1145 checkError(exp.getExportRVA(exportRVA));
1147 if (symbolName.empty())
1148 continue;
1150 bool code = isRVACode(coffObj.get(), exportRVA, this);
1152 Symbol *s = make<Symbol>();
1153 s->dllName = dllName;
1154 s->symbolName = symbolName;
1155 s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA;
1156 s->nameType = ImportNameType::IMPORT_NAME;
1158 if (coffObj->getMachine() == I386) {
1159 s->symbolName = symbolName = saver().save("_" + symbolName);
1160 s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
1163 StringRef impName = saver().save("__imp_" + symbolName);
1164 ctx.symtab.addLazyDLLSymbol(this, s, impName);
1165 if (code)
1166 ctx.symtab.addLazyDLLSymbol(this, s, symbolName);
1170 MachineTypes DLLFile::getMachineType() {
1171 if (coffObj)
1172 return static_cast<MachineTypes>(coffObj->getMachine());
1173 return IMAGE_FILE_MACHINE_UNKNOWN;
1176 void DLLFile::makeImport(DLLFile::Symbol *s) {
1177 if (!seen.insert(s->symbolName).second)
1178 return;
1180 size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
1181 size_t size = sizeof(coff_import_header) + impSize;
1182 char *buf = bAlloc().Allocate<char>(size);
1183 memset(buf, 0, size);
1184 char *p = buf;
1185 auto *imp = reinterpret_cast<coff_import_header *>(p);
1186 p += sizeof(*imp);
1187 imp->Sig2 = 0xFFFF;
1188 imp->Machine = coffObj->getMachine();
1189 imp->SizeOfData = impSize;
1190 imp->OrdinalHint = 0; // Only linking by name
1191 imp->TypeInfo = (s->nameType << 2) | s->importType;
1193 // Write symbol name and DLL name.
1194 memcpy(p, s->symbolName.data(), s->symbolName.size());
1195 p += s->symbolName.size() + 1;
1196 memcpy(p, s->dllName.data(), s->dllName.size());
1197 MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName);
1198 ImportFile *impFile = make<ImportFile>(ctx, mbref);
1199 ctx.symtab.addFile(impFile);