[MemProf] Templatize CallStackRadixTreeBuilder (NFC) (#117014)
[llvm-project.git] / lld / wasm / InputFiles.cpp
blobfd06788457966a6e8cd6654ae493a419420c1700
1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "InputFiles.h"
10 #include "Config.h"
11 #include "InputChunks.h"
12 #include "InputElement.h"
13 #include "OutputSegment.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/Args.h"
16 #include "lld/Common/CommonLinkerContext.h"
17 #include "lld/Common/Reproduce.h"
18 #include "llvm/BinaryFormat/Wasm.h"
19 #include "llvm/Object/Binary.h"
20 #include "llvm/Object/Wasm.h"
21 #include "llvm/ProfileData/InstrProf.h"
22 #include "llvm/Support/Path.h"
23 #include "llvm/Support/TarWriter.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <optional>
27 #define DEBUG_TYPE "lld"
29 using namespace llvm;
30 using namespace llvm::object;
31 using namespace llvm::wasm;
32 using namespace llvm::sys;
34 namespace lld {
36 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
37 std::string toString(const wasm::InputFile *file) {
38 if (!file)
39 return "<internal>";
41 if (file->archiveName.empty())
42 return std::string(file->getName());
44 return (file->archiveName + "(" + file->getName() + ")").str();
47 namespace wasm {
49 std::string replaceThinLTOSuffix(StringRef path) {
50 auto [suffix, repl] = config->thinLTOObjectSuffixReplace;
51 if (path.consume_back(suffix))
52 return (path + repl).str();
53 return std::string(path);
56 void InputFile::checkArch(Triple::ArchType arch) const {
57 bool is64 = arch == Triple::wasm64;
58 if (is64 && !config->is64) {
59 fatal(toString(this) +
60 ": must specify -mwasm64 to process wasm64 object files");
61 } else if (config->is64.value_or(false) != is64) {
62 fatal(toString(this) +
63 ": wasm32 object file can't be linked in wasm64 mode");
67 std::unique_ptr<llvm::TarWriter> tar;
69 std::optional<MemoryBufferRef> readFile(StringRef path) {
70 log("Loading: " + path);
72 auto mbOrErr = MemoryBuffer::getFile(path);
73 if (auto ec = mbOrErr.getError()) {
74 error("cannot open " + path + ": " + ec.message());
75 return std::nullopt;
77 std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
78 MemoryBufferRef mbref = mb->getMemBufferRef();
79 make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take MB ownership
81 if (tar)
82 tar->append(relativeToRoot(path), mbref.getBuffer());
83 return mbref;
86 InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
87 uint64_t offsetInArchive, bool lazy) {
88 file_magic magic = identify_magic(mb.getBuffer());
89 if (magic == file_magic::wasm_object) {
90 std::unique_ptr<Binary> bin =
91 CHECK(createBinary(mb), mb.getBufferIdentifier());
92 auto *obj = cast<WasmObjectFile>(bin.get());
93 if (obj->hasUnmodeledTypes())
94 fatal(toString(mb.getBufferIdentifier()) +
95 "file has unmodeled reference or GC types");
96 if (obj->isSharedObject())
97 return make<SharedFile>(mb);
98 return make<ObjFile>(mb, archiveName, lazy);
101 assert(magic == file_magic::bitcode);
102 return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy);
105 // Relocations contain either symbol or type indices. This function takes a
106 // relocation and returns relocated index (i.e. translates from the input
107 // symbol/type space to the output symbol/type space).
108 uint32_t ObjFile::calcNewIndex(const WasmRelocation &reloc) const {
109 if (reloc.Type == R_WASM_TYPE_INDEX_LEB) {
110 assert(typeIsUsed[reloc.Index]);
111 return typeMap[reloc.Index];
113 const Symbol *sym = symbols[reloc.Index];
114 if (auto *ss = dyn_cast<SectionSymbol>(sym))
115 sym = ss->getOutputSectionSymbol();
116 return sym->getOutputSymbolIndex();
119 // Relocations can contain addend for combined sections. This function takes a
120 // relocation and returns updated addend by offset in the output section.
121 int64_t ObjFile::calcNewAddend(const WasmRelocation &reloc) const {
122 switch (reloc.Type) {
123 case R_WASM_MEMORY_ADDR_LEB:
124 case R_WASM_MEMORY_ADDR_LEB64:
125 case R_WASM_MEMORY_ADDR_SLEB64:
126 case R_WASM_MEMORY_ADDR_SLEB:
127 case R_WASM_MEMORY_ADDR_REL_SLEB:
128 case R_WASM_MEMORY_ADDR_REL_SLEB64:
129 case R_WASM_MEMORY_ADDR_I32:
130 case R_WASM_MEMORY_ADDR_I64:
131 case R_WASM_MEMORY_ADDR_TLS_SLEB:
132 case R_WASM_MEMORY_ADDR_TLS_SLEB64:
133 case R_WASM_FUNCTION_OFFSET_I32:
134 case R_WASM_FUNCTION_OFFSET_I64:
135 case R_WASM_MEMORY_ADDR_LOCREL_I32:
136 return reloc.Addend;
137 case R_WASM_SECTION_OFFSET_I32:
138 return getSectionSymbol(reloc.Index)->section->getOffset(reloc.Addend);
139 default:
140 llvm_unreachable("unexpected relocation type");
144 // Translate from the relocation's index into the final linked output value.
145 uint64_t ObjFile::calcNewValue(const WasmRelocation &reloc, uint64_t tombstone,
146 const InputChunk *chunk) const {
147 const Symbol* sym = nullptr;
148 if (reloc.Type != R_WASM_TYPE_INDEX_LEB) {
149 sym = symbols[reloc.Index];
151 // We can end up with relocations against non-live symbols. For example
152 // in debug sections. We return a tombstone value in debug symbol sections
153 // so this will not produce a valid range conflicting with ranges of actual
154 // code. In other sections we return reloc.Addend.
156 if (!isa<SectionSymbol>(sym) && !sym->isLive())
157 return tombstone ? tombstone : reloc.Addend;
160 switch (reloc.Type) {
161 case R_WASM_TABLE_INDEX_I32:
162 case R_WASM_TABLE_INDEX_I64:
163 case R_WASM_TABLE_INDEX_SLEB:
164 case R_WASM_TABLE_INDEX_SLEB64:
165 case R_WASM_TABLE_INDEX_REL_SLEB:
166 case R_WASM_TABLE_INDEX_REL_SLEB64: {
167 if (!getFunctionSymbol(reloc.Index)->hasTableIndex())
168 return 0;
169 uint32_t index = getFunctionSymbol(reloc.Index)->getTableIndex();
170 if (reloc.Type == R_WASM_TABLE_INDEX_REL_SLEB ||
171 reloc.Type == R_WASM_TABLE_INDEX_REL_SLEB64)
172 index -= config->tableBase;
173 return index;
175 case R_WASM_MEMORY_ADDR_LEB:
176 case R_WASM_MEMORY_ADDR_LEB64:
177 case R_WASM_MEMORY_ADDR_SLEB:
178 case R_WASM_MEMORY_ADDR_SLEB64:
179 case R_WASM_MEMORY_ADDR_REL_SLEB:
180 case R_WASM_MEMORY_ADDR_REL_SLEB64:
181 case R_WASM_MEMORY_ADDR_I32:
182 case R_WASM_MEMORY_ADDR_I64:
183 case R_WASM_MEMORY_ADDR_TLS_SLEB:
184 case R_WASM_MEMORY_ADDR_TLS_SLEB64:
185 case R_WASM_MEMORY_ADDR_LOCREL_I32: {
186 if (isa<UndefinedData>(sym) || sym->isShared() || sym->isUndefWeak())
187 return 0;
188 auto D = cast<DefinedData>(sym);
189 uint64_t value = D->getVA() + reloc.Addend;
190 if (reloc.Type == R_WASM_MEMORY_ADDR_LOCREL_I32) {
191 const auto *segment = cast<InputSegment>(chunk);
192 uint64_t p = segment->outputSeg->startVA + segment->outputSegmentOffset +
193 reloc.Offset - segment->getInputSectionOffset();
194 value -= p;
196 return value;
198 case R_WASM_TYPE_INDEX_LEB:
199 return typeMap[reloc.Index];
200 case R_WASM_FUNCTION_INDEX_LEB:
201 case R_WASM_FUNCTION_INDEX_I32:
202 return getFunctionSymbol(reloc.Index)->getFunctionIndex();
203 case R_WASM_GLOBAL_INDEX_LEB:
204 case R_WASM_GLOBAL_INDEX_I32:
205 if (auto gs = dyn_cast<GlobalSymbol>(sym))
206 return gs->getGlobalIndex();
207 return sym->getGOTIndex();
208 case R_WASM_TAG_INDEX_LEB:
209 return getTagSymbol(reloc.Index)->getTagIndex();
210 case R_WASM_FUNCTION_OFFSET_I32:
211 case R_WASM_FUNCTION_OFFSET_I64: {
212 if (isa<UndefinedFunction>(sym)) {
213 return tombstone ? tombstone : reloc.Addend;
215 auto *f = cast<DefinedFunction>(sym);
216 return f->function->getOffset(f->function->getFunctionCodeOffset() +
217 reloc.Addend);
219 case R_WASM_SECTION_OFFSET_I32:
220 return getSectionSymbol(reloc.Index)->section->getOffset(reloc.Addend);
221 case R_WASM_TABLE_NUMBER_LEB:
222 return getTableSymbol(reloc.Index)->getTableNumber();
223 default:
224 llvm_unreachable("unknown relocation type");
228 template <class T>
229 static void setRelocs(const std::vector<T *> &chunks,
230 const WasmSection *section) {
231 if (!section)
232 return;
234 ArrayRef<WasmRelocation> relocs = section->Relocations;
235 assert(llvm::is_sorted(
236 relocs, [](const WasmRelocation &r1, const WasmRelocation &r2) {
237 return r1.Offset < r2.Offset;
238 }));
239 assert(llvm::is_sorted(chunks, [](InputChunk *c1, InputChunk *c2) {
240 return c1->getInputSectionOffset() < c2->getInputSectionOffset();
241 }));
243 auto relocsNext = relocs.begin();
244 auto relocsEnd = relocs.end();
245 auto relocLess = [](const WasmRelocation &r, uint32_t val) {
246 return r.Offset < val;
248 for (InputChunk *c : chunks) {
249 auto relocsStart = std::lower_bound(relocsNext, relocsEnd,
250 c->getInputSectionOffset(), relocLess);
251 relocsNext = std::lower_bound(
252 relocsStart, relocsEnd, c->getInputSectionOffset() + c->getInputSize(),
253 relocLess);
254 c->setRelocations(ArrayRef<WasmRelocation>(relocsStart, relocsNext));
258 // An object file can have two approaches to tables. With the reference-types
259 // feature enabled, input files that define or use tables declare the tables
260 // using symbols, and record each use with a relocation. This way when the
261 // linker combines inputs, it can collate the tables used by the inputs,
262 // assigning them distinct table numbers, and renumber all the uses as
263 // appropriate. At the same time, the linker has special logic to build the
264 // indirect function table if it is needed.
266 // However, MVP object files (those that target WebAssembly 1.0, the "minimum
267 // viable product" version of WebAssembly) neither write table symbols nor
268 // record relocations. These files can have at most one table, the indirect
269 // function table used by call_indirect and which is the address space for
270 // function pointers. If this table is present, it is always an import. If we
271 // have a file with a table import but no table symbols, it is an MVP object
272 // file. synthesizeMVPIndirectFunctionTableSymbolIfNeeded serves as a shim when
273 // loading these input files, defining the missing symbol to allow the indirect
274 // function table to be built.
276 // As indirect function table table usage in MVP objects cannot be relocated,
277 // the linker must ensure that this table gets assigned index zero.
278 void ObjFile::addLegacyIndirectFunctionTableIfNeeded(
279 uint32_t tableSymbolCount) {
280 uint32_t tableCount = wasmObj->getNumImportedTables() + tables.size();
282 // If there are symbols for all tables, then all is good.
283 if (tableCount == tableSymbolCount)
284 return;
286 // It's possible for an input to define tables and also use the indirect
287 // function table, but forget to compile with -mattr=+reference-types.
288 // For these newer files, we require symbols for all tables, and
289 // relocations for all of their uses.
290 if (tableSymbolCount != 0) {
291 error(toString(this) +
292 ": expected one symbol table entry for each of the " +
293 Twine(tableCount) + " table(s) present, but got " +
294 Twine(tableSymbolCount) + " symbol(s) instead.");
295 return;
298 // An MVP object file can have up to one table import, for the indirect
299 // function table, but will have no table definitions.
300 if (tables.size()) {
301 error(toString(this) +
302 ": unexpected table definition(s) without corresponding "
303 "symbol-table entries.");
304 return;
307 // An MVP object file can have only one table import.
308 if (tableCount != 1) {
309 error(toString(this) +
310 ": multiple table imports, but no corresponding symbol-table "
311 "entries.");
312 return;
315 const WasmImport *tableImport = nullptr;
316 for (const auto &import : wasmObj->imports()) {
317 if (import.Kind == WASM_EXTERNAL_TABLE) {
318 assert(!tableImport);
319 tableImport = &import;
322 assert(tableImport);
324 // We can only synthesize a symtab entry for the indirect function table; if
325 // it has an unexpected name or type, assume that it's not actually the
326 // indirect function table.
327 if (tableImport->Field != functionTableName ||
328 tableImport->Table.ElemType != ValType::FUNCREF) {
329 error(toString(this) + ": table import " + Twine(tableImport->Field) +
330 " is missing a symbol table entry.");
331 return;
334 WasmSymbolInfo info;
335 info.Name = tableImport->Field;
336 info.Kind = WASM_SYMBOL_TYPE_TABLE;
337 info.ImportModule = tableImport->Module;
338 info.ImportName = tableImport->Field;
339 info.Flags = WASM_SYMBOL_UNDEFINED | WASM_SYMBOL_NO_STRIP;
340 info.ElementIndex = 0;
341 LLVM_DEBUG(dbgs() << "Synthesizing symbol for table import: " << info.Name
342 << "\n");
343 const WasmGlobalType *globalType = nullptr;
344 const WasmSignature *signature = nullptr;
345 auto *wasmSym =
346 make<WasmSymbol>(info, globalType, &tableImport->Table, signature);
347 Symbol *sym = createUndefined(*wasmSym, false);
348 // We're only sure it's a TableSymbol if the createUndefined succeeded.
349 if (errorCount())
350 return;
351 symbols.push_back(sym);
352 // Because there are no TABLE_NUMBER relocs, we can't compute accurate
353 // liveness info; instead, just mark the symbol as always live.
354 sym->markLive();
356 // We assume that this compilation unit has unrelocatable references to
357 // this table.
358 ctx.legacyFunctionTable = true;
361 static bool shouldMerge(const WasmSection &sec) {
362 if (config->optimize == 0)
363 return false;
364 // Sadly we don't have section attributes yet for custom sections, so we
365 // currently go by the name alone.
366 // TODO(sbc): Add ability for wasm sections to carry flags so we don't
367 // need to use names here.
368 // For now, keep in sync with uses of wasm::WASM_SEG_FLAG_STRINGS in
369 // MCObjectFileInfo::initWasmMCObjectFileInfo which creates these custom
370 // sections.
371 return sec.Name == ".debug_str" || sec.Name == ".debug_str.dwo" ||
372 sec.Name == ".debug_line_str";
375 static bool shouldMerge(const WasmSegment &seg) {
376 // As of now we only support merging strings, and only with single byte
377 // alignment (2^0).
378 if (!(seg.Data.LinkingFlags & WASM_SEG_FLAG_STRINGS) ||
379 (seg.Data.Alignment != 0))
380 return false;
382 // On a regular link we don't merge sections if -O0 (default is -O1). This
383 // sometimes makes the linker significantly faster, although the output will
384 // be bigger.
385 if (config->optimize == 0)
386 return false;
388 // A mergeable section with size 0 is useless because they don't have
389 // any data to merge. A mergeable string section with size 0 can be
390 // argued as invalid because it doesn't end with a null character.
391 // We'll avoid a mess by handling them as if they were non-mergeable.
392 if (seg.Data.Content.size() == 0)
393 return false;
395 return true;
398 void ObjFile::parseLazy() {
399 LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << " "
400 << wasmObj.get() << "\n");
401 for (const SymbolRef &sym : wasmObj->symbols()) {
402 const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
403 if (wasmSym.isUndefined() || wasmSym.isBindingLocal())
404 continue;
405 symtab->addLazy(wasmSym.Info.Name, this);
406 // addLazy() may trigger this->extract() if an existing symbol is an
407 // undefined symbol. If that happens, this function has served its purpose,
408 // and we can exit from the loop early.
409 if (!lazy)
410 break;
414 ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy)
415 : WasmFileBase(ObjectKind, m) {
416 this->lazy = lazy;
417 this->archiveName = std::string(archiveName);
419 // Currently we only do this check for regular object file, and not for shared
420 // object files. This is because architecture detection for shared objects is
421 // currently based on a heuristic, which is fallable:
422 // https://github.com/llvm/llvm-project/issues/98778
423 checkArch(wasmObj->getArch());
425 // If this isn't part of an archive, it's eagerly linked, so mark it live.
426 if (archiveName.empty())
427 markLive();
430 void SharedFile::parse() {
431 assert(wasmObj->isSharedObject());
433 for (const SymbolRef &sym : wasmObj->symbols()) {
434 const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
435 if (wasmSym.isDefined()) {
436 StringRef name = wasmSym.Info.Name;
437 // Certain shared library exports are known to be DSO-local so we
438 // don't want to add them to the symbol table.
439 // TODO(sbc): Instead of hardcoding these here perhaps we could add
440 // this as extra metadata in the `dylink` section.
441 if (name == "__wasm_apply_data_relocs" || name == "__wasm_call_ctors" ||
442 name.starts_with("__start_") || name.starts_with("__stop_"))
443 continue;
444 uint32_t flags = wasmSym.Info.Flags;
445 Symbol *s;
446 LLVM_DEBUG(dbgs() << "shared symbol: " << name << "\n");
447 switch (wasmSym.Info.Kind) {
448 case WASM_SYMBOL_TYPE_FUNCTION:
449 s = symtab->addSharedFunction(name, flags, this, wasmSym.Signature);
450 break;
451 case WASM_SYMBOL_TYPE_DATA:
452 s = symtab->addSharedData(name, flags, this);
453 break;
454 default:
455 continue;
457 symbols.push_back(s);
462 // Returns the alignment for a custom section. This is used to concatenate
463 // custom sections with the same name into a single custom section.
464 static uint32_t getCustomSectionAlignment(const WasmSection &sec) {
465 // TODO: Add a section attribute for alignment in the linking spec.
466 if (sec.Name == getInstrProfSectionName(IPSK_covfun, Triple::Wasm) ||
467 sec.Name == getInstrProfSectionName(IPSK_covmap, Triple::Wasm)) {
468 // llvm-cov assumes that coverage metadata sections are 8-byte aligned.
469 return 8;
471 return 1;
474 WasmFileBase::WasmFileBase(Kind k, MemoryBufferRef m) : InputFile(k, m) {
475 // Parse a memory buffer as a wasm file.
476 LLVM_DEBUG(dbgs() << "Reading object: " << toString(this) << "\n");
477 std::unique_ptr<Binary> bin = CHECK(createBinary(mb), toString(this));
479 auto *obj = dyn_cast<WasmObjectFile>(bin.get());
480 if (!obj)
481 fatal(toString(this) + ": not a wasm file");
483 bin.release();
484 wasmObj.reset(obj);
487 void ObjFile::parse(bool ignoreComdats) {
488 // Parse a memory buffer as a wasm file.
489 LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n");
491 if (!wasmObj->isRelocatableObject())
492 fatal(toString(this) + ": not a relocatable wasm file");
494 // Build up a map of function indices to table indices for use when
495 // verifying the existing table index relocations
496 uint32_t totalFunctions =
497 wasmObj->getNumImportedFunctions() + wasmObj->functions().size();
498 tableEntriesRel.resize(totalFunctions);
499 tableEntries.resize(totalFunctions);
500 for (const WasmElemSegment &seg : wasmObj->elements()) {
501 int64_t offset;
502 if (seg.Offset.Extended)
503 fatal(toString(this) + ": extended init exprs not supported");
504 else if (seg.Offset.Inst.Opcode == WASM_OPCODE_I32_CONST)
505 offset = seg.Offset.Inst.Value.Int32;
506 else if (seg.Offset.Inst.Opcode == WASM_OPCODE_I64_CONST)
507 offset = seg.Offset.Inst.Value.Int64;
508 else
509 fatal(toString(this) + ": invalid table elements");
510 for (size_t index = 0; index < seg.Functions.size(); index++) {
511 auto functionIndex = seg.Functions[index];
512 tableEntriesRel[functionIndex] = index;
513 tableEntries[functionIndex] = offset + index;
517 ArrayRef<StringRef> comdats = wasmObj->linkingData().Comdats;
518 for (StringRef comdat : comdats) {
519 bool isNew = ignoreComdats || symtab->addComdat(comdat);
520 keptComdats.push_back(isNew);
523 uint32_t sectionIndex = 0;
525 // Bool for each symbol, true if called directly. This allows us to implement
526 // a weaker form of signature checking where undefined functions that are not
527 // called directly (i.e. only address taken) don't have to match the defined
528 // function's signature. We cannot do this for directly called functions
529 // because those signatures are checked at validation times.
530 // See https://github.com/llvm/llvm-project/issues/39758
531 std::vector<bool> isCalledDirectly(wasmObj->getNumberOfSymbols(), false);
532 for (const SectionRef &sec : wasmObj->sections()) {
533 const WasmSection &section = wasmObj->getWasmSection(sec);
534 // Wasm objects can have at most one code and one data section.
535 if (section.Type == WASM_SEC_CODE) {
536 assert(!codeSection);
537 codeSection = &section;
538 } else if (section.Type == WASM_SEC_DATA) {
539 assert(!dataSection);
540 dataSection = &section;
541 } else if (section.Type == WASM_SEC_CUSTOM) {
542 InputChunk *customSec;
543 uint32_t alignment = getCustomSectionAlignment(section);
544 if (shouldMerge(section))
545 customSec = make<MergeInputChunk>(section, this, alignment);
546 else
547 customSec = make<InputSection>(section, this, alignment);
548 customSec->discarded = isExcludedByComdat(customSec);
549 customSections.emplace_back(customSec);
550 customSections.back()->setRelocations(section.Relocations);
551 customSectionsByIndex[sectionIndex] = customSections.back();
553 sectionIndex++;
554 // Scans relocations to determine if a function symbol is called directly.
555 for (const WasmRelocation &reloc : section.Relocations)
556 if (reloc.Type == R_WASM_FUNCTION_INDEX_LEB)
557 isCalledDirectly[reloc.Index] = true;
560 typeMap.resize(getWasmObj()->types().size());
561 typeIsUsed.resize(getWasmObj()->types().size(), false);
564 // Populate `Segments`.
565 for (const WasmSegment &s : wasmObj->dataSegments()) {
566 InputChunk *seg;
567 if (shouldMerge(s))
568 seg = make<MergeInputChunk>(s, this);
569 else
570 seg = make<InputSegment>(s, this);
571 seg->discarded = isExcludedByComdat(seg);
572 // Older object files did not include WASM_SEG_FLAG_TLS and instead
573 // relied on the naming convention. To maintain compat with such objects
574 // we still imply the TLS flag based on the name of the segment.
575 if (!seg->isTLS() &&
576 (seg->name.starts_with(".tdata") || seg->name.starts_with(".tbss")))
577 seg->flags |= WASM_SEG_FLAG_TLS;
578 segments.emplace_back(seg);
580 setRelocs(segments, dataSection);
582 // Populate `Functions`.
583 ArrayRef<WasmFunction> funcs = wasmObj->functions();
584 ArrayRef<WasmSignature> types = wasmObj->types();
585 functions.reserve(funcs.size());
587 for (auto &f : funcs) {
588 auto *func = make<InputFunction>(types[f.SigIndex], &f, this);
589 func->discarded = isExcludedByComdat(func);
590 functions.emplace_back(func);
592 setRelocs(functions, codeSection);
594 // Populate `Tables`.
595 for (const WasmTable &t : wasmObj->tables())
596 tables.emplace_back(make<InputTable>(t, this));
598 // Populate `Globals`.
599 for (const WasmGlobal &g : wasmObj->globals())
600 globals.emplace_back(make<InputGlobal>(g, this));
602 // Populate `Tags`.
603 for (const WasmTag &t : wasmObj->tags())
604 tags.emplace_back(make<InputTag>(types[t.SigIndex], t, this));
606 // Populate `Symbols` based on the symbols in the object.
607 symbols.reserve(wasmObj->getNumberOfSymbols());
608 uint32_t tableSymbolCount = 0;
609 for (const SymbolRef &sym : wasmObj->symbols()) {
610 const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
611 if (wasmSym.isTypeTable())
612 tableSymbolCount++;
613 if (wasmSym.isDefined()) {
614 // createDefined may fail if the symbol is comdat excluded in which case
615 // we fall back to creating an undefined symbol
616 if (Symbol *d = createDefined(wasmSym)) {
617 symbols.push_back(d);
618 continue;
621 size_t idx = symbols.size();
622 symbols.push_back(createUndefined(wasmSym, isCalledDirectly[idx]));
625 addLegacyIndirectFunctionTableIfNeeded(tableSymbolCount);
628 bool ObjFile::isExcludedByComdat(const InputChunk *chunk) const {
629 uint32_t c = chunk->getComdat();
630 if (c == UINT32_MAX)
631 return false;
632 return !keptComdats[c];
635 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t index) const {
636 return cast<FunctionSymbol>(symbols[index]);
639 GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t index) const {
640 return cast<GlobalSymbol>(symbols[index]);
643 TagSymbol *ObjFile::getTagSymbol(uint32_t index) const {
644 return cast<TagSymbol>(symbols[index]);
647 TableSymbol *ObjFile::getTableSymbol(uint32_t index) const {
648 return cast<TableSymbol>(symbols[index]);
651 SectionSymbol *ObjFile::getSectionSymbol(uint32_t index) const {
652 return cast<SectionSymbol>(symbols[index]);
655 DataSymbol *ObjFile::getDataSymbol(uint32_t index) const {
656 return cast<DataSymbol>(symbols[index]);
659 Symbol *ObjFile::createDefined(const WasmSymbol &sym) {
660 StringRef name = sym.Info.Name;
661 uint32_t flags = sym.Info.Flags;
663 switch (sym.Info.Kind) {
664 case WASM_SYMBOL_TYPE_FUNCTION: {
665 InputFunction *func =
666 functions[sym.Info.ElementIndex - wasmObj->getNumImportedFunctions()];
667 if (sym.isBindingLocal())
668 return make<DefinedFunction>(name, flags, this, func);
669 if (func->discarded)
670 return nullptr;
671 return symtab->addDefinedFunction(name, flags, this, func);
673 case WASM_SYMBOL_TYPE_DATA: {
674 InputChunk *seg = segments[sym.Info.DataRef.Segment];
675 auto offset = sym.Info.DataRef.Offset;
676 auto size = sym.Info.DataRef.Size;
677 // Support older (e.g. llvm 13) object files that pre-date the per-symbol
678 // TLS flag, and symbols were assumed to be TLS by being defined in a TLS
679 // segment.
680 if (!(flags & WASM_SYMBOL_TLS) && seg->isTLS())
681 flags |= WASM_SYMBOL_TLS;
682 if (sym.isBindingLocal())
683 return make<DefinedData>(name, flags, this, seg, offset, size);
684 if (seg->discarded)
685 return nullptr;
686 return symtab->addDefinedData(name, flags, this, seg, offset, size);
688 case WASM_SYMBOL_TYPE_GLOBAL: {
689 InputGlobal *global =
690 globals[sym.Info.ElementIndex - wasmObj->getNumImportedGlobals()];
691 if (sym.isBindingLocal())
692 return make<DefinedGlobal>(name, flags, this, global);
693 return symtab->addDefinedGlobal(name, flags, this, global);
695 case WASM_SYMBOL_TYPE_SECTION: {
696 InputChunk *section = customSectionsByIndex[sym.Info.ElementIndex];
697 assert(sym.isBindingLocal());
698 // Need to return null if discarded here? data and func only do that when
699 // binding is not local.
700 if (section->discarded)
701 return nullptr;
702 return make<SectionSymbol>(flags, section, this);
704 case WASM_SYMBOL_TYPE_TAG: {
705 InputTag *tag = tags[sym.Info.ElementIndex - wasmObj->getNumImportedTags()];
706 if (sym.isBindingLocal())
707 return make<DefinedTag>(name, flags, this, tag);
708 return symtab->addDefinedTag(name, flags, this, tag);
710 case WASM_SYMBOL_TYPE_TABLE: {
711 InputTable *table =
712 tables[sym.Info.ElementIndex - wasmObj->getNumImportedTables()];
713 if (sym.isBindingLocal())
714 return make<DefinedTable>(name, flags, this, table);
715 return symtab->addDefinedTable(name, flags, this, table);
718 llvm_unreachable("unknown symbol kind");
721 Symbol *ObjFile::createUndefined(const WasmSymbol &sym, bool isCalledDirectly) {
722 StringRef name = sym.Info.Name;
723 uint32_t flags = sym.Info.Flags | WASM_SYMBOL_UNDEFINED;
725 switch (sym.Info.Kind) {
726 case WASM_SYMBOL_TYPE_FUNCTION:
727 if (sym.isBindingLocal())
728 return make<UndefinedFunction>(name, sym.Info.ImportName,
729 sym.Info.ImportModule, flags, this,
730 sym.Signature, isCalledDirectly);
731 return symtab->addUndefinedFunction(name, sym.Info.ImportName,
732 sym.Info.ImportModule, flags, this,
733 sym.Signature, isCalledDirectly);
734 case WASM_SYMBOL_TYPE_DATA:
735 if (sym.isBindingLocal())
736 return make<UndefinedData>(name, flags, this);
737 return symtab->addUndefinedData(name, flags, this);
738 case WASM_SYMBOL_TYPE_GLOBAL:
739 if (sym.isBindingLocal())
740 return make<UndefinedGlobal>(name, sym.Info.ImportName,
741 sym.Info.ImportModule, flags, this,
742 sym.GlobalType);
743 return symtab->addUndefinedGlobal(name, sym.Info.ImportName,
744 sym.Info.ImportModule, flags, this,
745 sym.GlobalType);
746 case WASM_SYMBOL_TYPE_TABLE:
747 if (sym.isBindingLocal())
748 return make<UndefinedTable>(name, sym.Info.ImportName,
749 sym.Info.ImportModule, flags, this,
750 sym.TableType);
751 return symtab->addUndefinedTable(name, sym.Info.ImportName,
752 sym.Info.ImportModule, flags, this,
753 sym.TableType);
754 case WASM_SYMBOL_TYPE_TAG:
755 if (sym.isBindingLocal())
756 return make<UndefinedTag>(name, sym.Info.ImportName,
757 sym.Info.ImportModule, flags, this,
758 sym.Signature);
759 return symtab->addUndefinedTag(name, sym.Info.ImportName,
760 sym.Info.ImportModule, flags, this,
761 sym.Signature);
762 case WASM_SYMBOL_TYPE_SECTION:
763 llvm_unreachable("section symbols cannot be undefined");
765 llvm_unreachable("unknown symbol kind");
768 static StringRef strip(StringRef s) { return s.trim(' '); }
770 void StubFile::parse() {
771 bool first = true;
773 SmallVector<StringRef> lines;
774 mb.getBuffer().split(lines, '\n');
775 for (StringRef line : lines) {
776 line = line.trim();
778 // File must begin with #STUB
779 if (first) {
780 assert(line == "#STUB");
781 first = false;
784 // Lines starting with # are considered comments
785 if (line.starts_with("#") || !line.size())
786 continue;
788 StringRef sym;
789 StringRef rest;
790 std::tie(sym, rest) = line.split(':');
791 sym = strip(sym);
792 rest = strip(rest);
794 symbolDependencies[sym] = {};
796 while (rest.size()) {
797 StringRef dep;
798 std::tie(dep, rest) = rest.split(',');
799 dep = strip(dep);
800 symbolDependencies[sym].push_back(dep);
805 static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
806 switch (gvVisibility) {
807 case GlobalValue::DefaultVisibility:
808 return WASM_SYMBOL_VISIBILITY_DEFAULT;
809 case GlobalValue::HiddenVisibility:
810 case GlobalValue::ProtectedVisibility:
811 return WASM_SYMBOL_VISIBILITY_HIDDEN;
813 llvm_unreachable("unknown visibility");
816 static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats,
817 const lto::InputFile::Symbol &objSym,
818 BitcodeFile &f) {
819 StringRef name = saver().save(objSym.getName());
821 uint32_t flags = objSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
822 flags |= mapVisibility(objSym.getVisibility());
824 int c = objSym.getComdatIndex();
825 bool excludedByComdat = c != -1 && !keptComdats[c];
827 if (objSym.isUndefined() || excludedByComdat) {
828 flags |= WASM_SYMBOL_UNDEFINED;
829 if (objSym.isExecutable())
830 return symtab->addUndefinedFunction(name, std::nullopt, std::nullopt,
831 flags, &f, nullptr, true);
832 return symtab->addUndefinedData(name, flags, &f);
835 if (objSym.isExecutable())
836 return symtab->addDefinedFunction(name, flags, &f, nullptr);
837 return symtab->addDefinedData(name, flags, &f, nullptr, 0, 0);
840 BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
841 uint64_t offsetInArchive, bool lazy)
842 : InputFile(BitcodeKind, m) {
843 this->lazy = lazy;
844 this->archiveName = std::string(archiveName);
846 std::string path = mb.getBufferIdentifier().str();
847 if (config->thinLTOIndexOnly)
848 path = replaceThinLTOSuffix(mb.getBufferIdentifier());
850 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
851 // name. If two archives define two members with the same name, this
852 // causes a collision which result in only one of the objects being taken
853 // into consideration at LTO time (which very likely causes undefined
854 // symbols later in the link stage). So we append file offset to make
855 // filename unique.
856 StringRef name = archiveName.empty()
857 ? saver().save(path)
858 : saver().save(archiveName + "(" + path::filename(path) +
859 " at " + utostr(offsetInArchive) + ")");
860 MemoryBufferRef mbref(mb.getBuffer(), name);
862 obj = check(lto::InputFile::create(mbref));
864 // If this isn't part of an archive, it's eagerly linked, so mark it live.
865 if (archiveName.empty())
866 markLive();
869 bool BitcodeFile::doneLTO = false;
871 void BitcodeFile::parseLazy() {
872 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) {
873 if (irSym.isUndefined())
874 continue;
875 StringRef name = saver().save(irSym.getName());
876 symtab->addLazy(name, this);
877 // addLazy() may trigger this->extract() if an existing symbol is an
878 // undefined symbol. If that happens, this function has served its purpose,
879 // and we can exit from the loop early.
880 if (!lazy)
881 break;
885 void BitcodeFile::parse(StringRef symName) {
886 if (doneLTO) {
887 error(toString(this) + ": attempt to add bitcode file after LTO (" + symName + ")");
888 return;
891 Triple t(obj->getTargetTriple());
892 if (!t.isWasm()) {
893 error(toString(this) + ": machine type must be wasm32 or wasm64");
894 return;
896 checkArch(t.getArch());
897 std::vector<bool> keptComdats;
898 // TODO Support nodeduplicate
899 // https://github.com/llvm/llvm-project/issues/49875
900 for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable())
901 keptComdats.push_back(symtab->addComdat(s.first));
903 for (const lto::InputFile::Symbol &objSym : obj->symbols())
904 symbols.push_back(createBitcodeSymbol(keptComdats, objSym, *this));
907 } // namespace wasm
908 } // namespace lld