1 //===- InputFiles.cpp -----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "InputFiles.h"
11 #include "InputChunks.h"
12 #include "InputElement.h"
13 #include "OutputSegment.h"
14 #include "SymbolTable.h"
15 #include "lld/Common/Args.h"
16 #include "lld/Common/CommonLinkerContext.h"
17 #include "lld/Common/Reproduce.h"
18 #include "llvm/BinaryFormat/Wasm.h"
19 #include "llvm/Object/Binary.h"
20 #include "llvm/Object/Wasm.h"
21 #include "llvm/ProfileData/InstrProf.h"
22 #include "llvm/Support/Path.h"
23 #include "llvm/Support/TarWriter.h"
24 #include "llvm/Support/raw_ostream.h"
27 #define DEBUG_TYPE "lld"
30 using namespace llvm::object
;
31 using namespace llvm::wasm
;
32 using namespace llvm::sys
;
36 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
37 std::string
toString(const wasm::InputFile
*file
) {
41 if (file
->archiveName
.empty())
42 return std::string(file
->getName());
44 return (file
->archiveName
+ "(" + file
->getName() + ")").str();
49 std::string
replaceThinLTOSuffix(StringRef path
) {
50 auto [suffix
, repl
] = config
->thinLTOObjectSuffixReplace
;
51 if (path
.consume_back(suffix
))
52 return (path
+ repl
).str();
53 return std::string(path
);
56 void InputFile::checkArch(Triple::ArchType arch
) const {
57 bool is64
= arch
== Triple::wasm64
;
58 if (is64
&& !config
->is64
) {
59 fatal(toString(this) +
60 ": must specify -mwasm64 to process wasm64 object files");
61 } else if (config
->is64
.value_or(false) != is64
) {
62 fatal(toString(this) +
63 ": wasm32 object file can't be linked in wasm64 mode");
67 std::unique_ptr
<llvm::TarWriter
> tar
;
69 std::optional
<MemoryBufferRef
> readFile(StringRef path
) {
70 log("Loading: " + path
);
72 auto mbOrErr
= MemoryBuffer::getFile(path
);
73 if (auto ec
= mbOrErr
.getError()) {
74 error("cannot open " + path
+ ": " + ec
.message());
77 std::unique_ptr
<MemoryBuffer
> &mb
= *mbOrErr
;
78 MemoryBufferRef mbref
= mb
->getMemBufferRef();
79 make
<std::unique_ptr
<MemoryBuffer
>>(std::move(mb
)); // take MB ownership
82 tar
->append(relativeToRoot(path
), mbref
.getBuffer());
86 InputFile
*createObjectFile(MemoryBufferRef mb
, StringRef archiveName
,
87 uint64_t offsetInArchive
, bool lazy
) {
88 file_magic magic
= identify_magic(mb
.getBuffer());
89 if (magic
== file_magic::wasm_object
) {
90 std::unique_ptr
<Binary
> bin
=
91 CHECK(createBinary(mb
), mb
.getBufferIdentifier());
92 auto *obj
= cast
<WasmObjectFile
>(bin
.get());
93 if (obj
->hasUnmodeledTypes())
94 fatal(toString(mb
.getBufferIdentifier()) +
95 "file has unmodeled reference or GC types");
96 if (obj
->isSharedObject())
97 return make
<SharedFile
>(mb
);
98 return make
<ObjFile
>(mb
, archiveName
, lazy
);
101 assert(magic
== file_magic::bitcode
);
102 return make
<BitcodeFile
>(mb
, archiveName
, offsetInArchive
, lazy
);
105 // Relocations contain either symbol or type indices. This function takes a
106 // relocation and returns relocated index (i.e. translates from the input
107 // symbol/type space to the output symbol/type space).
108 uint32_t ObjFile::calcNewIndex(const WasmRelocation
&reloc
) const {
109 if (reloc
.Type
== R_WASM_TYPE_INDEX_LEB
) {
110 assert(typeIsUsed
[reloc
.Index
]);
111 return typeMap
[reloc
.Index
];
113 const Symbol
*sym
= symbols
[reloc
.Index
];
114 if (auto *ss
= dyn_cast
<SectionSymbol
>(sym
))
115 sym
= ss
->getOutputSectionSymbol();
116 return sym
->getOutputSymbolIndex();
119 // Relocations can contain addend for combined sections. This function takes a
120 // relocation and returns updated addend by offset in the output section.
121 int64_t ObjFile::calcNewAddend(const WasmRelocation
&reloc
) const {
122 switch (reloc
.Type
) {
123 case R_WASM_MEMORY_ADDR_LEB
:
124 case R_WASM_MEMORY_ADDR_LEB64
:
125 case R_WASM_MEMORY_ADDR_SLEB64
:
126 case R_WASM_MEMORY_ADDR_SLEB
:
127 case R_WASM_MEMORY_ADDR_REL_SLEB
:
128 case R_WASM_MEMORY_ADDR_REL_SLEB64
:
129 case R_WASM_MEMORY_ADDR_I32
:
130 case R_WASM_MEMORY_ADDR_I64
:
131 case R_WASM_MEMORY_ADDR_TLS_SLEB
:
132 case R_WASM_MEMORY_ADDR_TLS_SLEB64
:
133 case R_WASM_FUNCTION_OFFSET_I32
:
134 case R_WASM_FUNCTION_OFFSET_I64
:
135 case R_WASM_MEMORY_ADDR_LOCREL_I32
:
137 case R_WASM_SECTION_OFFSET_I32
:
138 return getSectionSymbol(reloc
.Index
)->section
->getOffset(reloc
.Addend
);
140 llvm_unreachable("unexpected relocation type");
144 // Translate from the relocation's index into the final linked output value.
145 uint64_t ObjFile::calcNewValue(const WasmRelocation
&reloc
, uint64_t tombstone
,
146 const InputChunk
*chunk
) const {
147 const Symbol
* sym
= nullptr;
148 if (reloc
.Type
!= R_WASM_TYPE_INDEX_LEB
) {
149 sym
= symbols
[reloc
.Index
];
151 // We can end up with relocations against non-live symbols. For example
152 // in debug sections. We return a tombstone value in debug symbol sections
153 // so this will not produce a valid range conflicting with ranges of actual
154 // code. In other sections we return reloc.Addend.
156 if (!isa
<SectionSymbol
>(sym
) && !sym
->isLive())
157 return tombstone
? tombstone
: reloc
.Addend
;
160 switch (reloc
.Type
) {
161 case R_WASM_TABLE_INDEX_I32
:
162 case R_WASM_TABLE_INDEX_I64
:
163 case R_WASM_TABLE_INDEX_SLEB
:
164 case R_WASM_TABLE_INDEX_SLEB64
:
165 case R_WASM_TABLE_INDEX_REL_SLEB
:
166 case R_WASM_TABLE_INDEX_REL_SLEB64
: {
167 if (!getFunctionSymbol(reloc
.Index
)->hasTableIndex())
169 uint32_t index
= getFunctionSymbol(reloc
.Index
)->getTableIndex();
170 if (reloc
.Type
== R_WASM_TABLE_INDEX_REL_SLEB
||
171 reloc
.Type
== R_WASM_TABLE_INDEX_REL_SLEB64
)
172 index
-= config
->tableBase
;
175 case R_WASM_MEMORY_ADDR_LEB
:
176 case R_WASM_MEMORY_ADDR_LEB64
:
177 case R_WASM_MEMORY_ADDR_SLEB
:
178 case R_WASM_MEMORY_ADDR_SLEB64
:
179 case R_WASM_MEMORY_ADDR_REL_SLEB
:
180 case R_WASM_MEMORY_ADDR_REL_SLEB64
:
181 case R_WASM_MEMORY_ADDR_I32
:
182 case R_WASM_MEMORY_ADDR_I64
:
183 case R_WASM_MEMORY_ADDR_TLS_SLEB
:
184 case R_WASM_MEMORY_ADDR_TLS_SLEB64
:
185 case R_WASM_MEMORY_ADDR_LOCREL_I32
: {
186 if (isa
<UndefinedData
>(sym
) || sym
->isShared() || sym
->isUndefWeak())
188 auto D
= cast
<DefinedData
>(sym
);
189 uint64_t value
= D
->getVA() + reloc
.Addend
;
190 if (reloc
.Type
== R_WASM_MEMORY_ADDR_LOCREL_I32
) {
191 const auto *segment
= cast
<InputSegment
>(chunk
);
192 uint64_t p
= segment
->outputSeg
->startVA
+ segment
->outputSegmentOffset
+
193 reloc
.Offset
- segment
->getInputSectionOffset();
198 case R_WASM_TYPE_INDEX_LEB
:
199 return typeMap
[reloc
.Index
];
200 case R_WASM_FUNCTION_INDEX_LEB
:
201 case R_WASM_FUNCTION_INDEX_I32
:
202 return getFunctionSymbol(reloc
.Index
)->getFunctionIndex();
203 case R_WASM_GLOBAL_INDEX_LEB
:
204 case R_WASM_GLOBAL_INDEX_I32
:
205 if (auto gs
= dyn_cast
<GlobalSymbol
>(sym
))
206 return gs
->getGlobalIndex();
207 return sym
->getGOTIndex();
208 case R_WASM_TAG_INDEX_LEB
:
209 return getTagSymbol(reloc
.Index
)->getTagIndex();
210 case R_WASM_FUNCTION_OFFSET_I32
:
211 case R_WASM_FUNCTION_OFFSET_I64
: {
212 if (isa
<UndefinedFunction
>(sym
)) {
213 return tombstone
? tombstone
: reloc
.Addend
;
215 auto *f
= cast
<DefinedFunction
>(sym
);
216 return f
->function
->getOffset(f
->function
->getFunctionCodeOffset() +
219 case R_WASM_SECTION_OFFSET_I32
:
220 return getSectionSymbol(reloc
.Index
)->section
->getOffset(reloc
.Addend
);
221 case R_WASM_TABLE_NUMBER_LEB
:
222 return getTableSymbol(reloc
.Index
)->getTableNumber();
224 llvm_unreachable("unknown relocation type");
229 static void setRelocs(const std::vector
<T
*> &chunks
,
230 const WasmSection
*section
) {
234 ArrayRef
<WasmRelocation
> relocs
= section
->Relocations
;
235 assert(llvm::is_sorted(
236 relocs
, [](const WasmRelocation
&r1
, const WasmRelocation
&r2
) {
237 return r1
.Offset
< r2
.Offset
;
239 assert(llvm::is_sorted(chunks
, [](InputChunk
*c1
, InputChunk
*c2
) {
240 return c1
->getInputSectionOffset() < c2
->getInputSectionOffset();
243 auto relocsNext
= relocs
.begin();
244 auto relocsEnd
= relocs
.end();
245 auto relocLess
= [](const WasmRelocation
&r
, uint32_t val
) {
246 return r
.Offset
< val
;
248 for (InputChunk
*c
: chunks
) {
249 auto relocsStart
= std::lower_bound(relocsNext
, relocsEnd
,
250 c
->getInputSectionOffset(), relocLess
);
251 relocsNext
= std::lower_bound(
252 relocsStart
, relocsEnd
, c
->getInputSectionOffset() + c
->getInputSize(),
254 c
->setRelocations(ArrayRef
<WasmRelocation
>(relocsStart
, relocsNext
));
258 // An object file can have two approaches to tables. With the reference-types
259 // feature enabled, input files that define or use tables declare the tables
260 // using symbols, and record each use with a relocation. This way when the
261 // linker combines inputs, it can collate the tables used by the inputs,
262 // assigning them distinct table numbers, and renumber all the uses as
263 // appropriate. At the same time, the linker has special logic to build the
264 // indirect function table if it is needed.
266 // However, MVP object files (those that target WebAssembly 1.0, the "minimum
267 // viable product" version of WebAssembly) neither write table symbols nor
268 // record relocations. These files can have at most one table, the indirect
269 // function table used by call_indirect and which is the address space for
270 // function pointers. If this table is present, it is always an import. If we
271 // have a file with a table import but no table symbols, it is an MVP object
272 // file. synthesizeMVPIndirectFunctionTableSymbolIfNeeded serves as a shim when
273 // loading these input files, defining the missing symbol to allow the indirect
274 // function table to be built.
276 // As indirect function table table usage in MVP objects cannot be relocated,
277 // the linker must ensure that this table gets assigned index zero.
278 void ObjFile::addLegacyIndirectFunctionTableIfNeeded(
279 uint32_t tableSymbolCount
) {
280 uint32_t tableCount
= wasmObj
->getNumImportedTables() + tables
.size();
282 // If there are symbols for all tables, then all is good.
283 if (tableCount
== tableSymbolCount
)
286 // It's possible for an input to define tables and also use the indirect
287 // function table, but forget to compile with -mattr=+reference-types.
288 // For these newer files, we require symbols for all tables, and
289 // relocations for all of their uses.
290 if (tableSymbolCount
!= 0) {
291 error(toString(this) +
292 ": expected one symbol table entry for each of the " +
293 Twine(tableCount
) + " table(s) present, but got " +
294 Twine(tableSymbolCount
) + " symbol(s) instead.");
298 // An MVP object file can have up to one table import, for the indirect
299 // function table, but will have no table definitions.
301 error(toString(this) +
302 ": unexpected table definition(s) without corresponding "
303 "symbol-table entries.");
307 // An MVP object file can have only one table import.
308 if (tableCount
!= 1) {
309 error(toString(this) +
310 ": multiple table imports, but no corresponding symbol-table "
315 const WasmImport
*tableImport
= nullptr;
316 for (const auto &import
: wasmObj
->imports()) {
317 if (import
.Kind
== WASM_EXTERNAL_TABLE
) {
318 assert(!tableImport
);
319 tableImport
= &import
;
324 // We can only synthesize a symtab entry for the indirect function table; if
325 // it has an unexpected name or type, assume that it's not actually the
326 // indirect function table.
327 if (tableImport
->Field
!= functionTableName
||
328 tableImport
->Table
.ElemType
!= ValType::FUNCREF
) {
329 error(toString(this) + ": table import " + Twine(tableImport
->Field
) +
330 " is missing a symbol table entry.");
335 info
.Name
= tableImport
->Field
;
336 info
.Kind
= WASM_SYMBOL_TYPE_TABLE
;
337 info
.ImportModule
= tableImport
->Module
;
338 info
.ImportName
= tableImport
->Field
;
339 info
.Flags
= WASM_SYMBOL_UNDEFINED
| WASM_SYMBOL_NO_STRIP
;
340 info
.ElementIndex
= 0;
341 LLVM_DEBUG(dbgs() << "Synthesizing symbol for table import: " << info
.Name
343 const WasmGlobalType
*globalType
= nullptr;
344 const WasmSignature
*signature
= nullptr;
346 make
<WasmSymbol
>(info
, globalType
, &tableImport
->Table
, signature
);
347 Symbol
*sym
= createUndefined(*wasmSym
, false);
348 // We're only sure it's a TableSymbol if the createUndefined succeeded.
351 symbols
.push_back(sym
);
352 // Because there are no TABLE_NUMBER relocs, we can't compute accurate
353 // liveness info; instead, just mark the symbol as always live.
356 // We assume that this compilation unit has unrelocatable references to
358 ctx
.legacyFunctionTable
= true;
361 static bool shouldMerge(const WasmSection
&sec
) {
362 if (config
->optimize
== 0)
364 // Sadly we don't have section attributes yet for custom sections, so we
365 // currently go by the name alone.
366 // TODO(sbc): Add ability for wasm sections to carry flags so we don't
367 // need to use names here.
368 // For now, keep in sync with uses of wasm::WASM_SEG_FLAG_STRINGS in
369 // MCObjectFileInfo::initWasmMCObjectFileInfo which creates these custom
371 return sec
.Name
== ".debug_str" || sec
.Name
== ".debug_str.dwo" ||
372 sec
.Name
== ".debug_line_str";
375 static bool shouldMerge(const WasmSegment
&seg
) {
376 // As of now we only support merging strings, and only with single byte
378 if (!(seg
.Data
.LinkingFlags
& WASM_SEG_FLAG_STRINGS
) ||
379 (seg
.Data
.Alignment
!= 0))
382 // On a regular link we don't merge sections if -O0 (default is -O1). This
383 // sometimes makes the linker significantly faster, although the output will
385 if (config
->optimize
== 0)
388 // A mergeable section with size 0 is useless because they don't have
389 // any data to merge. A mergeable string section with size 0 can be
390 // argued as invalid because it doesn't end with a null character.
391 // We'll avoid a mess by handling them as if they were non-mergeable.
392 if (seg
.Data
.Content
.size() == 0)
398 void ObjFile::parseLazy() {
399 LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << " "
400 << wasmObj
.get() << "\n");
401 for (const SymbolRef
&sym
: wasmObj
->symbols()) {
402 const WasmSymbol
&wasmSym
= wasmObj
->getWasmSymbol(sym
.getRawDataRefImpl());
403 if (wasmSym
.isUndefined() || wasmSym
.isBindingLocal())
405 symtab
->addLazy(wasmSym
.Info
.Name
, this);
406 // addLazy() may trigger this->extract() if an existing symbol is an
407 // undefined symbol. If that happens, this function has served its purpose,
408 // and we can exit from the loop early.
414 ObjFile::ObjFile(MemoryBufferRef m
, StringRef archiveName
, bool lazy
)
415 : WasmFileBase(ObjectKind
, m
) {
417 this->archiveName
= std::string(archiveName
);
419 // Currently we only do this check for regular object file, and not for shared
420 // object files. This is because architecture detection for shared objects is
421 // currently based on a heuristic, which is fallable:
422 // https://github.com/llvm/llvm-project/issues/98778
423 checkArch(wasmObj
->getArch());
425 // If this isn't part of an archive, it's eagerly linked, so mark it live.
426 if (archiveName
.empty())
430 void SharedFile::parse() {
431 assert(wasmObj
->isSharedObject());
433 for (const SymbolRef
&sym
: wasmObj
->symbols()) {
434 const WasmSymbol
&wasmSym
= wasmObj
->getWasmSymbol(sym
.getRawDataRefImpl());
435 if (wasmSym
.isDefined()) {
436 StringRef name
= wasmSym
.Info
.Name
;
437 // Certain shared library exports are known to be DSO-local so we
438 // don't want to add them to the symbol table.
439 // TODO(sbc): Instead of hardcoding these here perhaps we could add
440 // this as extra metadata in the `dylink` section.
441 if (name
== "__wasm_apply_data_relocs" || name
== "__wasm_call_ctors" ||
442 name
.starts_with("__start_") || name
.starts_with("__stop_"))
444 uint32_t flags
= wasmSym
.Info
.Flags
;
446 LLVM_DEBUG(dbgs() << "shared symbol: " << name
<< "\n");
447 switch (wasmSym
.Info
.Kind
) {
448 case WASM_SYMBOL_TYPE_FUNCTION
:
449 s
= symtab
->addSharedFunction(name
, flags
, this, wasmSym
.Signature
);
451 case WASM_SYMBOL_TYPE_DATA
:
452 s
= symtab
->addSharedData(name
, flags
, this);
457 symbols
.push_back(s
);
462 // Returns the alignment for a custom section. This is used to concatenate
463 // custom sections with the same name into a single custom section.
464 static uint32_t getCustomSectionAlignment(const WasmSection
&sec
) {
465 // TODO: Add a section attribute for alignment in the linking spec.
466 if (sec
.Name
== getInstrProfSectionName(IPSK_covfun
, Triple::Wasm
) ||
467 sec
.Name
== getInstrProfSectionName(IPSK_covmap
, Triple::Wasm
)) {
468 // llvm-cov assumes that coverage metadata sections are 8-byte aligned.
474 WasmFileBase::WasmFileBase(Kind k
, MemoryBufferRef m
) : InputFile(k
, m
) {
475 // Parse a memory buffer as a wasm file.
476 LLVM_DEBUG(dbgs() << "Reading object: " << toString(this) << "\n");
477 std::unique_ptr
<Binary
> bin
= CHECK(createBinary(mb
), toString(this));
479 auto *obj
= dyn_cast
<WasmObjectFile
>(bin
.get());
481 fatal(toString(this) + ": not a wasm file");
487 void ObjFile::parse(bool ignoreComdats
) {
488 // Parse a memory buffer as a wasm file.
489 LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n");
491 if (!wasmObj
->isRelocatableObject())
492 fatal(toString(this) + ": not a relocatable wasm file");
494 // Build up a map of function indices to table indices for use when
495 // verifying the existing table index relocations
496 uint32_t totalFunctions
=
497 wasmObj
->getNumImportedFunctions() + wasmObj
->functions().size();
498 tableEntriesRel
.resize(totalFunctions
);
499 tableEntries
.resize(totalFunctions
);
500 for (const WasmElemSegment
&seg
: wasmObj
->elements()) {
502 if (seg
.Offset
.Extended
)
503 fatal(toString(this) + ": extended init exprs not supported");
504 else if (seg
.Offset
.Inst
.Opcode
== WASM_OPCODE_I32_CONST
)
505 offset
= seg
.Offset
.Inst
.Value
.Int32
;
506 else if (seg
.Offset
.Inst
.Opcode
== WASM_OPCODE_I64_CONST
)
507 offset
= seg
.Offset
.Inst
.Value
.Int64
;
509 fatal(toString(this) + ": invalid table elements");
510 for (size_t index
= 0; index
< seg
.Functions
.size(); index
++) {
511 auto functionIndex
= seg
.Functions
[index
];
512 tableEntriesRel
[functionIndex
] = index
;
513 tableEntries
[functionIndex
] = offset
+ index
;
517 ArrayRef
<StringRef
> comdats
= wasmObj
->linkingData().Comdats
;
518 for (StringRef comdat
: comdats
) {
519 bool isNew
= ignoreComdats
|| symtab
->addComdat(comdat
);
520 keptComdats
.push_back(isNew
);
523 uint32_t sectionIndex
= 0;
525 // Bool for each symbol, true if called directly. This allows us to implement
526 // a weaker form of signature checking where undefined functions that are not
527 // called directly (i.e. only address taken) don't have to match the defined
528 // function's signature. We cannot do this for directly called functions
529 // because those signatures are checked at validation times.
530 // See https://github.com/llvm/llvm-project/issues/39758
531 std::vector
<bool> isCalledDirectly(wasmObj
->getNumberOfSymbols(), false);
532 for (const SectionRef
&sec
: wasmObj
->sections()) {
533 const WasmSection
§ion
= wasmObj
->getWasmSection(sec
);
534 // Wasm objects can have at most one code and one data section.
535 if (section
.Type
== WASM_SEC_CODE
) {
536 assert(!codeSection
);
537 codeSection
= §ion
;
538 } else if (section
.Type
== WASM_SEC_DATA
) {
539 assert(!dataSection
);
540 dataSection
= §ion
;
541 } else if (section
.Type
== WASM_SEC_CUSTOM
) {
542 InputChunk
*customSec
;
543 uint32_t alignment
= getCustomSectionAlignment(section
);
544 if (shouldMerge(section
))
545 customSec
= make
<MergeInputChunk
>(section
, this, alignment
);
547 customSec
= make
<InputSection
>(section
, this, alignment
);
548 customSec
->discarded
= isExcludedByComdat(customSec
);
549 customSections
.emplace_back(customSec
);
550 customSections
.back()->setRelocations(section
.Relocations
);
551 customSectionsByIndex
[sectionIndex
] = customSections
.back();
554 // Scans relocations to determine if a function symbol is called directly.
555 for (const WasmRelocation
&reloc
: section
.Relocations
)
556 if (reloc
.Type
== R_WASM_FUNCTION_INDEX_LEB
)
557 isCalledDirectly
[reloc
.Index
] = true;
560 typeMap
.resize(getWasmObj()->types().size());
561 typeIsUsed
.resize(getWasmObj()->types().size(), false);
564 // Populate `Segments`.
565 for (const WasmSegment
&s
: wasmObj
->dataSegments()) {
568 seg
= make
<MergeInputChunk
>(s
, this);
570 seg
= make
<InputSegment
>(s
, this);
571 seg
->discarded
= isExcludedByComdat(seg
);
572 // Older object files did not include WASM_SEG_FLAG_TLS and instead
573 // relied on the naming convention. To maintain compat with such objects
574 // we still imply the TLS flag based on the name of the segment.
576 (seg
->name
.starts_with(".tdata") || seg
->name
.starts_with(".tbss")))
577 seg
->flags
|= WASM_SEG_FLAG_TLS
;
578 segments
.emplace_back(seg
);
580 setRelocs(segments
, dataSection
);
582 // Populate `Functions`.
583 ArrayRef
<WasmFunction
> funcs
= wasmObj
->functions();
584 ArrayRef
<WasmSignature
> types
= wasmObj
->types();
585 functions
.reserve(funcs
.size());
587 for (auto &f
: funcs
) {
588 auto *func
= make
<InputFunction
>(types
[f
.SigIndex
], &f
, this);
589 func
->discarded
= isExcludedByComdat(func
);
590 functions
.emplace_back(func
);
592 setRelocs(functions
, codeSection
);
594 // Populate `Tables`.
595 for (const WasmTable
&t
: wasmObj
->tables())
596 tables
.emplace_back(make
<InputTable
>(t
, this));
598 // Populate `Globals`.
599 for (const WasmGlobal
&g
: wasmObj
->globals())
600 globals
.emplace_back(make
<InputGlobal
>(g
, this));
603 for (const WasmTag
&t
: wasmObj
->tags())
604 tags
.emplace_back(make
<InputTag
>(types
[t
.SigIndex
], t
, this));
606 // Populate `Symbols` based on the symbols in the object.
607 symbols
.reserve(wasmObj
->getNumberOfSymbols());
608 uint32_t tableSymbolCount
= 0;
609 for (const SymbolRef
&sym
: wasmObj
->symbols()) {
610 const WasmSymbol
&wasmSym
= wasmObj
->getWasmSymbol(sym
.getRawDataRefImpl());
611 if (wasmSym
.isTypeTable())
613 if (wasmSym
.isDefined()) {
614 // createDefined may fail if the symbol is comdat excluded in which case
615 // we fall back to creating an undefined symbol
616 if (Symbol
*d
= createDefined(wasmSym
)) {
617 symbols
.push_back(d
);
621 size_t idx
= symbols
.size();
622 symbols
.push_back(createUndefined(wasmSym
, isCalledDirectly
[idx
]));
625 addLegacyIndirectFunctionTableIfNeeded(tableSymbolCount
);
628 bool ObjFile::isExcludedByComdat(const InputChunk
*chunk
) const {
629 uint32_t c
= chunk
->getComdat();
632 return !keptComdats
[c
];
635 FunctionSymbol
*ObjFile::getFunctionSymbol(uint32_t index
) const {
636 return cast
<FunctionSymbol
>(symbols
[index
]);
639 GlobalSymbol
*ObjFile::getGlobalSymbol(uint32_t index
) const {
640 return cast
<GlobalSymbol
>(symbols
[index
]);
643 TagSymbol
*ObjFile::getTagSymbol(uint32_t index
) const {
644 return cast
<TagSymbol
>(symbols
[index
]);
647 TableSymbol
*ObjFile::getTableSymbol(uint32_t index
) const {
648 return cast
<TableSymbol
>(symbols
[index
]);
651 SectionSymbol
*ObjFile::getSectionSymbol(uint32_t index
) const {
652 return cast
<SectionSymbol
>(symbols
[index
]);
655 DataSymbol
*ObjFile::getDataSymbol(uint32_t index
) const {
656 return cast
<DataSymbol
>(symbols
[index
]);
659 Symbol
*ObjFile::createDefined(const WasmSymbol
&sym
) {
660 StringRef name
= sym
.Info
.Name
;
661 uint32_t flags
= sym
.Info
.Flags
;
663 switch (sym
.Info
.Kind
) {
664 case WASM_SYMBOL_TYPE_FUNCTION
: {
665 InputFunction
*func
=
666 functions
[sym
.Info
.ElementIndex
- wasmObj
->getNumImportedFunctions()];
667 if (sym
.isBindingLocal())
668 return make
<DefinedFunction
>(name
, flags
, this, func
);
671 return symtab
->addDefinedFunction(name
, flags
, this, func
);
673 case WASM_SYMBOL_TYPE_DATA
: {
674 InputChunk
*seg
= segments
[sym
.Info
.DataRef
.Segment
];
675 auto offset
= sym
.Info
.DataRef
.Offset
;
676 auto size
= sym
.Info
.DataRef
.Size
;
677 // Support older (e.g. llvm 13) object files that pre-date the per-symbol
678 // TLS flag, and symbols were assumed to be TLS by being defined in a TLS
680 if (!(flags
& WASM_SYMBOL_TLS
) && seg
->isTLS())
681 flags
|= WASM_SYMBOL_TLS
;
682 if (sym
.isBindingLocal())
683 return make
<DefinedData
>(name
, flags
, this, seg
, offset
, size
);
686 return symtab
->addDefinedData(name
, flags
, this, seg
, offset
, size
);
688 case WASM_SYMBOL_TYPE_GLOBAL
: {
689 InputGlobal
*global
=
690 globals
[sym
.Info
.ElementIndex
- wasmObj
->getNumImportedGlobals()];
691 if (sym
.isBindingLocal())
692 return make
<DefinedGlobal
>(name
, flags
, this, global
);
693 return symtab
->addDefinedGlobal(name
, flags
, this, global
);
695 case WASM_SYMBOL_TYPE_SECTION
: {
696 InputChunk
*section
= customSectionsByIndex
[sym
.Info
.ElementIndex
];
697 assert(sym
.isBindingLocal());
698 // Need to return null if discarded here? data and func only do that when
699 // binding is not local.
700 if (section
->discarded
)
702 return make
<SectionSymbol
>(flags
, section
, this);
704 case WASM_SYMBOL_TYPE_TAG
: {
705 InputTag
*tag
= tags
[sym
.Info
.ElementIndex
- wasmObj
->getNumImportedTags()];
706 if (sym
.isBindingLocal())
707 return make
<DefinedTag
>(name
, flags
, this, tag
);
708 return symtab
->addDefinedTag(name
, flags
, this, tag
);
710 case WASM_SYMBOL_TYPE_TABLE
: {
712 tables
[sym
.Info
.ElementIndex
- wasmObj
->getNumImportedTables()];
713 if (sym
.isBindingLocal())
714 return make
<DefinedTable
>(name
, flags
, this, table
);
715 return symtab
->addDefinedTable(name
, flags
, this, table
);
718 llvm_unreachable("unknown symbol kind");
721 Symbol
*ObjFile::createUndefined(const WasmSymbol
&sym
, bool isCalledDirectly
) {
722 StringRef name
= sym
.Info
.Name
;
723 uint32_t flags
= sym
.Info
.Flags
| WASM_SYMBOL_UNDEFINED
;
725 switch (sym
.Info
.Kind
) {
726 case WASM_SYMBOL_TYPE_FUNCTION
:
727 if (sym
.isBindingLocal())
728 return make
<UndefinedFunction
>(name
, sym
.Info
.ImportName
,
729 sym
.Info
.ImportModule
, flags
, this,
730 sym
.Signature
, isCalledDirectly
);
731 return symtab
->addUndefinedFunction(name
, sym
.Info
.ImportName
,
732 sym
.Info
.ImportModule
, flags
, this,
733 sym
.Signature
, isCalledDirectly
);
734 case WASM_SYMBOL_TYPE_DATA
:
735 if (sym
.isBindingLocal())
736 return make
<UndefinedData
>(name
, flags
, this);
737 return symtab
->addUndefinedData(name
, flags
, this);
738 case WASM_SYMBOL_TYPE_GLOBAL
:
739 if (sym
.isBindingLocal())
740 return make
<UndefinedGlobal
>(name
, sym
.Info
.ImportName
,
741 sym
.Info
.ImportModule
, flags
, this,
743 return symtab
->addUndefinedGlobal(name
, sym
.Info
.ImportName
,
744 sym
.Info
.ImportModule
, flags
, this,
746 case WASM_SYMBOL_TYPE_TABLE
:
747 if (sym
.isBindingLocal())
748 return make
<UndefinedTable
>(name
, sym
.Info
.ImportName
,
749 sym
.Info
.ImportModule
, flags
, this,
751 return symtab
->addUndefinedTable(name
, sym
.Info
.ImportName
,
752 sym
.Info
.ImportModule
, flags
, this,
754 case WASM_SYMBOL_TYPE_TAG
:
755 if (sym
.isBindingLocal())
756 return make
<UndefinedTag
>(name
, sym
.Info
.ImportName
,
757 sym
.Info
.ImportModule
, flags
, this,
759 return symtab
->addUndefinedTag(name
, sym
.Info
.ImportName
,
760 sym
.Info
.ImportModule
, flags
, this,
762 case WASM_SYMBOL_TYPE_SECTION
:
763 llvm_unreachable("section symbols cannot be undefined");
765 llvm_unreachable("unknown symbol kind");
768 static StringRef
strip(StringRef s
) { return s
.trim(' '); }
770 void StubFile::parse() {
773 SmallVector
<StringRef
> lines
;
774 mb
.getBuffer().split(lines
, '\n');
775 for (StringRef line
: lines
) {
778 // File must begin with #STUB
780 assert(line
== "#STUB");
784 // Lines starting with # are considered comments
785 if (line
.starts_with("#") || !line
.size())
790 std::tie(sym
, rest
) = line
.split(':');
794 symbolDependencies
[sym
] = {};
796 while (rest
.size()) {
798 std::tie(dep
, rest
) = rest
.split(',');
800 symbolDependencies
[sym
].push_back(dep
);
805 static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility
) {
806 switch (gvVisibility
) {
807 case GlobalValue::DefaultVisibility
:
808 return WASM_SYMBOL_VISIBILITY_DEFAULT
;
809 case GlobalValue::HiddenVisibility
:
810 case GlobalValue::ProtectedVisibility
:
811 return WASM_SYMBOL_VISIBILITY_HIDDEN
;
813 llvm_unreachable("unknown visibility");
816 static Symbol
*createBitcodeSymbol(const std::vector
<bool> &keptComdats
,
817 const lto::InputFile::Symbol
&objSym
,
819 StringRef name
= saver().save(objSym
.getName());
821 uint32_t flags
= objSym
.isWeak() ? WASM_SYMBOL_BINDING_WEAK
: 0;
822 flags
|= mapVisibility(objSym
.getVisibility());
824 int c
= objSym
.getComdatIndex();
825 bool excludedByComdat
= c
!= -1 && !keptComdats
[c
];
827 if (objSym
.isUndefined() || excludedByComdat
) {
828 flags
|= WASM_SYMBOL_UNDEFINED
;
829 if (objSym
.isExecutable())
830 return symtab
->addUndefinedFunction(name
, std::nullopt
, std::nullopt
,
831 flags
, &f
, nullptr, true);
832 return symtab
->addUndefinedData(name
, flags
, &f
);
835 if (objSym
.isExecutable())
836 return symtab
->addDefinedFunction(name
, flags
, &f
, nullptr);
837 return symtab
->addDefinedData(name
, flags
, &f
, nullptr, 0, 0);
840 BitcodeFile::BitcodeFile(MemoryBufferRef m
, StringRef archiveName
,
841 uint64_t offsetInArchive
, bool lazy
)
842 : InputFile(BitcodeKind
, m
) {
844 this->archiveName
= std::string(archiveName
);
846 std::string path
= mb
.getBufferIdentifier().str();
847 if (config
->thinLTOIndexOnly
)
848 path
= replaceThinLTOSuffix(mb
.getBufferIdentifier());
850 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
851 // name. If two archives define two members with the same name, this
852 // causes a collision which result in only one of the objects being taken
853 // into consideration at LTO time (which very likely causes undefined
854 // symbols later in the link stage). So we append file offset to make
856 StringRef name
= archiveName
.empty()
858 : saver().save(archiveName
+ "(" + path::filename(path
) +
859 " at " + utostr(offsetInArchive
) + ")");
860 MemoryBufferRef
mbref(mb
.getBuffer(), name
);
862 obj
= check(lto::InputFile::create(mbref
));
864 // If this isn't part of an archive, it's eagerly linked, so mark it live.
865 if (archiveName
.empty())
869 bool BitcodeFile::doneLTO
= false;
871 void BitcodeFile::parseLazy() {
872 for (auto [i
, irSym
] : llvm::enumerate(obj
->symbols())) {
873 if (irSym
.isUndefined())
875 StringRef name
= saver().save(irSym
.getName());
876 symtab
->addLazy(name
, this);
877 // addLazy() may trigger this->extract() if an existing symbol is an
878 // undefined symbol. If that happens, this function has served its purpose,
879 // and we can exit from the loop early.
885 void BitcodeFile::parse(StringRef symName
) {
887 error(toString(this) + ": attempt to add bitcode file after LTO (" + symName
+ ")");
891 Triple
t(obj
->getTargetTriple());
893 error(toString(this) + ": machine type must be wasm32 or wasm64");
896 checkArch(t
.getArch());
897 std::vector
<bool> keptComdats
;
898 // TODO Support nodeduplicate
899 // https://github.com/llvm/llvm-project/issues/49875
900 for (std::pair
<StringRef
, Comdat::SelectionKind
> s
: obj
->getComdatTable())
901 keptComdats
.push_back(symtab
->addComdat(s
.first
));
903 for (const lto::InputFile::Symbol
&objSym
: obj
->symbols())
904 symbols
.push_back(createBitcodeSymbol(keptComdats
, objSym
, *this));