//===- InputChunks.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9 #include "InputChunks.h"
11 #include "OutputSegment.h"
12 #include "WriterUtils.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "lld/Common/LLVM.h"
15 #include "llvm/Support/LEB128.h"
16 #include "llvm/Support/xxhash.h"

#define DEBUG_TYPE "lld"

using namespace llvm;
using namespace llvm::wasm;
using namespace llvm::support::endian;

namespace lld {
StringRef relocTypeToString(uint8_t relocType) {
  switch (relocType) {
#define WASM_RELOC(NAME, REL)                                                  \
  case REL:                                                                    \
    return #NAME;
#include "llvm/BinaryFormat/WasmRelocs.def"
#undef WASM_RELOC
  }
  llvm_unreachable("unknown reloc type");
}
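
// Each WASM_RELOC entry in WasmRelocs.def above expands to a case in
// relocTypeToString(); e.g. the first entry becomes:
//   case 0: return "R_WASM_FUNCTION_INDEX_LEB";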

bool relocIs64(uint8_t relocType) {
  switch (relocType) {
  case R_WASM_MEMORY_ADDR_LEB64:
  case R_WASM_MEMORY_ADDR_SLEB64:
  case R_WASM_MEMORY_ADDR_REL_SLEB64:
  case R_WASM_MEMORY_ADDR_I64:
  case R_WASM_TABLE_INDEX_SLEB64:
  case R_WASM_TABLE_INDEX_I64:
  case R_WASM_FUNCTION_OFFSET_I64:
  case R_WASM_TABLE_INDEX_REL_SLEB64:
  case R_WASM_MEMORY_ADDR_TLS_SLEB64:
    return true;
  default:
    return false;
  }
}

std::string toString(const wasm::InputChunk *c) {
  return (toString(c->file) + ":(" + c->name + ")").str();
}

namespace wasm {
StringRef InputChunk::getComdatName() const {
  uint32_t index = getComdat();
  if (index == UINT32_MAX)
    return "";
  return file->getWasmObj()->linkingData().Comdats[index];
}

uint32_t InputChunk::getSize() const {
  if (const auto *ms = dyn_cast<SyntheticMergedChunk>(this))
    return ms->builder.getSize();

  if (const auto *f = dyn_cast<InputFunction>(this)) {
    if (config->compressRelocations && f->file) {
      return f->getCompressedSize();
    }
  }

  return data().size();
}

uint32_t InputChunk::getInputSize() const {
  if (const auto *f = dyn_cast<InputFunction>(this))
    return f->function->Size;
  return getSize();
}
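
// Note: getInputSize() reflects the chunk as it appears in the object file;
// for functions it can differ from getSize() once relocation padding has
// been compressed away.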

// Copy this input chunk to an mmap'ed output file and apply relocations.
void InputChunk::writeTo(uint8_t *buf) const {
  if (const auto *f = dyn_cast<InputFunction>(this)) {
    if (file && config->compressRelocations)
      return f->writeCompressed(buf);
  } else if (const auto *ms = dyn_cast<SyntheticMergedChunk>(this)) {
    ms->builder.write(buf + outSecOff);
    // Apply relocations
    ms->relocate(buf + outSecOff);
    return;
  }

  // Copy contents
  memcpy(buf + outSecOff, data().data(), data().size());

  // Apply relocations
  relocate(buf + outSecOff);
}

void InputChunk::relocate(uint8_t *buf) const {
  if (relocations.empty())
    return;

  LLVM_DEBUG(dbgs() << "applying relocations: " << toString(this)
                    << " count=" << relocations.size() << "\n");
  int32_t inputSectionOffset = getInputSectionOffset();
  uint64_t tombstone = getTombstone();

  for (const WasmRelocation &rel : relocations) {
    uint8_t *loc = buf + rel.Offset - inputSectionOffset;
    LLVM_DEBUG(dbgs() << "apply reloc: type=" << relocTypeToString(rel.Type));
    if (rel.Type != R_WASM_TYPE_INDEX_LEB)
      LLVM_DEBUG(dbgs() << " sym=" << file->getSymbols()[rel.Index]->getName());
    LLVM_DEBUG(dbgs() << " addend=" << rel.Addend << " index=" << rel.Index
                      << " offset=" << rel.Offset << "\n");
    // TODO(sbc): Check that the value is within the range of the
    // relocation type below. Most likely we must error out here
    // if it's not within range.
    uint64_t value = file->calcNewValue(rel, tombstone, this);
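
    // LEB-class relocations are rewritten at their full padded width; e.g.
    // the 32-bit value 3 is emitted as the 5-byte ULEB128 sequence
    // 0x83 0x80 0x80 0x80 0x00, so patching never changes the code size.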
    switch (rel.Type) {
    case R_WASM_TYPE_INDEX_LEB:
    case R_WASM_FUNCTION_INDEX_LEB:
    case R_WASM_GLOBAL_INDEX_LEB:
    case R_WASM_TAG_INDEX_LEB:
    case R_WASM_MEMORY_ADDR_LEB:
    case R_WASM_TABLE_NUMBER_LEB:
      encodeULEB128(static_cast<uint32_t>(value), loc, 5);
      break;
    case R_WASM_MEMORY_ADDR_LEB64:
      encodeULEB128(value, loc, 10);
      break;
    case R_WASM_TABLE_INDEX_SLEB:
    case R_WASM_TABLE_INDEX_REL_SLEB:
    case R_WASM_MEMORY_ADDR_SLEB:
    case R_WASM_MEMORY_ADDR_REL_SLEB:
    case R_WASM_MEMORY_ADDR_TLS_SLEB:
      encodeSLEB128(static_cast<int32_t>(value), loc, 5);
      break;
    case R_WASM_TABLE_INDEX_SLEB64:
    case R_WASM_TABLE_INDEX_REL_SLEB64:
    case R_WASM_MEMORY_ADDR_SLEB64:
    case R_WASM_MEMORY_ADDR_REL_SLEB64:
    case R_WASM_MEMORY_ADDR_TLS_SLEB64:
      encodeSLEB128(static_cast<int64_t>(value), loc, 10);
      break;
    case R_WASM_TABLE_INDEX_I32:
    case R_WASM_MEMORY_ADDR_I32:
    case R_WASM_FUNCTION_OFFSET_I32:
    case R_WASM_FUNCTION_INDEX_I32:
    case R_WASM_SECTION_OFFSET_I32:
    case R_WASM_GLOBAL_INDEX_I32:
    case R_WASM_MEMORY_ADDR_LOCREL_I32:
      write32le(loc, value);
      break;
    case R_WASM_TABLE_INDEX_I64:
    case R_WASM_MEMORY_ADDR_I64:
    case R_WASM_FUNCTION_OFFSET_I64:
      write64le(loc, value);
      break;
    default:
      llvm_unreachable("unknown relocation type");
    }
  }
}

// Copy relocation entries to a given output stream.
// This function is used only when a user passes "-r". For a regular link,
// we consume relocations instead of copying them to an output file.
void InputChunk::writeRelocations(raw_ostream &os) const {
  if (relocations.empty())
    return;

  int32_t off = outSecOff - getInputSectionOffset();
  LLVM_DEBUG(dbgs() << "writeRelocations: " << file->getName()
                    << " offset=" << Twine(off) << "\n");

  for (const WasmRelocation &rel : relocations) {
    writeUleb128(os, rel.Type, "reloc type");
    writeUleb128(os, rel.Offset + off, "reloc offset");
    writeUleb128(os, file->calcNewIndex(rel), "reloc index");

    if (relocTypeHasAddend(rel.Type))
      writeSleb128(os, file->calcNewAddend(rel), "reloc addend");
  }
}

uint64_t InputChunk::getTombstone() const {
  if (const auto *s = dyn_cast<InputSection>(this)) {
    return s->tombstoneValue;
  }
  return 0;
}

void InputFunction::setFunctionIndex(uint32_t index) {
  LLVM_DEBUG(dbgs() << "InputFunction::setFunctionIndex: " << name << " -> "
                    << index << "\n");
  assert(!hasFunctionIndex());
  functionIndex = index;
}

void InputFunction::setTableIndex(uint32_t index) {
  LLVM_DEBUG(dbgs() << "InputFunction::setTableIndex: " << name << " -> "
                    << index << "\n");
  assert(!hasTableIndex());
  tableIndex = index;
}

// Write a relocation value without padding and return the number of bytes
// consumed.
static unsigned writeCompressedReloc(uint8_t *buf, const WasmRelocation &rel,
                                     uint64_t value) {
  switch (rel.Type) {
  case R_WASM_TYPE_INDEX_LEB:
  case R_WASM_FUNCTION_INDEX_LEB:
  case R_WASM_GLOBAL_INDEX_LEB:
  case R_WASM_TAG_INDEX_LEB:
  case R_WASM_MEMORY_ADDR_LEB:
  case R_WASM_MEMORY_ADDR_LEB64:
  case R_WASM_TABLE_NUMBER_LEB:
    return encodeULEB128(value, buf);
  case R_WASM_TABLE_INDEX_SLEB:
  case R_WASM_TABLE_INDEX_SLEB64:
  case R_WASM_MEMORY_ADDR_SLEB:
  case R_WASM_MEMORY_ADDR_SLEB64:
    return encodeSLEB128(static_cast<int64_t>(value), buf);
  default:
    llvm_unreachable("unexpected relocation type");
  }
}

static unsigned getRelocWidthPadded(const WasmRelocation &rel) {
  switch (rel.Type) {
  case R_WASM_TYPE_INDEX_LEB:
  case R_WASM_FUNCTION_INDEX_LEB:
  case R_WASM_GLOBAL_INDEX_LEB:
  case R_WASM_TAG_INDEX_LEB:
  case R_WASM_MEMORY_ADDR_LEB:
  case R_WASM_TABLE_NUMBER_LEB:
  case R_WASM_TABLE_INDEX_SLEB:
  case R_WASM_MEMORY_ADDR_SLEB:
    return 5;
  case R_WASM_TABLE_INDEX_SLEB64:
  case R_WASM_MEMORY_ADDR_LEB64:
  case R_WASM_MEMORY_ADDR_SLEB64:
    return 10;
  default:
    llvm_unreachable("unexpected relocation type");
  }
}

static unsigned getRelocWidth(const WasmRelocation &rel, uint64_t value) {
  uint8_t buf[10];
  return writeCompressedReloc(buf, rel, value);
}

// Relocations of type LEB and SLEB in the code section are padded to 5 bytes
// so that a fast linker can blindly overwrite them without needing to worry
// about the number of bytes needed to encode the values.
// However, when writing a non-relocatable output, the code section can be
// compressed to remove this padding. In that case we need to perform a size
// calculation based on the value at each relocation. At best we end up
// saving 4 bytes for each relocation entry.
//
// This function only computes the final output size. It must be called
// before getSize() is used to calculate the layout of the code section.
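//
// For example, a 20-byte function body containing one padded 5-byte
// relocation whose final value fits in a single byte compresses to 16 bytes,
// plus one byte to re-encode the new body size.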
void InputFunction::calculateSize() {
  if (!file || !config->compressRelocations)
    return;

  LLVM_DEBUG(dbgs() << "calculateSize: " << name << "\n");

  const uint8_t *secStart = file->codeSection->Content.data();
  const uint8_t *funcStart = secStart + getInputSectionOffset();
  uint32_t functionSizeLength;
  decodeULEB128(funcStart, &functionSizeLength);

  uint32_t start = getInputSectionOffset();
  uint32_t end = start + function->Size;

  uint64_t tombstone = getTombstone();

  uint32_t lastRelocEnd = start + functionSizeLength;
  for (const WasmRelocation &rel : relocations) {
    LLVM_DEBUG(dbgs() << "  region: " << (rel.Offset - lastRelocEnd) << "\n");
    compressedFuncSize += rel.Offset - lastRelocEnd;
    compressedFuncSize +=
        getRelocWidth(rel, file->calcNewValue(rel, tombstone, this));
    lastRelocEnd = rel.Offset + getRelocWidthPadded(rel);
  }
  LLVM_DEBUG(dbgs() << "  final region: " << (end - lastRelocEnd) << "\n");
  compressedFuncSize += end - lastRelocEnd;

  // Now that we know how long the resulting function is, we can add the
  // encoding of its length.
  uint8_t buf[5];
  compressedSize = compressedFuncSize + encodeULEB128(compressedFuncSize, buf);

  LLVM_DEBUG(dbgs() << "  calculateSize orig: " << function->Size << "\n");
  LLVM_DEBUG(dbgs() << "  calculateSize  new: " << compressedSize << "\n");
}

// Override the default writeTo method so that we can (optionally) write the
// compressed version of the function.
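//
// The output interleaves verbatim byte ranges with freshly encoded
// relocation values:
//   [new size][bytes...][reloc][bytes...][reloc]...[bytes...]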
void InputFunction::writeCompressed(uint8_t *buf) const {
  buf += outSecOff;
  uint8_t *orig = buf;
  (void)orig;

  const uint8_t *secStart = file->codeSection->Content.data();
  const uint8_t *funcStart = secStart + getInputSectionOffset();
  const uint8_t *end = funcStart + function->Size;
  uint64_t tombstone = getTombstone();
  uint32_t count;
  decodeULEB128(funcStart, &count);
  funcStart += count;

  LLVM_DEBUG(dbgs() << "write func: " << name << "\n");
  buf += encodeULEB128(compressedFuncSize, buf);
  const uint8_t *lastRelocEnd = funcStart;
  for (const WasmRelocation &rel : relocations) {
    unsigned chunkSize = (secStart + rel.Offset) - lastRelocEnd;
    LLVM_DEBUG(dbgs() << "  write chunk: " << chunkSize << "\n");
    memcpy(buf, lastRelocEnd, chunkSize);
    buf += chunkSize;
    buf += writeCompressedReloc(buf, rel,
                                file->calcNewValue(rel, tombstone, this));
    lastRelocEnd = secStart + rel.Offset + getRelocWidthPadded(rel);
  }

  unsigned chunkSize = end - lastRelocEnd;
  LLVM_DEBUG(dbgs() << "  write final chunk: " << chunkSize << "\n");
  memcpy(buf, lastRelocEnd, chunkSize);
  LLVM_DEBUG(dbgs() << "  total: " << (buf + chunkSize - orig) << "\n");
}
uint64_t InputChunk::getChunkOffset(uint64_t offset) const {
  if (const auto *ms = dyn_cast<MergeInputChunk>(this)) {
    LLVM_DEBUG(dbgs() << "getChunkOffset(merged): " << name << "\n");
    LLVM_DEBUG(dbgs() << "offset: " << offset << "\n");
    LLVM_DEBUG(dbgs() << "parentOffset: " << ms->getParentOffset(offset)
                      << "\n");
    assert(ms->parent);
    return ms->parent->getChunkOffset(ms->getParentOffset(offset));
  }
  return outputSegmentOffset + offset;
}

uint64_t InputChunk::getOffset(uint64_t offset) const {
  return outSecOff + getChunkOffset(offset);
}

uint64_t InputChunk::getVA(uint64_t offset) const {
  return (outputSeg ? outputSeg->startVA : 0) + getChunkOffset(offset);
}

// Generate code to apply relocations to the data section at runtime.
// This is only called when generating shared libraries (PIC) where addresses
// are not known at static link time.
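//
// For each relocation this emits roughly the following instruction sequence
// (32-bit, PIC, GOT-based case):
//   i32.const <offset>         ;; address of the location to patch
//   global.get __memory_base
//   i32.add
//   global.get <GOT entry>     ;; runtime address of the symbol
//   i32.store align=2 offset=0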
bool InputChunk::generateRelocationCode(raw_ostream &os) const {
  LLVM_DEBUG(dbgs() << "generating runtime relocations: " << name
                    << " count=" << relocations.size() << "\n");

  bool is64 = config->is64.value_or(false);
  bool generated = false;
  unsigned opcode_ptr_const = is64 ? WASM_OPCODE_I64_CONST
                                   : WASM_OPCODE_I32_CONST;
  unsigned opcode_ptr_add = is64 ? WASM_OPCODE_I64_ADD
                                 : WASM_OPCODE_I32_ADD;

  uint64_t tombstone = getTombstone();
  // TODO(sbc): Encode the relocations in the data section and write a loop
  // here to apply them.
  for (const WasmRelocation &rel : relocations) {
    uint64_t offset = getVA(rel.Offset) - getInputSectionOffset();

    Symbol *sym = file->getSymbol(rel);
    // Runtime relocations are needed when we don't know the address of
    // a symbol statically.
    bool requiresRuntimeReloc = ctx.isPic || sym->hasGOTIndex();
    if (!requiresRuntimeReloc)
      continue;

    LLVM_DEBUG(dbgs() << "gen reloc: type=" << relocTypeToString(rel.Type)
                      << " addend=" << rel.Addend << " index=" << rel.Index
                      << " output offset=" << offset << "\n");

    // Calculate the address at which to apply the relocation
    writeU8(os, opcode_ptr_const, "CONST");
    writeSleb128(os, offset, "offset");

    // In PIC mode we need to add the __memory_base
    if (ctx.isPic) {
      writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
      if (isTLS())
        writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "tls_base");
      else
        writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base");
      writeU8(os, opcode_ptr_add, "ADD");
    }

    // Now figure out what we want to store at this location
    bool is64 = relocIs64(rel.Type);
    unsigned opcode_reloc_const =
        is64 ? WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST;
    unsigned opcode_reloc_add =
        is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD;
    unsigned opcode_reloc_store =
        is64 ? WASM_OPCODE_I64_STORE : WASM_OPCODE_I32_STORE;

    if (sym->hasGOTIndex()) {
      writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
      writeUleb128(os, sym->getGOTIndex(), "global index");
      if (rel.Addend) {
        writeU8(os, opcode_reloc_const, "CONST");
        writeSleb128(os, rel.Addend, "addend");
        writeU8(os, opcode_reloc_add, "ADD");
      }
    } else {
      assert(ctx.isPic);
      const GlobalSymbol *baseSymbol = WasmSym::memoryBase;
      if (rel.Type == R_WASM_TABLE_INDEX_I32 ||
          rel.Type == R_WASM_TABLE_INDEX_I64)
        baseSymbol = WasmSym::tableBase;
      else if (sym->isTLS())
        baseSymbol = WasmSym::tlsBase;
      writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
      writeUleb128(os, baseSymbol->getGlobalIndex(), "base");
      writeU8(os, opcode_reloc_const, "CONST");
      writeSleb128(os, file->calcNewValue(rel, tombstone, this), "offset");
      writeU8(os, opcode_reloc_add, "ADD");
    }

    // Store that value at the virtual address
    writeU8(os, opcode_reloc_store, "I32_STORE");
    writeUleb128(os, 2, "align");
    writeUleb128(os, 0, "offset");
    generated = true;
  }
  return generated;
}

// Split WASM_SEG_FLAG_STRINGS section. Such a section is a sequence of
// null-terminated strings.
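// For example, the bytes "foo\0bar\0" split into two pieces, "foo\0" at
// input offset 0 and "bar\0" at input offset 4.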
void MergeInputChunk::splitStrings(ArrayRef<uint8_t> data) {
  LLVM_DEBUG(llvm::dbgs() << "splitStrings\n");
  size_t off = 0;
  StringRef s = toStringRef(data);

  while (!s.empty()) {
    size_t end = s.find(0);
    if (end == StringRef::npos)
      fatal(toString(this) + ": string is not null terminated");
    size_t size = end + 1;

    pieces.emplace_back(off, xxh3_64bits(s.substr(0, size)), true);

    s = s.substr(size);
    off += size;
  }
}

// This function is called after we obtain a complete list of input sections
// that need to be linked. It is responsible for splitting section contents
// into small chunks for further processing.
//
// Note that this function is called from parallelForEach. This must be
// thread-safe (i.e. no memory allocation from the pools).
void MergeInputChunk::splitIntoPieces() {
  assert(pieces.empty());
  // As of now we only support WASM_SEG_FLAG_STRINGS but in the future we
  // could add other types of splitting (see ELF's splitIntoPieces).
  assert(flags & WASM_SEG_FLAG_STRINGS);
  splitStrings(data());
}

SectionPiece *MergeInputChunk::getSectionPiece(uint64_t offset) {
  if (this->data().size() <= offset)
    fatal(toString(this) + ": offset is outside the section");

  // If Offset is not at beginning of a section piece, it is not in the map.
  // In that case we need to do a binary search of the original section piece
  // vector.
  auto it = partition_point(
      pieces, [=](SectionPiece p) { return p.inputOff <= offset; });
  return &it[-1];
}

// Returns the offset in an output section for a given input offset.
// Because the contents of a mergeable section are not contiguous in the
// output, this is not just an addition to a base output offset.
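// For example, if the piece containing the given offset starts at input
// offset 8 and was placed at output offset 100, then input offset 10 maps to
// parent offset 102.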
uint64_t MergeInputChunk::getParentOffset(uint64_t offset) const {
  // If Offset is not at beginning of a section piece, it is not in the map.
  // In that case we need to search from the original section piece vector.
  const SectionPiece *piece = getSectionPiece(offset);
  uint64_t addend = offset - piece->inputOff;
  return piece->outputOff + addend;
}

void SyntheticMergedChunk::finalizeContents() {
  // Add all string pieces to the string table builder to create section
  // contents.
  for (MergeInputChunk *sec : chunks)
    for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
      if (sec->pieces[i].live)
        builder.add(sec->getData(i));

  // Fix the string table content. After this, the contents will never change.
  builder.finalize();
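
  // Tail merging lets one string live inside another: e.g. "bar\0" is a
  // suffix of "foobar\0", so its output offset can point three bytes into
  // the longer string.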

  // finalize() fixed tail-optimized strings, so we can now get
  // offsets of strings. Get an offset for each string and save it
  // to a corresponding SectionPiece for easy access.
  for (MergeInputChunk *sec : chunks)
    for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
      if (sec->pieces[i].live)
        sec->pieces[i].outputOff = builder.getOffset(sec->getData(i));
}

uint64_t InputSection::getTombstoneForSection(StringRef name) {
  // When a function is not live we need to update relocations referring to it.
  // If they occur in DWARF debug symbols, we want to change the pc of the
  // function to -1 to avoid overlapping with a valid range. However, for the
  // debug_ranges and debug_loc sections that would conflict with the existing
  // meaning of -1, so we use -2.
  if (name == ".debug_ranges" || name == ".debug_loc")
    return UINT64_C(-2);
  if (name.starts_with(".debug_"))
    return UINT64_C(-1);
  // If the function occurs in a function attribute section, change it to -1
  // since 0 is a valid function index.
  if (name.starts_with("llvm.func_attr."))
    return UINT64_C(-1);
  // Returning 0 means there is no tombstone value for this section, and the
  // relocation will just use the addend.
  return 0;
}

} // namespace wasm
} // namespace lld