1 //===- Chunks.h -------------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_COFF_CHUNKS_H
10 #define LLD_COFF_CHUNKS_H
13 #include "InputFiles.h"
14 #include "lld/Common/LLVM.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/PointerIntPair.h"
17 #include "llvm/ADT/iterator.h"
18 #include "llvm/ADT/iterator_range.h"
19 #include "llvm/MC/StringTableBuilder.h"
20 #include "llvm/Object/COFF.h"
27 using llvm::COFF::ImportDirectoryTableEntry
;
28 using llvm::object::COFFSymbolRef
;
29 using llvm::object::SectionRef
;
30 using llvm::object::coff_relocation
;
31 using llvm::object::coff_section
;
35 class DefinedImportData
;
39 class RuntimePseudoReloc
;
// Mask for permissions (discardable, writable, readable, executable, etc).
constexpr uint32_t permMask = 0xFE000000;

// Mask for section types (code, data, bss).
constexpr uint32_t typeMask = 0x000000E0;
// The log base 2 of the largest section alignment, which is log2(8192), or 13.
enum : unsigned { Log2MaxSectionAlignment = 13 };
51 // A Chunk represents a chunk of data that will occupy space in the
52 // output (if the resolver chose that). It may or may not be backed by
53 // a section of an input file. It could be linker-created data, or
54 // doesn't even have actual data (if common or bss).
// Tag identifying the concrete chunk type. classof() and the inline Chunk
// dispatchers test this value; it is stored in a single byte.
enum Kind : uint8_t { SectionKind, OtherKind, ImportThunkKind };
58 Kind
kind() const { return chunkKind
; }
60 // Returns the size of this chunk (even if this is a common or BSS.)
61 size_t getSize() const;
63 // Returns chunk alignment in power of two form. Value values are powers of
64 // two from 1 to 8192.
65 uint32_t getAlignment() const { return 1U << p2Align
; }
67 // Update the chunk section alignment measured in bytes. Internally alignment
69 void setAlignment(uint32_t align
) {
70 // Treat zero byte alignment as 1 byte alignment.
71 align
= align
? align
: 1;
72 assert(llvm::isPowerOf2_32(align
) && "alignment is not a power of 2");
73 p2Align
= llvm::Log2_32(align
);
74 assert(p2Align
<= Log2MaxSectionAlignment
&&
75 "impossible requested alignment");
78 // Write this chunk to a mmap'ed file, assuming Buf is pointing to
79 // beginning of the file. Because this function may use RVA values
80 // of other chunks for relocations, you need to set them properly
81 // before calling this function.
82 void writeTo(uint8_t *buf
) const;
84 // The writer sets and uses the addresses. In practice, PE images cannot be
85 // larger than 2GB. Chunks are always laid as part of the image, so Chunk RVAs
86 // can be stored with 32 bits.
87 uint32_t getRVA() const { return rva
; }
88 void setRVA(uint64_t v
) {
90 assert(rva
== v
&& "RVA truncated");
93 // Returns readable/writable/executable bits.
94 uint32_t getOutputCharacteristics() const;
96 // Returns the section name if this is a section chunk.
97 // It is illegal to call this function on non-section chunks.
98 StringRef
getSectionName() const;
100 // An output section has pointers to chunks in the section, and each
101 // chunk has a back pointer to an output section.
102 void setOutputSectionIdx(uint16_t o
) { osidx
= o
; }
103 uint16_t getOutputSectionIdx() const { return osidx
; }
104 OutputSection
*getOutputSection() const;
107 // Collect all locations that contain absolute addresses for base relocations.
108 void getBaserels(std::vector
<Baserel
> *res
);
110 // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
111 // bytes, so this is used only for logging or debugging.
112 StringRef
getDebugName() const;
114 // Return true if this file has the hotpatch flag set to true in the
115 // S_COMPILE3 record in codeview debug info. Also returns true for some thunks
116 // synthesized by the linker.
117 bool isHotPatchable() const;
120 Chunk(Kind k
= OtherKind
) : chunkKind(k
), hasData(true), p2Align(0) {}
122 const Kind chunkKind
;
125 // Returns true if this has non-zero data. BSS chunks return
126 // false. If false is returned, the space occupied by this chunk
127 // will be filled with zeros. Corresponds to the
128 // IMAGE_SCN_CNT_UNINITIALIZED_DATA section characteristic bit.
132 // The alignment of this chunk, stored in log2 form. The writer uses the
136 // The output section index for this chunk. The first valid section number is
140 // The RVA of this chunk in the output. The writer sets a value.
144 class NonSectionChunk
: public Chunk
{
146 virtual ~NonSectionChunk() = default;
148 // Returns the size of this chunk (even if this is a common or BSS.)
149 virtual size_t getSize() const = 0;
151 virtual uint32_t getOutputCharacteristics() const { return 0; }
153 // Write this chunk to a mmap'ed file, assuming Buf is pointing to
154 // beginning of the file. Because this function may use RVA values
155 // of other chunks for relocations, you need to set them properly
156 // before calling this function.
157 virtual void writeTo(uint8_t *buf
) const {}
159 // Returns the section name if this is a section chunk.
160 // It is illegal to call this function on non-section chunks.
161 virtual StringRef
getSectionName() const {
162 llvm_unreachable("unimplemented getSectionName");
166 // Collect all locations that contain absolute addresses for base relocations.
167 virtual void getBaserels(std::vector
<Baserel
> *res
) {}
169 // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
170 // bytes, so this is used only for logging or debugging.
171 virtual StringRef
getDebugName() const { return ""; }
173 static bool classof(const Chunk
*c
) { return c
->kind() != SectionKind
; }
176 NonSectionChunk(Kind k
= OtherKind
) : Chunk(k
) {}
// A chunk corresponding to a section of an input file.
180 class SectionChunk final
: public Chunk
{
181 // Identical COMDAT Folding feature accesses section internal data.
185 class symbol_iterator
: public llvm::iterator_adaptor_base
<
186 symbol_iterator
, const coff_relocation
*,
187 std::random_access_iterator_tag
, Symbol
*> {
192 symbol_iterator(ObjFile
*file
, const coff_relocation
*i
)
193 : symbol_iterator::iterator_adaptor_base(i
), file(file
) {}
196 symbol_iterator() = default;
198 Symbol
*operator*() const { return file
->getSymbol(I
->SymbolTableIndex
); }
201 SectionChunk(ObjFile
*file
, const coff_section
*header
);
202 static bool classof(const Chunk
*c
) { return c
->kind() == SectionKind
; }
203 size_t getSize() const { return header
->SizeOfRawData
; }
204 ArrayRef
<uint8_t> getContents() const;
205 void writeTo(uint8_t *buf
) const;
207 uint32_t getOutputCharacteristics() const {
208 return header
->Characteristics
& (permMask
| typeMask
);
210 StringRef
getSectionName() const {
211 return StringRef(sectionNameData
, sectionNameSize
);
213 void getBaserels(std::vector
<Baserel
> *res
);
214 bool isCOMDAT() const;
215 void applyRelX64(uint8_t *off
, uint16_t type
, OutputSection
*os
, uint64_t s
,
217 void applyRelX86(uint8_t *off
, uint16_t type
, OutputSection
*os
, uint64_t s
,
219 void applyRelARM(uint8_t *off
, uint16_t type
, OutputSection
*os
, uint64_t s
,
221 void applyRelARM64(uint8_t *off
, uint16_t type
, OutputSection
*os
, uint64_t s
,
224 void getRuntimePseudoRelocs(std::vector
<RuntimePseudoReloc
> &res
);
226 // Called if the garbage collector decides to not include this chunk
227 // in a final output. It's supposed to print out a log message to stdout.
228 void printDiscardedMessage() const;
230 // Adds COMDAT associative sections to this COMDAT section. A chunk
231 // and its children are treated as a group by the garbage collector.
232 void addAssociative(SectionChunk
*child
);
234 StringRef
getDebugName() const;
236 // True if this is a codeview debug info chunk. These will not be laid out in
237 // the image. Instead they will end up in the PDB, if one is requested.
238 bool isCodeView() const {
239 return getSectionName() == ".debug" || getSectionName().startswith(".debug$");
242 // True if this is a DWARF debug info or exception handling chunk.
243 bool isDWARF() const {
244 return getSectionName().startswith(".debug_") || getSectionName() == ".eh_frame";
247 // Allow iteration over the bodies of this chunk's relocated symbols.
248 llvm::iterator_range
<symbol_iterator
> symbols() const {
249 return llvm::make_range(symbol_iterator(file
, relocsData
),
250 symbol_iterator(file
, relocsData
+ relocsSize
));
253 ArrayRef
<coff_relocation
> getRelocs() const {
254 return llvm::makeArrayRef(relocsData
, relocsSize
);
257 // Reloc setter used by ARM range extension thunk insertion.
258 void setRelocs(ArrayRef
<coff_relocation
> newRelocs
) {
259 relocsData
= newRelocs
.data();
260 relocsSize
= newRelocs
.size();
261 assert(relocsSize
== newRelocs
.size() && "reloc size truncation");
264 // Single linked list iterator for associated comdat children.
265 class AssociatedIterator
266 : public llvm::iterator_facade_base
<
267 AssociatedIterator
, std::forward_iterator_tag
, SectionChunk
> {
269 AssociatedIterator() = default;
270 AssociatedIterator(SectionChunk
*head
) : cur(head
) {}
271 bool operator==(const AssociatedIterator
&r
) const { return cur
== r
.cur
; }
272 // FIXME: Wrong const-ness, but it makes filter ranges work.
273 SectionChunk
&operator*() const { return *cur
; }
274 SectionChunk
&operator*() { return *cur
; }
275 AssociatedIterator
&operator++() {
276 cur
= cur
->assocChildren
;
281 SectionChunk
*cur
= nullptr;
284 // Allow iteration over the associated child chunks for this section.
285 llvm::iterator_range
<AssociatedIterator
> children() const {
286 return llvm::make_range(AssociatedIterator(assocChildren
),
287 AssociatedIterator(nullptr));
290 // The section ID this chunk belongs to in its Obj.
291 uint32_t getSectionNumber() const;
293 ArrayRef
<uint8_t> consumeDebugMagic();
295 static ArrayRef
<uint8_t> consumeDebugMagic(ArrayRef
<uint8_t> data
,
296 StringRef sectionName
);
298 static SectionChunk
*findByName(ArrayRef
<SectionChunk
*> sections
,
301 // The file that this chunk was created from.
304 // Pointer to the COFF section header in the input file.
305 const coff_section
*header
;
307 // The COMDAT leader symbol if this is a COMDAT chunk.
308 DefinedRegular
*sym
= nullptr;
310 // The CRC of the contents as described in the COFF spec 4.5.5.
311 // Auxiliary Format 5: Section Definitions. Used for ICF.
312 uint32_t checksum
= 0;
314 // Used by the garbage collector.
317 // Whether this section needs to be kept distinct from other sections during
318 // ICF. This is set by the driver using address-significance tables.
319 bool keepUnique
= false;
321 // The COMDAT selection if this is a COMDAT chunk.
322 llvm::COFF::COMDATType selection
= (llvm::COFF::COMDATType
)0;
324 // A pointer pointing to a replacement for this chunk.
325 // Initially it points to "this" object. If this chunk is merged
326 // with other chunk by ICF, it points to another chunk,
327 // and this chunk is considered as dead.
331 SectionChunk
*assocChildren
= nullptr;
333 // Used for ICF (Identical COMDAT Folding)
334 void replace(SectionChunk
*other
);
335 uint32_t eqClass
[2] = {0, 0};
337 // Relocations for this section. Size is stored below.
338 const coff_relocation
*relocsData
;
340 // Section name string. Size is stored below.
341 const char *sectionNameData
;
343 uint32_t relocsSize
= 0;
344 uint32_t sectionNameSize
= 0;
347 // Inline methods to implement faux-virtual dispatch for SectionChunk.
349 inline size_t Chunk::getSize() const {
350 if (isa
<SectionChunk
>(this))
351 return static_cast<const SectionChunk
*>(this)->getSize();
353 return static_cast<const NonSectionChunk
*>(this)->getSize();
356 inline uint32_t Chunk::getOutputCharacteristics() const {
357 if (isa
<SectionChunk
>(this))
358 return static_cast<const SectionChunk
*>(this)->getOutputCharacteristics();
360 return static_cast<const NonSectionChunk
*>(this)
361 ->getOutputCharacteristics();
364 inline void Chunk::writeTo(uint8_t *buf
) const {
365 if (isa
<SectionChunk
>(this))
366 static_cast<const SectionChunk
*>(this)->writeTo(buf
);
368 static_cast<const NonSectionChunk
*>(this)->writeTo(buf
);
371 inline StringRef
Chunk::getSectionName() const {
372 if (isa
<SectionChunk
>(this))
373 return static_cast<const SectionChunk
*>(this)->getSectionName();
375 return static_cast<const NonSectionChunk
*>(this)->getSectionName();
378 inline void Chunk::getBaserels(std::vector
<Baserel
> *res
) {
379 if (isa
<SectionChunk
>(this))
380 static_cast<SectionChunk
*>(this)->getBaserels(res
);
382 static_cast<NonSectionChunk
*>(this)->getBaserels(res
);
385 inline StringRef
Chunk::getDebugName() const {
386 if (isa
<SectionChunk
>(this))
387 return static_cast<const SectionChunk
*>(this)->getDebugName();
389 return static_cast<const NonSectionChunk
*>(this)->getDebugName();
392 // This class is used to implement an lld-specific feature (not implemented in
393 // MSVC) that minimizes the output size by finding string literals sharing tail
394 // parts and merging them.
396 // If string tail merging is enabled and a section is identified as containing a
397 // string literal, it is added to a MergeChunk with an appropriate alignment.
398 // The MergeChunk then tail merges the strings using the StringTableBuilder
399 // class and assigns RVAs and section offsets to each of the member chunks based
400 // on the offsets assigned by the StringTableBuilder.
401 class MergeChunk
: public NonSectionChunk
{
403 MergeChunk(uint32_t alignment
);
404 static void addSection(SectionChunk
*c
);
405 void finalizeContents();
406 void assignSubsectionRVAs();
408 uint32_t getOutputCharacteristics() const override
;
409 StringRef
getSectionName() const override
{ return ".rdata"; }
410 size_t getSize() const override
;
411 void writeTo(uint8_t *buf
) const override
;
413 static MergeChunk
*instances
[Log2MaxSectionAlignment
+ 1];
414 std::vector
<SectionChunk
*> sections
;
417 llvm::StringTableBuilder builder
;
418 bool finalized
= false;
421 // A chunk for common symbols. Common chunks don't have actual data.
422 class CommonChunk
: public NonSectionChunk
{
424 CommonChunk(const COFFSymbolRef sym
);
425 size_t getSize() const override
{ return sym
.getValue(); }
426 uint32_t getOutputCharacteristics() const override
;
427 StringRef
getSectionName() const override
{ return ".bss"; }
430 const COFFSymbolRef sym
;
433 // A chunk for linker-created strings.
434 class StringChunk
: public NonSectionChunk
{
436 explicit StringChunk(StringRef s
) : str(s
) {}
437 size_t getSize() const override
{ return str
.size() + 1; }
438 void writeTo(uint8_t *buf
) const override
;
// Import thunk template shared by the X86 and X64 thunk chunks: an indirect
// JMP (FF 25) followed by a zeroed 32-bit operand.
static const uint8_t importThunkX86[] = {
    0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0
};
// ARM (Thumb-2) import thunk template: load a zeroed 32-bit immediate into ip
// in two halves, then jump through the word it points at.
static const uint8_t importThunkARM[] = {
    0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0
    0xc0, 0xf2, 0x00, 0x0c, // movt  ip, #0
    0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip]
};
// ARM64 import thunk template: form a page address in x16, load the target
// from it, and branch to the loaded value. Immediates are zero here.
static const uint8_t importThunkARM64[] = {
    0x10, 0x00, 0x00, 0x90, // adrp x16, #0
    0x10, 0x02, 0x40, 0xf9, // ldr  x16, [x16]
    0x00, 0x02, 0x1f, 0xd6, // br   x16
};
461 // A chunk for DLL import jump table entry. In a final output, its
462 // contents will be a JMP instruction to some __imp_ symbol.
463 class ImportThunkChunk
: public NonSectionChunk
{
465 ImportThunkChunk(Defined
*s
)
466 : NonSectionChunk(ImportThunkKind
), impSymbol(s
) {}
467 static bool classof(const Chunk
*c
) { return c
->kind() == ImportThunkKind
; }
473 class ImportThunkChunkX64
: public ImportThunkChunk
{
475 explicit ImportThunkChunkX64(Defined
*s
);
476 size_t getSize() const override
{ return sizeof(importThunkX86
); }
477 void writeTo(uint8_t *buf
) const override
;
480 class ImportThunkChunkX86
: public ImportThunkChunk
{
482 explicit ImportThunkChunkX86(Defined
*s
) : ImportThunkChunk(s
) {}
483 size_t getSize() const override
{ return sizeof(importThunkX86
); }
484 void getBaserels(std::vector
<Baserel
> *res
) override
;
485 void writeTo(uint8_t *buf
) const override
;
488 class ImportThunkChunkARM
: public ImportThunkChunk
{
490 explicit ImportThunkChunkARM(Defined
*s
) : ImportThunkChunk(s
) {
493 size_t getSize() const override
{ return sizeof(importThunkARM
); }
494 void getBaserels(std::vector
<Baserel
> *res
) override
;
495 void writeTo(uint8_t *buf
) const override
;
498 class ImportThunkChunkARM64
: public ImportThunkChunk
{
500 explicit ImportThunkChunkARM64(Defined
*s
) : ImportThunkChunk(s
) {
503 size_t getSize() const override
{ return sizeof(importThunkARM64
); }
504 void writeTo(uint8_t *buf
) const override
;
507 class RangeExtensionThunkARM
: public NonSectionChunk
{
509 explicit RangeExtensionThunkARM(Defined
*t
) : target(t
) { setAlignment(2); }
510 size_t getSize() const override
;
511 void writeTo(uint8_t *buf
) const override
;
516 class RangeExtensionThunkARM64
: public NonSectionChunk
{
518 explicit RangeExtensionThunkARM64(Defined
*t
) : target(t
) { setAlignment(4); }
519 size_t getSize() const override
;
520 void writeTo(uint8_t *buf
) const override
;
526 // See comments for DefinedLocalImport class.
527 class LocalImportChunk
: public NonSectionChunk
{
529 explicit LocalImportChunk(Defined
*s
) : sym(s
) {
530 setAlignment(config
->wordsize
);
532 size_t getSize() const override
;
533 void getBaserels(std::vector
<Baserel
> *res
) override
;
534 void writeTo(uint8_t *buf
) const override
;
540 // Duplicate RVAs are not allowed in RVA tables, so unique symbols by chunk and
541 // offset into the chunk. Order does not matter as the RVA table will be sorted
543 struct ChunkAndOffset
{
547 struct DenseMapInfo
{
548 static ChunkAndOffset
getEmptyKey() {
549 return {llvm::DenseMapInfo
<Chunk
*>::getEmptyKey(), 0};
551 static ChunkAndOffset
getTombstoneKey() {
552 return {llvm::DenseMapInfo
<Chunk
*>::getTombstoneKey(), 0};
554 static unsigned getHashValue(const ChunkAndOffset
&co
) {
555 return llvm::DenseMapInfo
<std::pair
<Chunk
*, uint32_t>>::getHashValue(
556 {co
.inputChunk
, co
.offset
});
558 static bool isEqual(const ChunkAndOffset
&lhs
, const ChunkAndOffset
&rhs
) {
559 return lhs
.inputChunk
== rhs
.inputChunk
&& lhs
.offset
== rhs
.offset
;
564 using SymbolRVASet
= llvm::DenseSet
<ChunkAndOffset
>;
566 // Table which contains symbol RVAs. Used for /safeseh and /guard:cf.
567 class RVATableChunk
: public NonSectionChunk
{
569 explicit RVATableChunk(SymbolRVASet s
) : syms(std::move(s
)) {}
570 size_t getSize() const override
{ return syms
.size() * 4; }
571 void writeTo(uint8_t *buf
) const override
;
578 // This class represents a block in .reloc section.
579 // See the PE/COFF spec 5.6 for details.
580 class BaserelChunk
: public NonSectionChunk
{
582 BaserelChunk(uint32_t page
, Baserel
*begin
, Baserel
*end
);
583 size_t getSize() const override
{ return data
.size(); }
584 void writeTo(uint8_t *buf
) const override
;
587 std::vector
<uint8_t> data
;
592 Baserel(uint32_t v
, uint8_t ty
) : rva(v
), type(ty
) {}
593 explicit Baserel(uint32_t v
) : Baserel(v
, getDefaultType()) {}
594 uint8_t getDefaultType();
600 // This is a placeholder Chunk, to allow attaching a DefinedSynthetic to a
601 // specific place in a section, without any data. This is used for the MinGW
602 // specific symbol __RUNTIME_PSEUDO_RELOC_LIST_END__, even though the concept
603 // of an empty chunk isn't MinGW specific.
604 class EmptyChunk
: public NonSectionChunk
{
607 size_t getSize() const override
{ return 0; }
608 void writeTo(uint8_t *buf
) const override
{}
// MinGW specific, for the "automatic import of variables from DLLs" feature.
// This provides the table of runtime pseudo relocations, for variable
// references that turned out to need to be imported from a DLL even though
// the reference didn't use the dllimport attribute. The MinGW runtime will
// process this table after loading, before handing control over to user code.
617 class PseudoRelocTableChunk
: public NonSectionChunk
{
619 PseudoRelocTableChunk(std::vector
<RuntimePseudoReloc
> &relocs
)
620 : relocs(std::move(relocs
)) {
623 size_t getSize() const override
;
624 void writeTo(uint8_t *buf
) const override
;
627 std::vector
<RuntimePseudoReloc
> relocs
;
630 // MinGW specific; information about one individual location in the image
631 // that needs to be fixed up at runtime after loading. This represents
632 // one individual element in the PseudoRelocTableChunk table.
633 class RuntimePseudoReloc
{
635 RuntimePseudoReloc(Defined
*sym
, SectionChunk
*target
, uint32_t targetOffset
,
637 : sym(sym
), target(target
), targetOffset(targetOffset
), flags(flags
) {}
640 SectionChunk
*target
;
641 uint32_t targetOffset
;
642 // The Flags field contains the size of the relocation, in bits. No other
643 // flags are currently defined.
647 // MinGW specific. A Chunk that contains one pointer-sized absolute value.
648 class AbsolutePointerChunk
: public NonSectionChunk
{
650 AbsolutePointerChunk(uint64_t value
) : value(value
) {
651 setAlignment(getSize());
653 size_t getSize() const override
;
654 void writeTo(uint8_t *buf
) const override
;
660 // Return true if this file has the hotpatch flag set to true in the S_COMPILE3
661 // record in codeview debug info. Also returns true for some thunks synthesized
663 inline bool Chunk::isHotPatchable() const {
664 if (auto *sc
= dyn_cast
<SectionChunk
>(this))
665 return sc
->file
->hotPatchable
;
666 else if (isa
<ImportThunkChunk
>(this))
// Helpers that patch an encoded instruction at `off` in place; the ARM/Thumb
// ones take the 32-bit value to encode.
void applyMOV32T(uint8_t *off, uint32_t v);
void applyBranch24T(uint8_t *off, int32_t v);

// ARM64 counterparts of the instruction-patching helpers above.
void applyArm64Addr(uint8_t *off, uint64_t s, uint64_t p, int shift);
void applyArm64Imm(uint8_t *off, uint64_t imm, uint32_t rangeLimit);
void applyArm64Branch26(uint8_t *off, int64_t v);
683 struct DenseMapInfo
<lld::coff::ChunkAndOffset
>
684 : lld::coff::ChunkAndOffset::DenseMapInfo
{};