1 //===- InputSection.h -------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_MACHO_INPUT_SECTION_H
10 #define LLD_MACHO_INPUT_SECTION_H
13 #include "Relocations.h"
16 #include "lld/Common/LLVM.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/BitVector.h"
20 #include "llvm/ADT/CachedHashString.h"
21 #include "llvm/ADT/TinyPtrVector.h"
22 #include "llvm/BinaryFormat/MachO.h"
38 Kind
kind() const { return sectionKind
; }
39 virtual ~InputSection() = default;
40 virtual uint64_t getSize() const { return data
.size(); }
41 virtual bool empty() const { return data
.empty(); }
42 InputFile
*getFile() const { return section
.file
; }
43 StringRef
getName() const { return section
.name
; }
44 StringRef
getSegName() const { return section
.segname
; }
45 uint32_t getFlags() const { return section
.flags
; }
46 uint64_t getFileSize() const;
47 // Translates \p off -- an offset relative to this InputSection -- into an
48 // offset from the beginning of its parent OutputSection.
49 virtual uint64_t getOffset(uint64_t off
) const = 0;
50 // The offset from the beginning of the file.
51 uint64_t getVA(uint64_t off
) const;
52 // Return a user-friendly string for use in diagnostics.
53 // Format: /path/to/object.o:(symbol _func+0x123)
54 std::string
getLocation(uint64_t off
) const;
55 // Return the source line corresponding to an address, or the empty string.
56 // Format: Source.cpp:123 (/path/to/Source.cpp:123)
57 std::string
getSourceLocation(uint64_t off
) const;
58 // Return the relocation at \p off, if it exists. This does a linear search.
59 const Reloc
*getRelocAt(uint32_t off
) const;
60 // Whether the data at \p off in this InputSection is live.
61 virtual bool isLive(uint64_t off
) const = 0;
62 virtual void markLive(uint64_t off
) = 0;
63 virtual InputSection
*canonical() { return this; }
64 virtual const InputSection
*canonical() const { return this; }
67 InputSection(Kind kind
, const Section
§ion
, ArrayRef
<uint8_t> data
,
69 : sectionKind(kind
), keepUnique(false), hasAltEntry(false), align(align
),
70 data(data
), section(section
) {}
72 InputSection(const InputSection
&rhs
)
73 : sectionKind(rhs
.sectionKind
), keepUnique(false), hasAltEntry(false),
74 align(rhs
.align
), data(rhs
.data
), section(rhs
.section
) {}
79 // is address assigned?
81 // keep the address of the symbol(s) in this section unique in the final
84 // Does this section have symbols at offsets other than zero? (NOTE: only
85 // applies to ConcatInputSections.)
89 OutputSection
*parent
= nullptr;
90 ArrayRef
<uint8_t> data
;
91 std::vector
<Reloc
> relocs
;
92 // The symbols that belong to this InputSection, sorted by value. With
93 // .subsections_via_symbols, there is typically only one element here.
94 llvm::TinyPtrVector
<Defined
*> symbols
;
96 const Section
§ion
;
99 const Defined
*getContainingSymbol(uint64_t off
) const;
102 // ConcatInputSections are combined into (Concat)OutputSections through simple
103 // concatenation, in contrast with literal sections which may have their
104 // contents merged before output.
105 class ConcatInputSection final
: public InputSection
{
107 ConcatInputSection(const Section
§ion
, ArrayRef
<uint8_t> data
,
109 : InputSection(ConcatKind
, section
, data
, align
) {}
111 uint64_t getOffset(uint64_t off
) const override
{ return outSecOff
+ off
; }
112 uint64_t getVA() const { return InputSection::getVA(0); }
113 // ConcatInputSections are entirely live or dead, so the offset is irrelevant.
114 bool isLive(uint64_t off
) const override
{ return live
; }
115 void markLive(uint64_t off
) override
{ live
= true; }
116 bool isCoalescedWeak() const { return wasCoalesced
&& symbols
.empty(); }
117 bool shouldOmitFromOutput() const { return !live
|| isCoalescedWeak(); }
/// Declared here; defined out of line.
/// NOTE(review): presumably emits this section's contents into \p buf --
/// confirm the expected buffer size against the definition.
void writeTo(uint8_t *buf);
120 void foldIdentical(ConcatInputSection
*redundant
,
121 Symbol::ICFFoldKind foldKind
= Symbol::ICFFoldKind::Body
);
122 ConcatInputSection
*canonical() override
{
123 return replacement
? replacement
: this;
125 const InputSection
*canonical() const override
{
126 return replacement
? replacement
: this;
129 static bool classof(const InputSection
*isec
) {
130 return isec
->kind() == ConcatKind
;
133 // Points to the surviving section after this one is folded by ICF
134 ConcatInputSection
*replacement
= nullptr;
135 // Equivalence-class ID for ICF
136 uint32_t icfEqClass
[2] = {0, 0};
138 // With subsections_via_symbols, most symbols have their own InputSection,
139 // and for weak symbols (e.g. from inline functions), only the
140 // InputSection from one translation unit will make it to the output,
141 // while all copies in other translation units are coalesced into the
142 // first and not copied to the output.
143 bool wasCoalesced
= false;
144 bool live
= !config
->deadStrip
;
145 bool hasCallSites
= false;
146 // This variable has two usages. Initially, it represents the input order.
147 // After assignAddresses is called, it represents the offset from the
148 // beginning of the output section this section was assigned to.
149 uint64_t outSecOff
= 0;
152 // Initialize a fake InputSection that does not belong to any InputFile.
153 // The created ConcatInputSection will always have 'live=true'
154 ConcatInputSection
*makeSyntheticInputSection(StringRef segName
,
157 ArrayRef
<uint8_t> data
= {},
160 // Helper functions to make it easy to sprinkle asserts.
162 inline bool shouldOmitFromOutput(InputSection
*isec
) {
163 return isa
<ConcatInputSection
>(isec
) &&
164 cast
<ConcatInputSection
>(isec
)->shouldOmitFromOutput();
167 inline bool isCoalescedWeak(InputSection
*isec
) {
168 return isa
<ConcatInputSection
>(isec
) &&
169 cast
<ConcatInputSection
>(isec
)->isCoalescedWeak();
172 // We allocate a lot of these and binary search on them, so they should be as
173 // compact as possible. Hence the use of 31 rather than 64 bits for the hash.
175 // Offset from the start of the containing input section.
178 // Only set if deduplicating literals
// Offset of this piece measured from the start of the containing output
// section; zero by default.
uint64_t outSecOff = 0;
183 StringPiece(uint64_t off
, uint32_t hash
)
184 : inSecOff(off
), live(!config
->deadStrip
), hash(hash
) {}
187 static_assert(sizeof(StringPiece
) == 16, "StringPiece is too big!");
189 // CStringInputSections are composed of multiple null-terminated string
190 // literals, which we represent using StringPieces. These literals can be
// deduplicated and tail-merged, so translating offsets between the input and
// output sections is more complicated.
194 // NOTE: One significant difference between LLD and ld64 is that we merge all
195 // cstring literals, even those referenced directly by non-private symbols.
196 // ld64 is more conservative and does not do that. This was mostly done for
197 // implementation simplicity; if we find programs that need the more
198 // conservative behavior we can certainly implement that.
199 class CStringInputSection final
: public InputSection
{
201 CStringInputSection(const Section
§ion
, ArrayRef
<uint8_t> data
,
202 uint32_t align
, bool dedupLiterals
)
203 : InputSection(CStringLiteralKind
, section
, data
, align
),
204 deduplicateLiterals(dedupLiterals
) {}
206 uint64_t getOffset(uint64_t off
) const override
;
207 bool isLive(uint64_t off
) const override
{ return getStringPiece(off
).live
; }
208 void markLive(uint64_t off
) override
{ getStringPiece(off
).live
= true; }
209 // Find the StringPiece that contains this offset.
210 StringPiece
&getStringPiece(uint64_t off
);
211 const StringPiece
&getStringPiece(uint64_t off
) const;
// Splits the section at each null byte. Defined out of line.
// NOTE(review): presumably this is what fills `pieces` -- confirm against
// the definition.
void splitIntoPieces();
215 LLVM_ATTRIBUTE_ALWAYS_INLINE
216 StringRef
getStringRef(size_t i
) const {
217 size_t begin
= pieces
[i
].inSecOff
;
218 // The endpoint should be *at* the null terminator, not after. This matches
219 // the behavior of StringRef(const char *Str).
221 ((pieces
.size() - 1 == i
) ? data
.size() : pieces
[i
+ 1].inSecOff
) - 1;
222 return toStringRef(data
.slice(begin
, end
- begin
));
225 StringRef
getStringRefAtOffset(uint64_t off
) const {
226 return getStringRef(getStringPieceIndex(off
));
229 // Returns i'th piece as a CachedHashStringRef. This function is very hot when
230 // string merging is enabled, so we want to inline.
231 LLVM_ATTRIBUTE_ALWAYS_INLINE
232 llvm::CachedHashStringRef
getCachedHashStringRef(size_t i
) const {
233 assert(deduplicateLiterals
);
234 return {getStringRef(i
), pieces
[i
].hash
};
237 static bool classof(const InputSection
*isec
) {
238 return isec
->kind() == CStringLiteralKind
;
241 bool deduplicateLiterals
= false;
242 std::vector
<StringPiece
> pieces
;
245 size_t getStringPieceIndex(uint64_t off
) const;
248 class WordLiteralInputSection final
: public InputSection
{
250 WordLiteralInputSection(const Section
§ion
, ArrayRef
<uint8_t> data
,
252 uint64_t getOffset(uint64_t off
) const override
;
253 bool isLive(uint64_t off
) const override
{
254 return live
[off
>> power2LiteralSize
];
256 void markLive(uint64_t off
) override
{
257 live
[off
>> power2LiteralSize
] = true;
260 static bool classof(const InputSection
*isec
) {
261 return isec
->kind() == WordLiteralKind
;
265 unsigned power2LiteralSize
;
266 // The liveness of data[off] is tracked by live[off >> power2LiteralSize].
267 llvm::BitVector live
;
270 inline uint8_t sectionType(uint32_t flags
) {
271 return flags
& llvm::MachO::SECTION_TYPE
;
274 inline bool isZeroFill(uint32_t flags
) {
275 return llvm::MachO::isVirtualSection(sectionType(flags
));
278 inline bool isThreadLocalVariables(uint32_t flags
) {
279 return sectionType(flags
) == llvm::MachO::S_THREAD_LOCAL_VARIABLES
;
282 // These sections contain the data for initializing thread-local variables.
283 inline bool isThreadLocalData(uint32_t flags
) {
284 return sectionType(flags
) == llvm::MachO::S_THREAD_LOCAL_REGULAR
||
285 sectionType(flags
) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL
;
288 inline bool isDebugSection(uint32_t flags
) {
289 return (flags
& llvm::MachO::SECTION_ATTRIBUTES_USR
) ==
290 llvm::MachO::S_ATTR_DEBUG
;
293 inline bool isWordLiteralSection(uint32_t flags
) {
294 return sectionType(flags
) == llvm::MachO::S_4BYTE_LITERALS
||
295 sectionType(flags
) == llvm::MachO::S_8BYTE_LITERALS
||
296 sectionType(flags
) == llvm::MachO::S_16BYTE_LITERALS
;
299 bool isCodeSection(const InputSection
*);
300 bool isCfStringSection(const InputSection
*);
301 bool isClassRefsSection(const InputSection
*);
302 bool isSelRefsSection(const InputSection
*);
303 bool isEhFrameSection(const InputSection
*);
304 bool isGccExceptTabSection(const InputSection
*);
306 extern std::vector
<ConcatInputSection
*> inputSections
;
// This is used as a counter for specifying input order for input sections
// Global counter; declared here and defined in another translation unit.
extern int inputSectionsOrder;
// Section-name string constants. Each one is a null-terminated character
// array of type `const char[N]` (constexpr already implies const, so the
// qualifier is not repeated). The trailing underscore on `export_` and
// `const_` avoids clashing with the C++ keywords.
namespace section_names {

constexpr char authGot[] = "__auth_got";
constexpr char authPtr[] = "__auth_ptr";
constexpr char binding[] = "__binding";
constexpr char bitcodeBundle[] = "__bundle";
constexpr char cString[] = "__cstring";
constexpr char cfString[] = "__cfstring";
constexpr char cgProfile[] = "__cg_profile";
constexpr char chainFixups[] = "__chainfixups";
constexpr char codeSignature[] = "__code_signature";
constexpr char common[] = "__common";
constexpr char compactUnwind[] = "__compact_unwind";
constexpr char data[] = "__data";
constexpr char debugAbbrev[] = "__debug_abbrev";
constexpr char debugInfo[] = "__debug_info";
constexpr char debugLine[] = "__debug_line";
constexpr char debugStr[] = "__debug_str";
constexpr char debugStrOffs[] = "__debug_str_offs";
constexpr char ehFrame[] = "__eh_frame";
constexpr char gccExceptTab[] = "__gcc_except_tab";
constexpr char export_[] = "__export";
constexpr char dataInCode[] = "__data_in_code";
constexpr char functionStarts[] = "__func_starts";
constexpr char got[] = "__got";
constexpr char header[] = "__mach_header";
constexpr char indirectSymbolTable[] = "__ind_sym_tab";
constexpr char initOffsets[] = "__init_offsets";
constexpr char const_[] = "__const";
constexpr char lazySymbolPtr[] = "__la_symbol_ptr";
constexpr char lazyBinding[] = "__lazy_binding";
constexpr char literals[] = "__literals";
constexpr char functionMap[] = "__llvm_merge";
constexpr char moduleInitFunc[] = "__mod_init_func";
constexpr char moduleTermFunc[] = "__mod_term_func";
constexpr char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr char objcCatList[] = "__objc_catlist";
constexpr char objcClassList[] = "__objc_classlist";
constexpr char objcMethList[] = "__objc_methlist";
constexpr char objcClassRefs[] = "__objc_classrefs";
constexpr char objcConst[] = "__objc_const";
constexpr char objCImageInfo[] = "__objc_imageinfo";
constexpr char objcStubs[] = "__objc_stubs";
constexpr char objcSelrefs[] = "__objc_selrefs";
constexpr char objcMethname[] = "__objc_methname";
constexpr char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr char objcProtoList[] = "__objc_protolist";
constexpr char outlinedHashTree[] = "__llvm_outline";
constexpr char pageZero[] = "__pagezero";
constexpr char pointers[] = "__pointers";
constexpr char rebase[] = "__rebase";
constexpr char staticInit[] = "__StaticInit";
constexpr char stringTable[] = "__string_table";
constexpr char stubHelper[] = "__stub_helper";
constexpr char stubs[] = "__stubs";
constexpr char swift[] = "__swift";
constexpr char symbolTable[] = "__symbol_table";
constexpr char textCoalNt[] = "__textcoal_nt";
constexpr char text[] = "__text";
constexpr char threadPtrs[] = "__thread_ptrs";
constexpr char threadVars[] = "__thread_vars";
constexpr char unwindInfo[] = "__unwind_info";
constexpr char weakBinding[] = "__weak_binding";
constexpr char zeroFill[] = "__zerofill";
constexpr char addrSig[] = "__llvm_addrsig";

} // namespace section_names
379 void addInputSection(InputSection
*inputSection
);
382 std::string
toString(const macho::InputSection
*);