1 //===- InputSection.cpp ---------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "InputSection.h"
10 #include "ConcatOutputSection.h"
12 #include "InputFiles.h"
13 #include "OutputSegment.h"
15 #include "SyntheticSections.h"
17 #include "UnwindInfoSection.h"
20 #include "lld/Common/ErrorHandler.h"
21 #include "lld/Common/Memory.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/xxhash.h"
26 using namespace llvm::MachO
;
27 using namespace llvm::support
;
29 using namespace lld::macho
;
31 // Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
32 // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
33 // so account for that.
34 static_assert(sizeof(void *) != 8 ||
35 sizeof(ConcatInputSection
) == sizeof(std::vector
<Reloc
>) + 88,
36 "Try to minimize ConcatInputSection's size, we create many "
39 std::vector
<ConcatInputSection
*> macho::inputSections
;
40 int macho::inputSectionsOrder
= 0;
42 // Call this function to add a new InputSection and have it routed to the
43 // appropriate container. Depending on its type and current config, it will
44 // either be added to 'inputSections' vector or to a synthetic section.
45 void lld::macho::addInputSection(InputSection
*inputSection
) {
46 if (auto *isec
= dyn_cast
<ConcatInputSection
>(inputSection
)) {
47 if (isec
->isCoalescedWeak())
49 if (config
->emitRelativeMethodLists
&&
50 ObjCMethListSection::isMethodList(isec
)) {
51 if (in
.objcMethList
->inputOrder
== UnspecifiedInputOrder
)
52 in
.objcMethList
->inputOrder
= inputSectionsOrder
++;
53 in
.objcMethList
->addInput(isec
);
54 isec
->parent
= in
.objcMethList
;
57 if (config
->emitInitOffsets
&&
58 sectionType(isec
->getFlags()) == S_MOD_INIT_FUNC_POINTERS
) {
59 in
.initOffsets
->addInput(isec
);
62 isec
->outSecOff
= inputSectionsOrder
++;
63 auto *osec
= ConcatOutputSection::getOrCreateForInput(isec
);
65 inputSections
.push_back(isec
);
66 } else if (auto *isec
= dyn_cast
<CStringInputSection
>(inputSection
)) {
67 if (isec
->getName() == section_names::objcMethname
) {
68 if (in
.objcMethnameSection
->inputOrder
== UnspecifiedInputOrder
)
69 in
.objcMethnameSection
->inputOrder
= inputSectionsOrder
++;
70 in
.objcMethnameSection
->addInput(isec
);
72 if (in
.cStringSection
->inputOrder
== UnspecifiedInputOrder
)
73 in
.cStringSection
->inputOrder
= inputSectionsOrder
++;
74 in
.cStringSection
->addInput(isec
);
76 } else if (auto *isec
= dyn_cast
<WordLiteralInputSection
>(inputSection
)) {
77 if (in
.wordLiteralSection
->inputOrder
== UnspecifiedInputOrder
)
78 in
.wordLiteralSection
->inputOrder
= inputSectionsOrder
++;
79 in
.wordLiteralSection
->addInput(isec
);
81 llvm_unreachable("unexpected input section kind");
84 assert(inputSectionsOrder
<= UnspecifiedInputOrder
);
87 uint64_t InputSection::getFileSize() const {
88 return isZeroFill(getFlags()) ? 0 : getSize();
91 uint64_t InputSection::getVA(uint64_t off
) const {
92 return parent
->addr
+ getOffset(off
);
95 static uint64_t resolveSymbolVA(const Symbol
*sym
, uint8_t type
) {
96 const RelocAttrs
&relocAttrs
= target
->getRelocAttrs(type
);
97 if (relocAttrs
.hasAttr(RelocAttrBits::BRANCH
))
98 return sym
->resolveBranchVA();
99 if (relocAttrs
.hasAttr(RelocAttrBits::GOT
))
100 return sym
->resolveGotVA();
101 if (relocAttrs
.hasAttr(RelocAttrBits::TLV
))
102 return sym
->resolveTlvVA();
106 const Defined
*InputSection::getContainingSymbol(uint64_t off
) const {
107 auto *nextSym
= llvm::upper_bound(
108 symbols
, off
, [](uint64_t a
, const Defined
*b
) { return a
< b
->value
; });
109 if (nextSym
== symbols
.begin())
111 return *std::prev(nextSym
);
114 std::string
InputSection::getLocation(uint64_t off
) const {
115 // First, try to find a symbol that's near the offset. Use it as a reference
117 if (auto *sym
= getContainingSymbol(off
))
118 return (toString(getFile()) + ":(symbol " + toString(*sym
) + "+0x" +
119 Twine::utohexstr(off
- sym
->value
) + ")")
122 // If that fails, use the section itself as a reference point.
123 for (const Subsection
&subsec
: section
.subsections
) {
124 if (subsec
.isec
== this) {
125 off
+= subsec
.offset
;
130 return (toString(getFile()) + ":(" + getName() + "+0x" +
131 Twine::utohexstr(off
) + ")")
135 std::string
InputSection::getSourceLocation(uint64_t off
) const {
136 auto *obj
= dyn_cast_or_null
<ObjFile
>(getFile());
140 DWARFCache
*dwarf
= obj
->getDwarf();
142 return std::string();
144 for (const Subsection
&subsec
: section
.subsections
) {
145 if (subsec
.isec
== this) {
146 off
+= subsec
.offset
;
151 auto createMsg
= [&](StringRef path
, unsigned line
) {
152 std::string filename
= sys::path::filename(path
).str();
153 std::string lineStr
= (":" + Twine(line
)).str();
154 if (filename
== path
)
155 return filename
+ lineStr
;
156 return (filename
+ lineStr
+ " (" + path
+ lineStr
+ ")").str();
159 // First, look up a function for a given offset.
160 if (std::optional
<DILineInfo
> li
= dwarf
->getDILineInfo(
161 section
.addr
+ off
, object::SectionedAddress::UndefSection
))
162 return createMsg(li
->FileName
, li
->Line
);
164 // If it failed, look up again as a variable.
165 if (const Defined
*sym
= getContainingSymbol(off
)) {
166 // Symbols are generally prefixed with an underscore, which is not included
167 // in the debug information.
168 StringRef symName
= sym
->getName();
169 if (!symName
.empty() && symName
[0] == '_')
170 symName
= symName
.substr(1);
172 if (std::optional
<std::pair
<std::string
, unsigned>> fileLine
=
173 dwarf
->getVariableLoc(symName
))
174 return createMsg(fileLine
->first
, fileLine
->second
);
177 // Try to get the source file's name from the DWARF information.
178 if (obj
->compileUnit
)
179 return obj
->sourceFile();
184 const Reloc
*InputSection::getRelocAt(uint32_t off
) const {
185 auto it
= llvm::find_if(
186 relocs
, [=](const macho::Reloc
&r
) { return r
.offset
== off
; });
187 if (it
== relocs
.end())
192 void ConcatInputSection::foldIdentical(ConcatInputSection
*copy
) {
193 align
= std::max(align
, copy
->align
);
195 copy
->wasCoalesced
= true;
196 copy
->replacement
= this;
197 for (auto ©Sym
: copy
->symbols
)
198 copySym
->wasIdenticalCodeFolded
= true;
200 symbols
.insert(symbols
.end(), copy
->symbols
.begin(), copy
->symbols
.end());
201 copy
->symbols
.clear();
203 // Remove duplicate compact unwind info for symbols at the same address.
206 for (auto it
= symbols
.begin() + 1; it
!= symbols
.end(); ++it
) {
207 assert((*it
)->value
== 0);
208 (*it
)->originalUnwindEntry
= nullptr;
212 void ConcatInputSection::writeTo(uint8_t *buf
) {
213 assert(!shouldOmitFromOutput());
215 if (getFileSize() == 0)
218 memcpy(buf
, data
.data(), data
.size());
220 for (size_t i
= 0; i
< relocs
.size(); i
++) {
221 const Reloc
&r
= relocs
[i
];
222 uint8_t *loc
= buf
+ r
.offset
;
223 uint64_t referentVA
= 0;
225 const bool needsFixup
= config
->emitChainedFixups
&&
226 target
->hasAttr(r
.type
, RelocAttrBits::UNSIGNED
);
227 if (target
->hasAttr(r
.type
, RelocAttrBits::SUBTRAHEND
)) {
228 const Symbol
*fromSym
= r
.referent
.get
<Symbol
*>();
229 const Reloc
&minuend
= relocs
[++i
];
231 if (const Symbol
*toSym
= minuend
.referent
.dyn_cast
<Symbol
*>())
232 minuendVA
= toSym
->getVA() + minuend
.addend
;
234 auto *referentIsec
= minuend
.referent
.get
<InputSection
*>();
235 assert(!::shouldOmitFromOutput(referentIsec
));
236 minuendVA
= referentIsec
->getVA(minuend
.addend
);
238 referentVA
= minuendVA
- fromSym
->getVA();
239 } else if (auto *referentSym
= r
.referent
.dyn_cast
<Symbol
*>()) {
240 if (target
->hasAttr(r
.type
, RelocAttrBits::LOAD
) &&
241 !referentSym
->isInGot())
242 target
->relaxGotLoad(loc
, r
.type
);
243 // For dtrace symbols, do not handle them as normal undefined symbols
244 if (referentSym
->getName().starts_with("___dtrace_")) {
245 // Change dtrace call site to pre-defined instructions
246 target
->handleDtraceReloc(referentSym
, r
, loc
);
249 referentVA
= resolveSymbolVA(referentSym
, r
.type
) + r
.addend
;
251 if (isThreadLocalVariables(getFlags()) && isa
<Defined
>(referentSym
)) {
252 // References from thread-local variable sections are treated as offsets
253 // relative to the start of the thread-local data memory area, which
254 // is initialized via copying all the TLV data sections (which are all
256 referentVA
-= firstTLVDataSection
->addr
;
257 } else if (needsFixup
) {
258 writeChainedFixup(loc
, referentSym
, r
.addend
);
261 } else if (auto *referentIsec
= r
.referent
.dyn_cast
<InputSection
*>()) {
262 assert(!::shouldOmitFromOutput(referentIsec
));
263 referentVA
= referentIsec
->getVA(r
.addend
);
266 writeChainedRebase(loc
, referentVA
);
270 target
->relocateOne(loc
, r
, referentVA
, getVA() + r
.offset
);
274 ConcatInputSection
*macho::makeSyntheticInputSection(StringRef segName
,
277 ArrayRef
<uint8_t> data
,
280 *make
<Section
>(/*file=*/nullptr, segName
, sectName
, flags
, /*addr=*/0);
281 auto isec
= make
<ConcatInputSection
>(section
, data
, align
);
282 // Since this is an explicitly created 'fake' input section,
283 // it should not be dead stripped.
285 section
.subsections
.push_back({0, isec
});
289 void CStringInputSection::splitIntoPieces() {
291 StringRef s
= toStringRef(data
);
293 size_t end
= s
.find(0);
294 if (end
== StringRef::npos
)
295 fatal(getLocation(off
) + ": string is not null terminated");
296 uint32_t hash
= deduplicateLiterals
? xxh3_64bits(s
.take_front(end
)) : 0;
297 pieces
.emplace_back(off
, hash
);
298 size_t size
= end
+ 1; // include null terminator
304 StringPiece
&CStringInputSection::getStringPiece(uint64_t off
) {
305 if (off
>= data
.size())
306 fatal(toString(this) + ": offset is outside the section");
309 partition_point(pieces
, [=](StringPiece p
) { return p
.inSecOff
<= off
; });
313 const StringPiece
&CStringInputSection::getStringPiece(uint64_t off
) const {
314 return const_cast<CStringInputSection
*>(this)->getStringPiece(off
);
317 size_t CStringInputSection::getStringPieceIndex(uint64_t off
) const {
318 if (off
>= data
.size())
319 fatal(toString(this) + ": offset is outside the section");
322 partition_point(pieces
, [=](StringPiece p
) { return p
.inSecOff
<= off
; });
323 return std::distance(pieces
.begin(), it
) - 1;
326 uint64_t CStringInputSection::getOffset(uint64_t off
) const {
327 const StringPiece
&piece
= getStringPiece(off
);
328 uint64_t addend
= off
- piece
.inSecOff
;
329 return piece
.outSecOff
+ addend
;
332 WordLiteralInputSection::WordLiteralInputSection(const Section
§ion
,
333 ArrayRef
<uint8_t> data
,
335 : InputSection(WordLiteralKind
, section
, data
, align
) {
336 switch (sectionType(getFlags())) {
337 case S_4BYTE_LITERALS
:
338 power2LiteralSize
= 2;
340 case S_8BYTE_LITERALS
:
341 power2LiteralSize
= 3;
343 case S_16BYTE_LITERALS
:
344 power2LiteralSize
= 4;
347 llvm_unreachable("invalid literal section type");
350 live
.resize(data
.size() >> power2LiteralSize
, !config
->deadStrip
);
353 uint64_t WordLiteralInputSection::getOffset(uint64_t off
) const {
354 auto *osec
= cast
<WordLiteralSection
>(parent
);
355 const uintptr_t buf
= reinterpret_cast<uintptr_t>(data
.data());
356 switch (sectionType(getFlags())) {
357 case S_4BYTE_LITERALS
:
358 return osec
->getLiteral4Offset(buf
+ (off
& ~3LLU)) | (off
& 3);
359 case S_8BYTE_LITERALS
:
360 return osec
->getLiteral8Offset(buf
+ (off
& ~7LLU)) | (off
& 7);
361 case S_16BYTE_LITERALS
:
362 return osec
->getLiteral16Offset(buf
+ (off
& ~15LLU)) | (off
& 15);
364 llvm_unreachable("invalid literal section type");
368 bool macho::isCodeSection(const InputSection
*isec
) {
369 uint32_t type
= sectionType(isec
->getFlags());
370 if (type
!= S_REGULAR
&& type
!= S_COALESCED
)
373 uint32_t attr
= isec
->getFlags() & SECTION_ATTRIBUTES_USR
;
374 if (attr
== S_ATTR_PURE_INSTRUCTIONS
)
377 if (isec
->getSegName() == segment_names::text
)
378 return StringSwitch
<bool>(isec
->getName())
379 .Cases(section_names::textCoalNt
, section_names::staticInit
, true)
385 bool macho::isCfStringSection(const InputSection
*isec
) {
386 return isec
->getName() == section_names::cfString
&&
387 isec
->getSegName() == segment_names::data
;
390 bool macho::isClassRefsSection(const InputSection
*isec
) {
391 return isec
->getName() == section_names::objcClassRefs
&&
392 isec
->getSegName() == segment_names::data
;
395 bool macho::isSelRefsSection(const InputSection
*isec
) {
396 return isec
->getName() == section_names::objcSelrefs
&&
397 isec
->getSegName() == segment_names::data
;
400 bool macho::isEhFrameSection(const InputSection
*isec
) {
401 return isec
->getName() == section_names::ehFrame
&&
402 isec
->getSegName() == segment_names::text
;
405 bool macho::isGccExceptTabSection(const InputSection
*isec
) {
406 return isec
->getName() == section_names::gccExceptTab
&&
407 isec
->getSegName() == segment_names::text
;
410 std::string
lld::toString(const InputSection
*isec
) {
411 return (toString(isec
->getFile()) + ":(" + isec
->getName() + ")").str();