1 //===- InputSection.cpp ---------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "InputSection.h"
10 #include "ConcatOutputSection.h"
12 #include "InputFiles.h"
13 #include "OutputSegment.h"
16 #include "SyntheticSections.h"
18 #include "UnwindInfoSection.h"
21 #include "lld/Common/ErrorHandler.h"
22 #include "lld/Common/Memory.h"
23 #include "llvm/Support/Endian.h"
24 #include "llvm/Support/xxhash.h"
27 using namespace llvm::MachO
;
28 using namespace llvm::support
;
30 using namespace lld::macho
;
32 // Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
33 // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
34 // so account for that.
35 static_assert(sizeof(void *) != 8 ||
36 sizeof(ConcatInputSection
) == sizeof(std::vector
<Reloc
>) + 88,
37 "Try to minimize ConcatInputSection's size, we create many "
40 std::vector
<ConcatInputSection
*> macho::inputSections
;
41 int macho::inputSectionsOrder
= 0;
43 // Call this function to add a new InputSection and have it routed to the
44 // appropriate container. Depending on its type and current config, it will
45 // either be added to 'inputSections' vector or to a synthetic section.
46 void lld::macho::addInputSection(InputSection
*inputSection
) {
47 if (auto *isec
= dyn_cast
<ConcatInputSection
>(inputSection
)) {
48 if (isec
->isCoalescedWeak())
50 if (config
->emitRelativeMethodLists
&&
51 ObjCMethListSection::isMethodList(isec
)) {
52 if (in
.objcMethList
->inputOrder
== UnspecifiedInputOrder
)
53 in
.objcMethList
->inputOrder
= inputSectionsOrder
++;
54 in
.objcMethList
->addInput(isec
);
55 isec
->parent
= in
.objcMethList
;
58 if (config
->emitInitOffsets
&&
59 sectionType(isec
->getFlags()) == S_MOD_INIT_FUNC_POINTERS
) {
60 in
.initOffsets
->addInput(isec
);
63 isec
->outSecOff
= inputSectionsOrder
++;
64 auto *osec
= ConcatOutputSection::getOrCreateForInput(isec
);
66 inputSections
.push_back(isec
);
67 } else if (auto *isec
= dyn_cast
<CStringInputSection
>(inputSection
)) {
68 if (isec
->getName() == section_names::objcMethname
) {
69 if (in
.objcMethnameSection
->inputOrder
== UnspecifiedInputOrder
)
70 in
.objcMethnameSection
->inputOrder
= inputSectionsOrder
++;
71 in
.objcMethnameSection
->addInput(isec
);
73 if (in
.cStringSection
->inputOrder
== UnspecifiedInputOrder
)
74 in
.cStringSection
->inputOrder
= inputSectionsOrder
++;
75 in
.cStringSection
->addInput(isec
);
77 } else if (auto *isec
= dyn_cast
<WordLiteralInputSection
>(inputSection
)) {
78 if (in
.wordLiteralSection
->inputOrder
== UnspecifiedInputOrder
)
79 in
.wordLiteralSection
->inputOrder
= inputSectionsOrder
++;
80 in
.wordLiteralSection
->addInput(isec
);
82 llvm_unreachable("unexpected input section kind");
85 assert(inputSectionsOrder
<= UnspecifiedInputOrder
);
88 uint64_t InputSection::getFileSize() const {
89 return isZeroFill(getFlags()) ? 0 : getSize();
92 uint64_t InputSection::getVA(uint64_t off
) const {
93 return parent
->addr
+ getOffset(off
);
96 static uint64_t resolveSymbolVA(const Symbol
*sym
, uint8_t type
) {
97 const RelocAttrs
&relocAttrs
= target
->getRelocAttrs(type
);
98 if (relocAttrs
.hasAttr(RelocAttrBits::BRANCH
))
99 return sym
->resolveBranchVA();
100 if (relocAttrs
.hasAttr(RelocAttrBits::GOT
))
101 return sym
->resolveGotVA();
102 if (relocAttrs
.hasAttr(RelocAttrBits::TLV
))
103 return sym
->resolveTlvVA();
107 const Defined
*InputSection::getContainingSymbol(uint64_t off
) const {
108 auto *nextSym
= llvm::upper_bound(
109 symbols
, off
, [](uint64_t a
, const Defined
*b
) { return a
< b
->value
; });
110 if (nextSym
== symbols
.begin())
112 return *std::prev(nextSym
);
115 std::string
InputSection::getLocation(uint64_t off
) const {
116 // First, try to find a symbol that's near the offset. Use it as a reference
118 if (auto *sym
= getContainingSymbol(off
))
119 return (toString(getFile()) + ":(symbol " + toString(*sym
) + "+0x" +
120 Twine::utohexstr(off
- sym
->value
) + ")")
123 // If that fails, use the section itself as a reference point.
124 for (const Subsection
&subsec
: section
.subsections
) {
125 if (subsec
.isec
== this) {
126 off
+= subsec
.offset
;
131 return (toString(getFile()) + ":(" + getName() + "+0x" +
132 Twine::utohexstr(off
) + ")")
136 std::string
InputSection::getSourceLocation(uint64_t off
) const {
137 auto *obj
= dyn_cast_or_null
<ObjFile
>(getFile());
141 DWARFCache
*dwarf
= obj
->getDwarf();
143 return std::string();
145 for (const Subsection
&subsec
: section
.subsections
) {
146 if (subsec
.isec
== this) {
147 off
+= subsec
.offset
;
152 auto createMsg
= [&](StringRef path
, unsigned line
) {
153 std::string filename
= sys::path::filename(path
).str();
154 std::string lineStr
= (":" + Twine(line
)).str();
155 if (filename
== path
)
156 return filename
+ lineStr
;
157 return (filename
+ lineStr
+ " (" + path
+ lineStr
+ ")").str();
160 // First, look up a function for a given offset.
161 if (std::optional
<DILineInfo
> li
= dwarf
->getDILineInfo(
162 section
.addr
+ off
, object::SectionedAddress::UndefSection
))
163 return createMsg(li
->FileName
, li
->Line
);
165 // If it failed, look up again as a variable.
166 if (const Defined
*sym
= getContainingSymbol(off
)) {
167 // Symbols are generally prefixed with an underscore, which is not included
168 // in the debug information.
169 StringRef symName
= sym
->getName();
170 symName
.consume_front("_");
172 if (std::optional
<std::pair
<std::string
, unsigned>> fileLine
=
173 dwarf
->getVariableLoc(symName
))
174 return createMsg(fileLine
->first
, fileLine
->second
);
177 // Try to get the source file's name from the DWARF information.
178 if (obj
->compileUnit
)
179 return obj
->sourceFile();
184 const Reloc
*InputSection::getRelocAt(uint32_t off
) const {
185 auto it
= llvm::find_if(
186 relocs
, [=](const macho::Reloc
&r
) { return r
.offset
== off
; });
187 if (it
== relocs
.end())
192 void ConcatInputSection::foldIdentical(ConcatInputSection
*copy
,
193 Symbol::ICFFoldKind foldKind
) {
194 align
= std::max(align
, copy
->align
);
196 copy
->wasCoalesced
= true;
197 copy
->replacement
= this;
198 for (auto ©Sym
: copy
->symbols
)
199 copySym
->identicalCodeFoldingKind
= foldKind
;
201 symbols
.insert(symbols
.end(), copy
->symbols
.begin(), copy
->symbols
.end());
202 copy
->symbols
.clear();
204 // Remove duplicate compact unwind info for symbols at the same address.
207 for (auto it
= symbols
.begin() + 1; it
!= symbols
.end(); ++it
) {
208 assert((*it
)->value
== 0);
209 (*it
)->originalUnwindEntry
= nullptr;
213 void ConcatInputSection::writeTo(uint8_t *buf
) {
214 assert(!shouldOmitFromOutput());
216 if (getFileSize() == 0)
219 memcpy(buf
, data
.data(), data
.size());
221 for (size_t i
= 0; i
< relocs
.size(); i
++) {
222 const Reloc
&r
= relocs
[i
];
223 uint8_t *loc
= buf
+ r
.offset
;
224 uint64_t referentVA
= 0;
226 const bool needsFixup
= config
->emitChainedFixups
&&
227 target
->hasAttr(r
.type
, RelocAttrBits::UNSIGNED
);
228 if (target
->hasAttr(r
.type
, RelocAttrBits::SUBTRAHEND
)) {
229 const Symbol
*fromSym
= r
.referent
.get
<Symbol
*>();
230 const Reloc
&minuend
= relocs
[++i
];
232 if (const Symbol
*toSym
= minuend
.referent
.dyn_cast
<Symbol
*>())
233 minuendVA
= toSym
->getVA() + minuend
.addend
;
235 auto *referentIsec
= minuend
.referent
.get
<InputSection
*>();
236 assert(!::shouldOmitFromOutput(referentIsec
));
237 minuendVA
= referentIsec
->getVA(minuend
.addend
);
239 referentVA
= minuendVA
- fromSym
->getVA();
240 } else if (auto *referentSym
= r
.referent
.dyn_cast
<Symbol
*>()) {
241 if (target
->hasAttr(r
.type
, RelocAttrBits::LOAD
) &&
242 !referentSym
->isInGot())
243 target
->relaxGotLoad(loc
, r
.type
);
244 // For dtrace symbols, do not handle them as normal undefined symbols
245 if (referentSym
->getName().starts_with("___dtrace_")) {
246 // Change dtrace call site to pre-defined instructions
247 target
->handleDtraceReloc(referentSym
, r
, loc
);
250 referentVA
= resolveSymbolVA(referentSym
, r
.type
) + r
.addend
;
252 if (isThreadLocalVariables(getFlags()) && isa
<Defined
>(referentSym
)) {
253 // References from thread-local variable sections are treated as offsets
254 // relative to the start of the thread-local data memory area, which
255 // is initialized via copying all the TLV data sections (which are all
257 referentVA
-= firstTLVDataSection
->addr
;
258 } else if (needsFixup
) {
259 writeChainedFixup(loc
, referentSym
, r
.addend
);
262 } else if (auto *referentIsec
= r
.referent
.dyn_cast
<InputSection
*>()) {
263 assert(!::shouldOmitFromOutput(referentIsec
));
264 referentVA
= referentIsec
->getVA(r
.addend
);
267 writeChainedRebase(loc
, referentVA
);
271 target
->relocateOne(loc
, r
, referentVA
, getVA() + r
.offset
);
275 ConcatInputSection
*macho::makeSyntheticInputSection(StringRef segName
,
278 ArrayRef
<uint8_t> data
,
281 *make
<Section
>(/*file=*/nullptr, segName
, sectName
, flags
, /*addr=*/0);
282 auto isec
= make
<ConcatInputSection
>(section
, data
, align
);
283 // Since this is an explicitly created 'fake' input section,
284 // it should not be dead stripped.
286 section
.subsections
.push_back({0, isec
});
290 void CStringInputSection::splitIntoPieces() {
292 StringRef s
= toStringRef(data
);
294 size_t end
= s
.find(0);
295 if (end
== StringRef::npos
)
296 fatal(getLocation(off
) + ": string is not null terminated");
297 uint32_t hash
= deduplicateLiterals
? xxh3_64bits(s
.take_front(end
)) : 0;
298 pieces
.emplace_back(off
, hash
);
299 size_t size
= end
+ 1; // include null terminator
305 StringPiece
&CStringInputSection::getStringPiece(uint64_t off
) {
306 if (off
>= data
.size())
307 fatal(toString(this) + ": offset is outside the section");
310 partition_point(pieces
, [=](StringPiece p
) { return p
.inSecOff
<= off
; });
314 const StringPiece
&CStringInputSection::getStringPiece(uint64_t off
) const {
315 return const_cast<CStringInputSection
*>(this)->getStringPiece(off
);
318 size_t CStringInputSection::getStringPieceIndex(uint64_t off
) const {
319 if (off
>= data
.size())
320 fatal(toString(this) + ": offset is outside the section");
323 partition_point(pieces
, [=](StringPiece p
) { return p
.inSecOff
<= off
; });
324 return std::distance(pieces
.begin(), it
) - 1;
327 uint64_t CStringInputSection::getOffset(uint64_t off
) const {
328 const StringPiece
&piece
= getStringPiece(off
);
329 uint64_t addend
= off
- piece
.inSecOff
;
330 return piece
.outSecOff
+ addend
;
333 WordLiteralInputSection::WordLiteralInputSection(const Section
§ion
,
334 ArrayRef
<uint8_t> data
,
336 : InputSection(WordLiteralKind
, section
, data
, align
) {
337 switch (sectionType(getFlags())) {
338 case S_4BYTE_LITERALS
:
339 power2LiteralSize
= 2;
341 case S_8BYTE_LITERALS
:
342 power2LiteralSize
= 3;
344 case S_16BYTE_LITERALS
:
345 power2LiteralSize
= 4;
348 llvm_unreachable("invalid literal section type");
351 live
.resize(data
.size() >> power2LiteralSize
, !config
->deadStrip
);
354 uint64_t WordLiteralInputSection::getOffset(uint64_t off
) const {
355 auto *osec
= cast
<WordLiteralSection
>(parent
);
356 const uintptr_t buf
= reinterpret_cast<uintptr_t>(data
.data());
357 switch (sectionType(getFlags())) {
358 case S_4BYTE_LITERALS
:
359 return osec
->getLiteral4Offset(buf
+ (off
& ~3LLU)) | (off
& 3);
360 case S_8BYTE_LITERALS
:
361 return osec
->getLiteral8Offset(buf
+ (off
& ~7LLU)) | (off
& 7);
362 case S_16BYTE_LITERALS
:
363 return osec
->getLiteral16Offset(buf
+ (off
& ~15LLU)) | (off
& 15);
365 llvm_unreachable("invalid literal section type");
369 bool macho::isCodeSection(const InputSection
*isec
) {
370 return sections::isCodeSection(isec
->getName(), isec
->getSegName(),
374 bool macho::isCfStringSection(const InputSection
*isec
) {
375 return isec
->getName() == section_names::cfString
&&
376 isec
->getSegName() == segment_names::data
;
379 bool macho::isClassRefsSection(const InputSection
*isec
) {
380 return isec
->getName() == section_names::objcClassRefs
&&
381 isec
->getSegName() == segment_names::data
;
384 bool macho::isSelRefsSection(const InputSection
*isec
) {
385 return isec
->getName() == section_names::objcSelrefs
&&
386 isec
->getSegName() == segment_names::data
;
389 bool macho::isEhFrameSection(const InputSection
*isec
) {
390 return isec
->getName() == section_names::ehFrame
&&
391 isec
->getSegName() == segment_names::text
;
394 bool macho::isGccExceptTabSection(const InputSection
*isec
) {
395 return isec
->getName() == section_names::gccExceptTab
&&
396 isec
->getSegName() == segment_names::text
;
399 std::string
lld::toString(const InputSection
*isec
) {
400 return (toString(isec
->getFile()) + ":(" + isec
->getName() + ")").str();