1 //===- InputSection.cpp ---------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "InputSection.h"
10 #include "ConcatOutputSection.h"
12 #include "InputFiles.h"
13 #include "OutputSegment.h"
15 #include "SyntheticSections.h"
17 #include "UnwindInfoSection.h"
20 #include "lld/Common/ErrorHandler.h"
21 #include "lld/Common/Memory.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/xxhash.h"
26 using namespace llvm::MachO
;
27 using namespace llvm::support
;
29 using namespace lld::macho
;
31 // Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
32 // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
33 // so account for that.
// The assertion is vacuously true when pointers are not 8 bytes wide;
// otherwise sizeof(ConcatInputSection) must equal sizeof(std::vector<Reloc>)
// plus 88 bytes.
34 static_assert(sizeof(void *) != 8 ||
35 sizeof(ConcatInputSection
) == sizeof(std::vector
<Reloc
>) + 88,
36 "Try to minimize ConcatInputSection's size, we create many "
// NOTE(review): the remainder of the message and the closing parenthesis of
// this static_assert are not present in this text (the embedded numbering
// jumps from 36 to 39) -- restore them from the real file.
39 std::vector
<ConcatInputSection
*> macho::inputSections
;
41 uint64_t InputSection::getFileSize() const {
42 return isZeroFill(getFlags()) ? 0 : getSize();
45 uint64_t InputSection::getVA(uint64_t off
) const {
46 return parent
->addr
+ getOffset(off
);
// Selects which address of `sym` a relocation of kind `type` should use.
// The relocation attributes come from target->getRelocAttrs(type) and are
// tested in this order: BRANCH -> sym->resolveBranchVA(), GOT ->
// sym->resolveGotVA(), TLV -> sym->resolveTlvVA().
49 static uint64_t resolveSymbolVA(const Symbol
*sym
, uint8_t type
) {
50 const RelocAttrs
&relocAttrs
= target
->getRelocAttrs(type
);
51 if (relocAttrs
.hasAttr(RelocAttrBits::BRANCH
))
52 return sym
->resolveBranchVA();
53 if (relocAttrs
.hasAttr(RelocAttrBits::GOT
))
54 return sym
->resolveGotVA();
55 if (relocAttrs
.hasAttr(RelocAttrBits::TLV
))
56 return sym
->resolveTlvVA();
// NOTE(review): the return used when none of the three attributes is set, and
// the closing brace, are not present in this text (numbering jumps from 56 to
// 60). Presumably the plain symbol address is returned -- confirm.
// Looks up the symbol of this section that covers offset `off`.
// llvm::upper_bound finds the first entry of `symbols` whose `value` is
// greater than `off`; the entry just before it is the result.
// Assumes `symbols` is ordered by `value`, as upper_bound requires -- TODO
// confirm where that ordering is established.
60 const Defined
*InputSection::getContainingSymbol(uint64_t off
) const {
61 auto *nextSym
= llvm::upper_bound(
62 symbols
, off
, [](uint64_t a
, const Defined
*b
) { return a
< b
->value
; });
// No symbol starts at or before `off` when the search lands on begin().
63 if (nextSym
== symbols
.begin())
// NOTE(review): the statement guarded by the check above is missing from this
// text (numbering jumps from 63 to 65); presumably a null result -- confirm.
65 return *std::prev(nextSym
);
// Builds a textual description of offset `off` in this section.
// With a containing symbol the text is
//   <file>:(symbol <symbol>+0x<hex distance from the symbol value>)
// and otherwise
//   <file>:(<section name>+0x<hex offset>)
68 std::string
InputSection::getLocation(uint64_t off
) const {
69 // First, try to find a symbol that's near the offset. Use it as a reference
71 if (auto *sym
= getContainingSymbol(off
))
72 return (toString(getFile()) + ":(symbol " + toString(*sym
) + "+0x" +
73 Twine::utohexstr(off
- sym
->value
) + ")")
// NOTE(review): the tail of the return expression above is not present in
// this text (numbering jumps from 73 to 76).
76 // If that fails, use the section itself as a reference point.
// Walk the subsections of the parent section to find the one that is this
// input section.
77 for (const Subsection
&subsec
: section
.subsections
) {
78 if (subsec
.isec
== this) {
// NOTE(review): the body of the match and the loop closing are missing
// (numbering jumps from 78 to 84). The same loop in getSourceLocation adds
// subsec.offset to `off`; presumably this one does too -- confirm.
84 return (toString(getFile()) + ":(" + getName() + "+0x" +
85 Twine::utohexstr(off
) + ")")
// NOTE(review): the tail of this return expression and the closing brace of
// the function are likewise missing from this text.
// Describes the source position that corresponds to offset `off`, using the
// debug info of the owning object file. Three sources are tried in order:
//   1. line info for the address section.addr + off,
//   2. the declaration location of the containing symbol, looked up as a
//      variable by name,
//   3. the source file name of the compile unit.
89 std::string
InputSection::getSourceLocation(uint64_t off
) const {
// Only ObjFile inputs are considered; dyn_cast_or_null also accepts a null
// file.
90 auto *obj
= dyn_cast_or_null
<ObjFile
>(getFile());
// NOTE(review): lines 91-93 are missing from this text; `obj` is dereferenced
// next, so presumably a null check with an early return sits here -- confirm.
94 DWARFCache
*dwarf
= obj
->getDwarf();
// NOTE(review): lines 95-97 are missing; presumably a null check on `dwarf`
// -- confirm.
// Turn the offset within this input section into an offset within the parent
// section by adding the offset of the matching subsection.
98 for (const Subsection
&subsec
: section
.subsections
) {
99 if (subsec
.isec
== this) {
100 off
+= subsec
.offset
;
// NOTE(review): the rest of the loop (lines 101-103) is missing here.
// Formats a path and line number. When the path is just a file name the
// result is name:line, otherwise name:line (path:line).
105 auto createMsg
= [&](StringRef path
, unsigned line
) {
106 std::string filename
= sys::path::filename(path
).str();
107 std::string lineStr
= (":" + Twine(line
)).str();
108 if (filename
== path
)
109 return filename
+ lineStr
;
110 return (filename
+ lineStr
+ " (" + path
+ lineStr
+ ")").str();
// NOTE(review): the line that closes the lambda (111) is missing here.
113 // First, look up a function for a given offset.
114 if (std::optional
<DILineInfo
> li
= dwarf
->getDILineInfo(
115 section
.addr
+ off
, object::SectionedAddress::UndefSection
))
116 return createMsg(li
->FileName
, li
->Line
);
118 // If it failed, look up again as a variable.
119 if (const Defined
*sym
= getContainingSymbol(off
)) {
120 // Symbols are generally prefixed with an underscore, which is not included
121 // in the debug information.
122 StringRef symName
= sym
->getName();
123 if (!symName
.empty() && symName
[0] == '_')
124 symName
= symName
.substr(1);
126 if (std::optional
<std::pair
<std::string
, unsigned>> fileLine
=
127 dwarf
->getVariableLoc(symName
))
128 return createMsg(fileLine
->first
, fileLine
->second
);
// NOTE(review): the closing brace of the block above (line 129) is missing.
131 // Try to get the source file's name from the DWARF information.
132 if (obj
->compileUnit
)
133 return obj
->sourceFile();
// NOTE(review): the final return and the closing brace (lines 134-136) are
// missing from this text; presumably an empty string is returned -- confirm.
// Searches `relocs` front to back for the first relocation whose `offset`
// equals `off`.
138 const Reloc
*InputSection::getRelocAt(uint32_t off
) const {
139 auto it
= llvm::find_if(
140 relocs
, [=](const macho::Reloc
&r
) { return r
.offset
== off
; });
141 if (it
== relocs
.end())
// NOTE(review): both return statements (lines 142-143) and the closing brace
// are missing from this text. Presumably null is returned when nothing
// matches and the address of the found relocation otherwise -- confirm.
// Folds `copy` into this section:
//   - this section takes the larger of the two alignments,
//   - `copy` is flagged as coalesced and its `replacement` points here,
//   - every symbol of `copy` is flagged as identical-code-folded and then
//     moved to the end of this section's `symbols`,
//   - every symbol after the first loses its unwind entry.
146 void ConcatInputSection::foldIdentical(ConcatInputSection
*copy
) {
147 align
= std::max(align
, copy
->align
);
// NOTE(review): line 148 is missing from this text.
149 copy
->wasCoalesced
= true;
150 copy
->replacement
= this;
// NOTE(review): the binding below reads as a copyright sign followed by Sym;
// it looks like a mis-encoded `&copySym` (the loop body uses copySym) and
// must be repaired before this can compile.
151 for (auto ©Sym
: copy
->symbols
) {
152 copySym
->wasIdenticalCodeFolded
= true;
// NOTE(review): line 153 and the loop closing brace are missing here.
156 symbols
.insert(symbols
.end(), copy
->symbols
.begin(), copy
->symbols
.end());
157 copy
->symbols
.clear();
159 // Remove duplicate compact unwind info for symbols at the same address.
// NOTE(review): lines 160-161 are missing. The loop below starts at
// begin() + 1, which needs a non-empty vector, so presumably an emptiness
// check sits here -- confirm.
162 for (auto it
= symbols
.begin() + 1; it
!= symbols
.end(); ++it
) {
// Every symbol after the first is expected to have value 0.
163 assert((*it
)->value
== 0);
164 (*it
)->unwindEntry
= nullptr;
// NOTE(review): the closing braces of the loop and of the function are
// missing from this text.
// Copies the section contents into `buf` and then applies each relocation in
// place. For every relocation a referent address is computed and handed to
// target->relocateOne, except for the cases noted below that handle the
// location through a different call.
168 void ConcatInputSection::writeTo(uint8_t *buf
) {
169 assert(!shouldOmitFromOutput());
171 if (getFileSize() == 0)
// NOTE(review): the statement guarded by the check above (line 172) is
// missing from this text; presumably an early return -- confirm.
174 memcpy(buf
, data
.data(), data
.size());
// An index loop is used because the SUBTRAHEND case below consumes the
// following relocation as well (via ++i).
176 for (size_t i
= 0; i
< relocs
.size(); i
++) {
177 const Reloc
&r
= relocs
[i
];
// `loc` is the position inside the output buffer that this relocation
// patches.
178 uint8_t *loc
= buf
+ r
.offset
;
179 uint64_t referentVA
= 0;
// True only when chained fixups are being emitted and the relocation type has
// the UNSIGNED attribute.
181 const bool needsFixup
= config
->emitChainedFixups
&&
182 target
->hasAttr(r
.type
, RelocAttrBits::UNSIGNED
);
// Case 1: a subtrahend relocation paired with the next relocation (the
// minuend). The value written is minuend address minus subtrahend address.
183 if (target
->hasAttr(r
.type
, RelocAttrBits::SUBTRAHEND
)) {
184 const Symbol
*fromSym
= r
.referent
.get
<Symbol
*>();
185 const Reloc
&minuend
= relocs
[++i
];
// NOTE(review): line 186 is missing; `minuendVA` is assigned below without a
// visible declaration.
187 if (const Symbol
*toSym
= minuend
.referent
.dyn_cast
<Symbol
*>())
188 minuendVA
= toSym
->getVA() + minuend
.addend
;
// NOTE(review): line 189 is missing; the statements below presumably form the
// else branch for a minuend that refers to an input section -- confirm.
190 auto *referentIsec
= minuend
.referent
.get
<InputSection
*>();
191 assert(!::shouldOmitFromOutput(referentIsec
));
192 minuendVA
= referentIsec
->getVA(minuend
.addend
);
// NOTE(review): line 193 is missing here.
194 referentVA
= minuendVA
- fromSym
->getVA();
// Case 2: the relocation refers to a symbol.
195 } else if (auto *referentSym
= r
.referent
.dyn_cast
<Symbol
*>()) {
// A LOAD relocation whose symbol is not in the GOT is rewritten through
// target->relaxGotLoad.
196 if (target
->hasAttr(r
.type
, RelocAttrBits::LOAD
) &&
197 !referentSym
->isInGot())
198 target
->relaxGotLoad(loc
, r
.type
);
199 // For dtrace symbols, do not handle them as normal undefined symbols
200 if (referentSym
->getName().starts_with("___dtrace_")) {
201 // Change dtrace call site to pre-defined instructions
202 target
->handleDtraceReloc(referentSym
, r
, loc
);
// NOTE(review): lines 203-204 are missing; presumably the loop moves on to
// the next relocation here -- confirm.
205 referentVA
= resolveSymbolVA(referentSym
, r
.type
) + r
.addend
;
207 if (isThreadLocalVariables(getFlags()) && isa
<Defined
>(referentSym
)) {
208 // References from thread-local variable sections are treated as offsets
209 // relative to the start of the thread-local data memory area, which
210 // is initialized via copying all the TLV data sections (which are all
// NOTE(review): the end of the comment above (line 211) is missing.
212 referentVA
-= firstTLVDataSection
->addr
;
213 } else if (needsFixup
) {
214 writeChainedFixup(loc
, referentSym
, r
.addend
);
// NOTE(review): lines 215-216 are missing here.
// Case 3: the relocation refers to an input section; the address is that
// section's VA at offset addend.
217 } else if (auto *referentIsec
= r
.referent
.dyn_cast
<InputSection
*>()) {
218 assert(!::shouldOmitFromOutput(referentIsec
));
219 referentVA
= referentIsec
->getVA(r
.addend
);
// NOTE(review): lines 220-221 are missing; the call below is presumably
// guarded by needsFixup, like writeChainedFixup above -- confirm.
222 writeChainedRebase(loc
, referentVA
);
// NOTE(review): lines 223-225 are missing here.
// The last argument is the virtual address of the relocated location itself.
226 target
->relocateOne(loc
, r
, referentVA
, getVA() + r
.offset
);
// NOTE(review): the closing braces of the loop and of the function are
// missing from this text.
// Creates a Section that has no file and address 0, wraps `data` in a new
// ConcatInputSection that belongs to it, and registers that input section as
// the subsection at offset 0.
230 ConcatInputSection
*macho::makeSyntheticInputSection(StringRef segName
,
// NOTE(review): the parameter lines 231-232 are missing from this text; the
// body uses `sectName` and `flags`, which are presumably declared there.
233 ArrayRef
<uint8_t> data
,
// NOTE(review): lines 234-235 are missing: presumably the `align` parameter
// and the declaration of `section` that the expression below initializes.
236 *make
<Section
>(/*file=*/nullptr, segName
, sectName
, flags
, /*addr=*/0);
237 auto isec
= make
<ConcatInputSection
>(section
, data
, align
);
238 section
.subsections
.push_back({0, isec
});
// NOTE(review): the return statement and closing brace are missing;
// presumably `isec` is returned -- confirm.
// Splits the section data into null-terminated strings and records one entry
// in `pieces` per string, made of its offset and a hash.
242 void CStringInputSection::splitIntoPieces() {
// NOTE(review): line 243 is missing; `off` is used below without a visible
// declaration.
244 StringRef s
= toStringRef(data
);
// NOTE(review): line 245 is missing; the statements below presumably run in a
// loop over the remaining text -- confirm.
// Position of the next null byte in the remaining text.
246 size_t end
= s
.find(0);
247 if (end
== StringRef::npos
)
248 fatal(getLocation(off
) + ": string is not null terminated");
// The hash covers the string without its terminator and is only computed when
// deduplicateLiterals is set; otherwise 0 is stored. The 64-bit result of
// xxh3_64bits is narrowed to uint32_t here.
249 uint32_t hash
= deduplicateLiterals
? xxh3_64bits(s
.take_front(end
)) : 0;
250 pieces
.emplace_back(off
, hash
);
251 size_t size
= end
+ 1; // include null terminator
// NOTE(review): lines 252-255 are missing; presumably `s` and `off` advance
// by `size` there before the loop and function close -- confirm.
// Finds the string piece that contains section offset `off`. Offsets at or
// past the end of the data are reported through fatal().
257 StringPiece
&CStringInputSection::getStringPiece(uint64_t off
) {
258 if (off
>= data
.size())
259 fatal(toString(this) + ": offset is outside the section");
// NOTE(review): lines 260-261 are missing; the result of the search below is
// presumably stored in an iterator.
// partition_point yields the first piece whose inSecOff is greater than
// `off`. Assumes `pieces` is ordered by inSecOff -- TODO confirm.
262 partition_point(pieces
, [=](StringPiece p
) { return p
.inSecOff
<= off
; });
// NOTE(review): the return statement and closing brace are missing;
// presumably the piece just before the found position is returned -- confirm.
266 const StringPiece
&CStringInputSection::getStringPiece(uint64_t off
) const {
267 return const_cast<CStringInputSection
*>(this)->getStringPiece(off
);
// Finds the index in `pieces` of the string piece that contains section
// offset `off`. Offsets at or past the end of the data are reported through
// fatal().
270 size_t CStringInputSection::getStringPieceIndex(uint64_t off
) const {
271 if (off
>= data
.size())
272 fatal(toString(this) + ": offset is outside the section");
// NOTE(review): lines 273-274 are missing; the result of the search below is
// presumably stored in `it`, which the return statement uses.
// partition_point yields the first piece whose inSecOff is greater than
// `off`. Assumes `pieces` is ordered by inSecOff -- TODO confirm.
275 partition_point(pieces
, [=](StringPiece p
) { return p
.inSecOff
<= off
; });
// The containing piece is the one just before that position.
276 return std::distance(pieces
.begin(), it
) - 1;
279 uint64_t CStringInputSection::getOffset(uint64_t off
) const {
280 const StringPiece
&piece
= getStringPiece(off
);
281 uint64_t addend
= off
- piece
.inSecOff
;
282 return piece
.outSecOff
+ addend
;
// Constructs a word-literal input section. The section type taken from the
// flags selects power2LiteralSize, the log2 of the literal width: 2 for
// 4-byte, 3 for 8-byte and 4 for 16-byte literals. `live` then gets one entry
// per literal.
// NOTE(review): the first parameter below reads as a section sign followed by
// ion; it looks like a mis-encoded `&section` (the initializer list uses
// section) and must be repaired before this can compile.
285 WordLiteralInputSection::WordLiteralInputSection(const Section
§ion
,
286 ArrayRef
<uint8_t> data
,
// NOTE(review): line 287 is missing; the `align` parameter used below is
// presumably declared there.
288 : InputSection(WordLiteralKind
, section
, data
, align
) {
289 switch (sectionType(getFlags())) {
290 case S_4BYTE_LITERALS
:
291 power2LiteralSize
= 2;
// NOTE(review): lines 292, 295 and 298 are missing after the three
// assignments; presumably each case is terminated there -- confirm.
293 case S_8BYTE_LITERALS
:
294 power2LiteralSize
= 3;
296 case S_16BYTE_LITERALS
:
297 power2LiteralSize
= 4;
// NOTE(review): line 299 is missing; the call below presumably belongs to a
// default label.
300 llvm_unreachable("invalid literal section type");
// One entry per literal: data.size() shifted right by power2LiteralSize is
// the literal count. Each entry starts out as the negation of
// config->deadStrip.
303 live
.resize(data
.size() >> power2LiteralSize
, !config
->deadStrip
);
// Maps input offset `off` to an output offset. `off` is rounded down to the
// start of its literal, the output section is asked for the offset of the
// literal stored at that address in `data`, and the low bits of `off` (the
// position inside the literal) are OR-ed back into the result.
306 uint64_t WordLiteralInputSection::getOffset(uint64_t off
) const {
307 auto *osec
= cast
<WordLiteralSection
>(parent
);
// Address of the section data as an integer, so the literal address can be
// formed by plain addition.
308 const uintptr_t buf
= reinterpret_cast<uintptr_t>(data
.data());
309 switch (sectionType(getFlags())) {
310 case S_4BYTE_LITERALS
:
311 return osec
->getLiteral4Offset(buf
+ (off
& ~3LLU)) | (off
& 3);
312 case S_8BYTE_LITERALS
:
313 return osec
->getLiteral8Offset(buf
+ (off
& ~7LLU)) | (off
& 7);
314 case S_16BYTE_LITERALS
:
315 return osec
->getLiteral16Offset(buf
+ (off
& ~15LLU)) | (off
& 15);
// NOTE(review): line 316 is missing; the call below presumably belongs to a
// default label. The closing braces are missing as well.
317 llvm_unreachable("invalid literal section type");
// Classifies `isec` as code or not, based on its section type, its user
// attribute flags, and for sections in segment_names::text its name.
321 bool macho::isCodeSection(const InputSection
*isec
) {
322 uint32_t type
= sectionType(isec
->getFlags());
323 if (type
!= S_REGULAR
&& type
!= S_COALESCED
)
// NOTE(review): the statement guarded by the check above (line 324) is
// missing from this text; presumably such sections are rejected -- confirm.
// Only the bits under SECTION_ATTRIBUTES_USR are compared.
326 uint32_t attr
= isec
->getFlags() & SECTION_ATTRIBUTES_USR
;
327 if (attr
== S_ATTR_PURE_INSTRUCTIONS
)
// NOTE(review): the statement guarded by the check above (line 328) is
// missing; presumably such sections are accepted -- confirm.
// Sections of the text segment are additionally matched by name.
330 if (isec
->getSegName() == segment_names::text
)
331 return StringSwitch
<bool>(isec
->getName())
332 .Cases(section_names::textCoalNt
, section_names::staticInit
, true)
// NOTE(review): the tail of the StringSwitch, the final return and the
// closing brace (lines 333-336) are missing from this text.
338 bool macho::isCfStringSection(const InputSection
*isec
) {
339 return isec
->getName() == section_names::cfString
&&
340 isec
->getSegName() == segment_names::data
;
343 bool macho::isClassRefsSection(const InputSection
*isec
) {
344 return isec
->getName() == section_names::objcClassRefs
&&
345 isec
->getSegName() == segment_names::data
;
348 bool macho::isSelRefsSection(const InputSection
*isec
) {
349 return isec
->getName() == section_names::objcSelrefs
&&
350 isec
->getSegName() == segment_names::data
;
353 bool macho::isEhFrameSection(const InputSection
*isec
) {
354 return isec
->getName() == section_names::ehFrame
&&
355 isec
->getSegName() == segment_names::text
;
358 bool macho::isGccExceptTabSection(const InputSection
*isec
) {
359 return isec
->getName() == section_names::gccExceptTab
&&
360 isec
->getSegName() == segment_names::text
;
363 std::string
lld::toString(const InputSection
*isec
) {
364 return (toString(isec
->getFile()) + ":(" + isec
->getName() + ")").str();