1 //===- InputSection.cpp ---------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "InputSection.h"
10 #include "ConcatOutputSection.h"
12 #include "InputFiles.h"
13 #include "OutputSegment.h"
15 #include "SyntheticSections.h"
17 #include "UnwindInfoSection.h"
19 #include "lld/Common/Memory.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/xxhash.h"
24 using namespace llvm::MachO
;
25 using namespace llvm::support
;
27 using namespace lld::macho
;
29 // Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
30 // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
31 // so account for that.
32 static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection
) ==
33 sizeof(std::vector
<Reloc
>) + 104,
34 "Try to minimize ConcatInputSection's size, we create many "
37 std::vector
<ConcatInputSection
*> macho::inputSections
;
39 uint64_t InputSection::getFileSize() const {
40 return isZeroFill(getFlags()) ? 0 : getSize();
43 uint64_t InputSection::getVA(uint64_t off
) const {
44 return parent
->addr
+ getOffset(off
);
47 static uint64_t resolveSymbolVA(const Symbol
*sym
, uint8_t type
) {
48 const RelocAttrs
&relocAttrs
= target
->getRelocAttrs(type
);
49 if (relocAttrs
.hasAttr(RelocAttrBits::BRANCH
))
50 return sym
->resolveBranchVA();
51 if (relocAttrs
.hasAttr(RelocAttrBits::GOT
))
52 return sym
->resolveGotVA();
53 if (relocAttrs
.hasAttr(RelocAttrBits::TLV
))
54 return sym
->resolveTlvVA();
58 const Defined
*InputSection::getContainingSymbol(uint64_t off
) const {
59 auto *nextSym
= llvm::upper_bound(
60 symbols
, off
, [](uint64_t a
, const Defined
*b
) { return a
< b
->value
; });
61 if (nextSym
== symbols
.begin())
63 return *std::prev(nextSym
);
66 std::string
InputSection::getLocation(uint64_t off
) const {
67 // First, try to find a symbol that's near the offset. Use it as a reference
69 if (auto *sym
= getContainingSymbol(off
))
70 return (toString(getFile()) + ":(symbol " + toString(*sym
) + "+0x" +
71 Twine::utohexstr(off
- sym
->value
) + ")")
74 // If that fails, use the section itself as a reference point.
75 for (const Subsection
&subsec
: section
.subsections
) {
76 if (subsec
.isec
== this) {
82 return (toString(getFile()) + ":(" + getName() + "+0x" +
83 Twine::utohexstr(off
) + ")")
87 std::string
InputSection::getSourceLocation(uint64_t off
) const {
88 auto *obj
= dyn_cast_or_null
<ObjFile
>(getFile());
92 DWARFCache
*dwarf
= obj
->getDwarf();
96 for (const Subsection
&subsec
: section
.subsections
) {
97 if (subsec
.isec
== this) {
103 auto createMsg
= [&](StringRef path
, unsigned line
) {
104 std::string filename
= sys::path::filename(path
).str();
105 std::string lineStr
= (":" + Twine(line
)).str();
106 if (filename
== path
)
107 return filename
+ lineStr
;
108 return (filename
+ lineStr
+ " (" + path
+ lineStr
+ ")").str();
111 // First, look up a function for a given offset.
112 if (Optional
<DILineInfo
> li
= dwarf
->getDILineInfo(
113 section
.addr
+ off
, object::SectionedAddress::UndefSection
))
114 return createMsg(li
->FileName
, li
->Line
);
116 // If it failed, look up again as a variable.
117 if (const Defined
*sym
= getContainingSymbol(off
)) {
118 // Symbols are generally prefixed with an underscore, which is not included
119 // in the debug information.
120 StringRef symName
= sym
->getName();
121 if (!symName
.empty() && symName
[0] == '_')
122 symName
= symName
.substr(1);
124 if (Optional
<std::pair
<std::string
, unsigned>> fileLine
=
125 dwarf
->getVariableLoc(symName
))
126 return createMsg(fileLine
->first
, fileLine
->second
);
129 // Try to get the source file's name from the DWARF information.
130 if (obj
->compileUnit
)
131 return obj
->sourceFile();
136 void ConcatInputSection::foldIdentical(ConcatInputSection
*copy
) {
137 align
= std::max(align
, copy
->align
);
139 copy
->wasCoalesced
= true;
140 copy
->replacement
= this;
141 for (auto ©Sym
: copy
->symbols
)
142 copySym
->wasIdenticalCodeFolded
= true;
144 // Merge the sorted vectors of symbols together.
145 auto it
= symbols
.begin();
146 for (auto copyIt
= copy
->symbols
.begin(); copyIt
!= copy
->symbols
.end();) {
147 if (it
== symbols
.end()) {
148 symbols
.push_back(*copyIt
++);
150 } else if ((*it
)->value
> (*copyIt
)->value
) {
151 std::swap(*it
++, *copyIt
);
156 copy
->symbols
.clear();
158 // Remove duplicate compact unwind info for symbols at the same address.
161 it
= symbols
.begin();
162 uint64_t v
= (*it
)->value
;
163 for (++it
; it
!= symbols
.end(); ++it
) {
166 d
->unwindEntry
= nullptr;
172 void ConcatInputSection::writeTo(uint8_t *buf
) {
173 assert(!shouldOmitFromOutput());
175 if (getFileSize() == 0)
178 memcpy(buf
, data
.data(), data
.size());
180 std::vector
<uint64_t> relocTargets
;
181 if (!optimizationHints
.empty())
182 relocTargets
.reserve(relocs
.size());
184 for (size_t i
= 0; i
< relocs
.size(); i
++) {
185 const Reloc
&r
= relocs
[i
];
186 uint8_t *loc
= buf
+ r
.offset
;
187 uint64_t referentVA
= 0;
188 if (target
->hasAttr(r
.type
, RelocAttrBits::SUBTRAHEND
)) {
189 const Symbol
*fromSym
= r
.referent
.get
<Symbol
*>();
190 const Reloc
&minuend
= relocs
[++i
];
192 if (const Symbol
*toSym
= minuend
.referent
.dyn_cast
<Symbol
*>())
193 minuendVA
= toSym
->getVA() + minuend
.addend
;
195 auto *referentIsec
= minuend
.referent
.get
<InputSection
*>();
196 assert(!::shouldOmitFromOutput(referentIsec
));
197 minuendVA
= referentIsec
->getVA(minuend
.addend
);
199 referentVA
= minuendVA
- fromSym
->getVA();
200 } else if (auto *referentSym
= r
.referent
.dyn_cast
<Symbol
*>()) {
201 if (target
->hasAttr(r
.type
, RelocAttrBits::LOAD
) &&
202 !referentSym
->isInGot())
203 target
->relaxGotLoad(loc
, r
.type
);
204 // For dtrace symbols, do not handle them as normal undefined symbols
205 if (referentSym
->getName().startswith("___dtrace_")) {
206 // Change dtrace call site to pre-defined instructions
207 target
->handleDtraceReloc(referentSym
, r
, loc
);
210 referentVA
= resolveSymbolVA(referentSym
, r
.type
) + r
.addend
;
212 if (isThreadLocalVariables(getFlags())) {
213 // References from thread-local variable sections are treated as offsets
214 // relative to the start of the thread-local data memory area, which
215 // is initialized via copying all the TLV data sections (which are all
217 if (isa
<Defined
>(referentSym
))
218 referentVA
-= firstTLVDataSection
->addr
;
220 } else if (auto *referentIsec
= r
.referent
.dyn_cast
<InputSection
*>()) {
221 assert(!::shouldOmitFromOutput(referentIsec
));
222 referentVA
= referentIsec
->getVA(r
.addend
);
224 target
->relocateOne(loc
, r
, referentVA
, getVA() + r
.offset
);
226 if (!optimizationHints
.empty())
227 relocTargets
.push_back(referentVA
);
230 if (!optimizationHints
.empty())
231 target
->applyOptimizationHints(buf
, this, relocTargets
);
234 ConcatInputSection
*macho::makeSyntheticInputSection(StringRef segName
,
237 ArrayRef
<uint8_t> data
,
240 *make
<Section
>(/*file=*/nullptr, segName
, sectName
, flags
, /*addr=*/0);
241 auto isec
= make
<ConcatInputSection
>(section
, data
, align
);
242 section
.subsections
.push_back({0, isec
});
246 void CStringInputSection::splitIntoPieces() {
248 StringRef s
= toStringRef(data
);
250 size_t end
= s
.find(0);
251 if (end
== StringRef::npos
)
252 fatal(getLocation(off
) + ": string is not null terminated");
253 size_t size
= end
+ 1;
254 uint32_t hash
= config
->dedupLiterals
? xxHash64(s
.substr(0, size
)) : 0;
255 pieces
.emplace_back(off
, hash
);
261 StringPiece
&CStringInputSection::getStringPiece(uint64_t off
) {
262 if (off
>= data
.size())
263 fatal(toString(this) + ": offset is outside the section");
266 partition_point(pieces
, [=](StringPiece p
) { return p
.inSecOff
<= off
; });
270 const StringPiece
&CStringInputSection::getStringPiece(uint64_t off
) const {
271 return const_cast<CStringInputSection
*>(this)->getStringPiece(off
);
274 uint64_t CStringInputSection::getOffset(uint64_t off
) const {
275 const StringPiece
&piece
= getStringPiece(off
);
276 uint64_t addend
= off
- piece
.inSecOff
;
277 return piece
.outSecOff
+ addend
;
280 WordLiteralInputSection::WordLiteralInputSection(const Section
§ion
,
281 ArrayRef
<uint8_t> data
,
283 : InputSection(WordLiteralKind
, section
, data
, align
) {
284 switch (sectionType(getFlags())) {
285 case S_4BYTE_LITERALS
:
286 power2LiteralSize
= 2;
288 case S_8BYTE_LITERALS
:
289 power2LiteralSize
= 3;
291 case S_16BYTE_LITERALS
:
292 power2LiteralSize
= 4;
295 llvm_unreachable("invalid literal section type");
298 live
.resize(data
.size() >> power2LiteralSize
, !config
->deadStrip
);
301 uint64_t WordLiteralInputSection::getOffset(uint64_t off
) const {
302 auto *osec
= cast
<WordLiteralSection
>(parent
);
303 const uintptr_t buf
= reinterpret_cast<uintptr_t>(data
.data());
304 switch (sectionType(getFlags())) {
305 case S_4BYTE_LITERALS
:
306 return osec
->getLiteral4Offset(buf
+ (off
& ~3LLU)) | (off
& 3);
307 case S_8BYTE_LITERALS
:
308 return osec
->getLiteral8Offset(buf
+ (off
& ~7LLU)) | (off
& 7);
309 case S_16BYTE_LITERALS
:
310 return osec
->getLiteral16Offset(buf
+ (off
& ~15LLU)) | (off
& 15);
312 llvm_unreachable("invalid literal section type");
316 bool macho::isCodeSection(const InputSection
*isec
) {
317 uint32_t type
= sectionType(isec
->getFlags());
318 if (type
!= S_REGULAR
&& type
!= S_COALESCED
)
321 uint32_t attr
= isec
->getFlags() & SECTION_ATTRIBUTES_USR
;
322 if (attr
== S_ATTR_PURE_INSTRUCTIONS
)
325 if (isec
->getSegName() == segment_names::text
)
326 return StringSwitch
<bool>(isec
->getName())
327 .Cases(section_names::textCoalNt
, section_names::staticInit
, true)
333 bool macho::isCfStringSection(const InputSection
*isec
) {
334 return isec
->getName() == section_names::cfString
&&
335 isec
->getSegName() == segment_names::data
;
338 bool macho::isClassRefsSection(const InputSection
*isec
) {
339 return isec
->getName() == section_names::objcClassRefs
&&
340 isec
->getSegName() == segment_names::data
;
343 bool macho::isEhFrameSection(const InputSection
*isec
) {
344 return isec
->getName() == section_names::ehFrame
&&
345 isec
->getSegName() == segment_names::text
;
348 bool macho::isGccExceptTabSection(const InputSection
*isec
) {
349 return isec
->getName() == section_names::gccExceptTab
&&
350 isec
->getSegName() == segment_names::text
;
353 std::string
lld::toString(const InputSection
*isec
) {
354 return (toString(isec
->getFile()) + ":(" + isec
->getName() + ")").str();