1 //===- ICF.cpp ------------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "ConcatOutputSection.h"
12 #include "InputSection.h"
13 #include "SymbolTable.h"
15 #include "UnwindInfoSection.h"
17 #include "lld/Common/CommonLinkerContext.h"
18 #include "llvm/Support/LEB128.h"
19 #include "llvm/Support/Parallel.h"
20 #include "llvm/Support/TimeProfiler.h"
21 #include "llvm/Support/xxhash.h"
27 using namespace lld::macho
;
29 static constexpr bool verboseDiagnostics
= false;
33 ICF(std::vector
<ConcatInputSection
*> &inputs
);
36 using EqualsFn
= bool (ICF::*)(const ConcatInputSection
*,
37 const ConcatInputSection
*);
38 void segregate(size_t begin
, size_t end
, EqualsFn
);
39 size_t findBoundary(size_t begin
, size_t end
);
40 void forEachClassRange(size_t begin
, size_t end
,
41 llvm::function_ref
<void(size_t, size_t)> func
);
42 void forEachClass(llvm::function_ref
<void(size_t, size_t)> func
);
44 bool equalsConstant(const ConcatInputSection
*ia
,
45 const ConcatInputSection
*ib
);
46 bool equalsVariable(const ConcatInputSection
*ia
,
47 const ConcatInputSection
*ib
);
49 // ICF needs a copy of the inputs vector because its equivalence-class
50 // segregation algorithm destroys the proper sequence.
51 std::vector
<ConcatInputSection
*> icfInputs
;
54 std::atomic
<bool> icfRepeat
{false};
55 std::atomic
<uint64_t> equalsConstantCount
{0};
56 std::atomic
<uint64_t> equalsVariableCount
{0};
59 ICF::ICF(std::vector
<ConcatInputSection
*> &inputs
) {
60 icfInputs
.assign(inputs
.begin(), inputs
.end());
63 // ICF = Identical Code Folding
65 // We only fold __TEXT,__text, so this is really "code" folding, and not
66 // "COMDAT" folding. String and scalar constant literals are deduplicated
69 // Summary of segments & sections:
71 // The __TEXT segment is readonly at the MMU. Some sections are already
72 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
73 // synthetic and inherently free of duplicates (__TEXT,__stubs &
74 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
75 // because doing so induces many test failures.
77 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
78 // thus ineligible for ICF.
80 // The __DATA_CONST segment is read/write at the MMU, but is logically const to
81 // the application after dyld applies fixups to pointer data. We currently
82 // fold only the __DATA_CONST,__cfstring section.
84 // The __DATA segment is read/write at the MMU, and as application-writeable
85 // data, none of its sections are eligible for ICF.
87 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation
88 // of the segregation algorithm.
90 // FIXME(gkm): implement keep-unique attributes
91 // FIXME(gkm): implement address-significance tables for MachO object files
93 // Compare "non-moving" parts of two ConcatInputSections, namely everything
94 // except references to other ConcatInputSections.
95 bool ICF::equalsConstant(const ConcatInputSection
*ia
,
96 const ConcatInputSection
*ib
) {
97 if (verboseDiagnostics
)
98 ++equalsConstantCount
;
99 // We can only fold within the same OutputSection.
100 if (ia
->parent
!= ib
->parent
)
102 if (ia
->data
.size() != ib
->data
.size())
104 if (ia
->data
!= ib
->data
)
106 if (ia
->relocs
.size() != ib
->relocs
.size())
108 auto f
= [](const Reloc
&ra
, const Reloc
&rb
) {
109 if (ra
.type
!= rb
.type
)
111 if (ra
.pcrel
!= rb
.pcrel
)
113 if (ra
.length
!= rb
.length
)
115 if (ra
.offset
!= rb
.offset
)
117 if (ra
.referent
.is
<Symbol
*>() != rb
.referent
.is
<Symbol
*>())
120 InputSection
*isecA
, *isecB
;
124 if (ra
.referent
.is
<Symbol
*>()) {
125 const auto *sa
= ra
.referent
.get
<Symbol
*>();
126 const auto *sb
= rb
.referent
.get
<Symbol
*>();
127 if (sa
->kind() != sb
->kind())
129 // ICF runs before Undefineds are treated (and potentially converted into
131 if (isa
<DylibSymbol
>(sa
) || isa
<Undefined
>(sa
))
132 return sa
== sb
&& ra
.addend
== rb
.addend
;
133 assert(isa
<Defined
>(sa
));
134 const auto *da
= cast
<Defined
>(sa
);
135 const auto *db
= cast
<Defined
>(sb
);
136 if (!da
->isec
|| !db
->isec
) {
137 assert(da
->isAbsolute() && db
->isAbsolute());
138 return da
->value
+ ra
.addend
== db
->value
+ rb
.addend
;
145 isecA
= ra
.referent
.get
<InputSection
*>();
146 isecB
= rb
.referent
.get
<InputSection
*>();
149 if (isecA
->parent
!= isecB
->parent
)
151 // Sections with identical parents should be of the same kind.
152 assert(isecA
->kind() == isecB
->kind());
153 // We will compare ConcatInputSection contents in equalsVariable.
154 if (isa
<ConcatInputSection
>(isecA
))
155 return ra
.addend
== rb
.addend
;
156 // Else we have two literal sections. References to them are equal iff their
157 // offsets in the output section are equal.
158 if (ra
.referent
.is
<Symbol
*>())
159 // For symbol relocs, we compare the contents at the symbol address. We
160 // don't do `getOffset(value + addend)` because value + addend may not be
161 // a valid offset in the literal section.
162 return isecA
->getOffset(valueA
) == isecB
->getOffset(valueB
) &&
163 ra
.addend
== rb
.addend
;
165 assert(valueA
== 0 && valueB
== 0);
166 // For section relocs, we compare the content at the section offset.
167 return isecA
->getOffset(ra
.addend
) == isecB
->getOffset(rb
.addend
);
170 return std::equal(ia
->relocs
.begin(), ia
->relocs
.end(), ib
->relocs
.begin(),
174 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
175 // handled by equalsConstant().
176 bool ICF::equalsVariable(const ConcatInputSection
*ia
,
177 const ConcatInputSection
*ib
) {
178 if (verboseDiagnostics
)
179 ++equalsVariableCount
;
180 assert(ia
->relocs
.size() == ib
->relocs
.size());
181 auto f
= [this](const Reloc
&ra
, const Reloc
&rb
) {
182 // We already filtered out mismatching values/addends in equalsConstant.
183 if (ra
.referent
== rb
.referent
)
185 const ConcatInputSection
*isecA
, *isecB
;
186 if (ra
.referent
.is
<Symbol
*>()) {
187 // Matching DylibSymbols are already filtered out by the
188 // identical-referent check above. Non-matching DylibSymbols were filtered
189 // out in equalsConstant(). So we can safely cast to Defined here.
190 const auto *da
= cast
<Defined
>(ra
.referent
.get
<Symbol
*>());
191 const auto *db
= cast
<Defined
>(rb
.referent
.get
<Symbol
*>());
192 if (da
->isAbsolute())
194 isecA
= dyn_cast
<ConcatInputSection
>(da
->isec
);
196 return true; // literal sections were checked in equalsConstant.
197 isecB
= cast
<ConcatInputSection
>(db
->isec
);
199 const auto *sa
= ra
.referent
.get
<InputSection
*>();
200 const auto *sb
= rb
.referent
.get
<InputSection
*>();
201 isecA
= dyn_cast
<ConcatInputSection
>(sa
);
204 isecB
= cast
<ConcatInputSection
>(sb
);
206 return isecA
->icfEqClass
[icfPass
% 2] == isecB
->icfEqClass
[icfPass
% 2];
208 if (!std::equal(ia
->relocs
.begin(), ia
->relocs
.end(), ib
->relocs
.begin(), f
))
211 // If there are symbols with associated unwind info, check that the unwind
212 // info matches. For simplicity, we only handle the case where there are only
213 // symbols at offset zero within the section (which is typically the case with
214 // .subsections_via_symbols.)
215 auto hasUnwind
= [](Defined
*d
) { return d
->unwindEntry
!= nullptr; };
216 const auto *itA
= llvm::find_if(ia
->symbols
, hasUnwind
);
217 const auto *itB
= llvm::find_if(ib
->symbols
, hasUnwind
);
218 if (itA
== ia
->symbols
.end())
219 return itB
== ib
->symbols
.end();
220 if (itB
== ib
->symbols
.end())
222 const Defined
*da
= *itA
;
223 const Defined
*db
= *itB
;
224 if (da
->unwindEntry
->icfEqClass
[icfPass
% 2] !=
225 db
->unwindEntry
->icfEqClass
[icfPass
% 2] ||
226 da
->value
!= 0 || db
->value
!= 0)
228 auto isZero
= [](Defined
*d
) { return d
->value
== 0; };
229 return std::find_if_not(std::next(itA
), ia
->symbols
.end(), isZero
) ==
231 std::find_if_not(std::next(itB
), ib
->symbols
.end(), isZero
) ==
235 // Find the first InputSection after BEGIN whose equivalence class differs
236 size_t ICF::findBoundary(size_t begin
, size_t end
) {
237 uint64_t beginHash
= icfInputs
[begin
]->icfEqClass
[icfPass
% 2];
238 for (size_t i
= begin
+ 1; i
< end
; ++i
)
239 if (beginHash
!= icfInputs
[i
]->icfEqClass
[icfPass
% 2])
244 // Invoke FUNC on subranges with matching equivalence class
245 void ICF::forEachClassRange(size_t begin
, size_t end
,
246 llvm::function_ref
<void(size_t, size_t)> func
) {
247 while (begin
< end
) {
248 size_t mid
= findBoundary(begin
, end
);
254 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges
255 // with matching equivalence class
256 void ICF::forEachClass(llvm::function_ref
<void(size_t, size_t)> func
) {
257 // Only use threads when the benefits outweigh the overhead.
258 const size_t threadingThreshold
= 1024;
259 if (icfInputs
.size() < threadingThreshold
) {
260 forEachClassRange(0, icfInputs
.size(), func
);
265 // Shard into non-overlapping intervals, and call FUNC in parallel. The
266 // sharding must be completed before any calls to FUNC are made so that FUNC
267 // can modify the InputSection in its shard without causing data races.
268 const size_t shards
= 256;
269 size_t step
= icfInputs
.size() / shards
;
270 size_t boundaries
[shards
+ 1];
272 boundaries
[shards
] = icfInputs
.size();
273 parallelFor(1, shards
, [&](size_t i
) {
274 boundaries
[i
] = findBoundary((i
- 1) * step
, icfInputs
.size());
276 parallelFor(1, shards
+ 1, [&](size_t i
) {
277 if (boundaries
[i
- 1] < boundaries
[i
]) {
278 forEachClassRange(boundaries
[i
- 1], boundaries
[i
], func
);
285 // Into each origin-section hash, combine all reloc referent section hashes.
286 for (icfPass
= 0; icfPass
< 2; ++icfPass
) {
287 parallelForEach(icfInputs
, [&](ConcatInputSection
*isec
) {
288 uint32_t hash
= isec
->icfEqClass
[icfPass
% 2];
289 for (const Reloc
&r
: isec
->relocs
) {
290 if (auto *sym
= r
.referent
.dyn_cast
<Symbol
*>()) {
291 if (auto *defined
= dyn_cast
<Defined
>(sym
)) {
293 if (auto *referentIsec
=
294 dyn_cast
<ConcatInputSection
>(defined
->isec
))
295 hash
+= defined
->value
+ referentIsec
->icfEqClass
[icfPass
% 2];
297 hash
+= defined
->isec
->kind() +
298 defined
->isec
->getOffset(defined
->value
);
300 hash
+= defined
->value
;
303 // ICF runs before Undefined diags
304 assert(isa
<Undefined
>(sym
) || isa
<DylibSymbol
>(sym
));
308 // Set MSB to 1 to avoid collisions with non-hashed classes.
309 isec
->icfEqClass
[(icfPass
+ 1) % 2] = hash
| (1ull << 31);
314 icfInputs
, [](const ConcatInputSection
*a
, const ConcatInputSection
*b
) {
315 return a
->icfEqClass
[0] < b
->icfEqClass
[0];
317 forEachClass([&](size_t begin
, size_t end
) {
318 segregate(begin
, end
, &ICF::equalsConstant
);
321 // Split equivalence groups by comparing relocations until convergence
324 forEachClass([&](size_t begin
, size_t end
) {
325 segregate(begin
, end
, &ICF::equalsVariable
);
328 log("ICF needed " + Twine(icfPass
) + " iterations");
329 if (verboseDiagnostics
) {
330 log("equalsConstant() called " + Twine(equalsConstantCount
) + " times");
331 log("equalsVariable() called " + Twine(equalsVariableCount
) + " times");
334 // Fold sections within equivalence classes
335 forEachClass([&](size_t begin
, size_t end
) {
338 ConcatInputSection
*beginIsec
= icfInputs
[begin
];
339 for (size_t i
= begin
+ 1; i
< end
; ++i
)
340 beginIsec
->foldIdentical(icfInputs
[i
]);
344 // Split an equivalence class into smaller classes.
345 void ICF::segregate(size_t begin
, size_t end
, EqualsFn equals
) {
346 while (begin
< end
) {
347 // Divide [begin, end) into two. Let mid be the start index of the
349 auto bound
= std::stable_partition(
350 icfInputs
.begin() + begin
+ 1, icfInputs
.begin() + end
,
351 [&](ConcatInputSection
*isec
) {
352 return (this->*equals
)(icfInputs
[begin
], isec
);
354 size_t mid
= bound
- icfInputs
.begin();
356 // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
357 // equivalence class ID because every group ends with a unique index.
358 for (size_t i
= begin
; i
< mid
; ++i
)
359 icfInputs
[i
]->icfEqClass
[(icfPass
+ 1) % 2] = mid
;
361 // If we created a group, we need to iterate the main loop again.
369 void macho::markSymAsAddrSig(Symbol
*s
) {
370 if (auto *d
= dyn_cast_or_null
<Defined
>(s
))
372 d
->isec
->keepUnique
= true;
375 void macho::markAddrSigSymbols() {
376 TimeTraceScope
timeScope("Mark addrsig symbols");
377 for (InputFile
*file
: inputFiles
) {
378 ObjFile
*obj
= dyn_cast
<ObjFile
>(file
);
382 Section
*addrSigSection
= obj
->addrSigSection
;
385 assert(addrSigSection
->subsections
.size() == 1);
387 const InputSection
*isec
= addrSigSection
->subsections
[0].isec
;
389 for (const Reloc
&r
: isec
->relocs
) {
390 if (auto *sym
= r
.referent
.dyn_cast
<Symbol
*>())
391 markSymAsAddrSig(sym
);
393 error(toString(isec
) + ": unexpected section relocation");
398 void macho::foldIdenticalSections(bool onlyCfStrings
) {
399 TimeTraceScope
timeScope("Fold Identical Code Sections");
400 // The ICF equivalence-class segregation algorithm relies on pre-computed
401 // hashes of InputSection::data for the ConcatOutputSection::inputs and all
402 // sections referenced by their relocs. We could recursively traverse the
403 // relocs to find every referenced InputSection, but that precludes easy
404 // parallelization. Therefore, we hash every InputSection here where we have
405 // them all accessible as simple vectors.
407 // If an InputSection is ineligible for ICF, we give it a unique ID to force
408 // it into an unfoldable singleton equivalence class. Begin the unique-ID
409 // space at inputSections.size(), so that it will never intersect with
410 // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
411 // coexist with equivalence-class IDs, this is not necessary, but might help
412 // someone keep the numbers straight in case we ever need to debug the
414 std::vector
<ConcatInputSection
*> foldable
;
415 uint64_t icfUniqueID
= inputSections
.size();
416 for (ConcatInputSection
*isec
: inputSections
) {
417 bool isFoldableWithAddendsRemoved
= isCfStringSection(isec
) ||
418 isClassRefsSection(isec
) ||
419 isSelRefsSection(isec
);
420 // NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we
421 // can still fold it.
422 bool hasFoldableFlags
= (isSelRefsSection(isec
) ||
423 sectionType(isec
->getFlags()) == MachO::S_REGULAR
);
424 // FIXME: consider non-code __text sections as foldable?
425 bool isFoldable
= (!onlyCfStrings
|| isCfStringSection(isec
)) &&
426 (isCodeSection(isec
) || isFoldableWithAddendsRemoved
||
427 isGccExceptTabSection(isec
)) &&
428 !isec
->keepUnique
&& !isec
->hasAltEntry
&&
429 !isec
->shouldOmitFromOutput() && hasFoldableFlags
;
431 foldable
.push_back(isec
);
432 for (Defined
*d
: isec
->symbols
)
434 foldable
.push_back(d
->unwindEntry
);
436 // Some sections have embedded addends that foil ICF's hashing / equality
437 // checks. (We can ignore embedded addends when doing ICF because the same
438 // information gets recorded in our Reloc structs.) We therefore create a
439 // mutable copy of the section data and zero out the embedded addends
440 // before performing any hashing / equality checks.
441 if (isFoldableWithAddendsRemoved
) {
442 // We have to do this copying serially as the BumpPtrAllocator is not
443 // thread-safe. FIXME: Make a thread-safe allocator.
444 MutableArrayRef
<uint8_t> copy
= isec
->data
.copy(bAlloc());
445 for (const Reloc
&r
: isec
->relocs
)
446 target
->relocateOne(copy
.data() + r
.offset
, r
, /*va=*/0,
450 } else if (!isEhFrameSection(isec
)) {
451 // EH frames are gathered as foldables from unwindEntry above; give a
452 // unique ID to everything else.
453 isec
->icfEqClass
[0] = ++icfUniqueID
;
456 parallelForEach(foldable
, [](ConcatInputSection
*isec
) {
457 assert(isec
->icfEqClass
[0] == 0); // don't overwrite a unique ID!
458 // Turn-on the top bit to guarantee that valid hashes have no collisions
459 // with the small-integer unique IDs for ICF-ineligible sections
460 isec
->icfEqClass
[0] = xxh3_64bits(isec
->data
) | (1ull << 31);
462 // Now that every input section is either hashed or marked as unique, run the
463 // segregation algorithm to detect foldable subsections.