//===- ICF.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "ICF.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputSection.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "UnwindInfoSection.h"

#include "lld/Common/CommonLinkerContext.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/xxhash.h"

#include <atomic>

using namespace llvm;
using namespace lld;
using namespace lld::macho;
static constexpr bool verboseDiagnostics = false;
class ICF {
public:
  ICF(std::vector<ConcatInputSection *> &inputs);
  void run();

  using EqualsFn = bool (ICF::*)(const ConcatInputSection *,
                                 const ConcatInputSection *);
  void segregate(size_t begin, size_t end, EqualsFn);
  size_t findBoundary(size_t begin, size_t end);
  void forEachClassRange(size_t begin, size_t end,
                         llvm::function_ref<void(size_t, size_t)> func);
  void forEachClass(llvm::function_ref<void(size_t, size_t)> func);

  bool equalsConstant(const ConcatInputSection *ia,
                      const ConcatInputSection *ib);
  bool equalsVariable(const ConcatInputSection *ia,
                      const ConcatInputSection *ib);

  void applySafeThunksToRange(size_t begin, size_t end);

  // ICF needs a copy of the inputs vector because its equivalence-class
  // segregation algorithm destroys the proper sequence.
  std::vector<ConcatInputSection *> icfInputs;

  unsigned icfPass = 0;
  std::atomic<bool> icfRepeat{false};
  std::atomic<uint64_t> equalsConstantCount{0};
  std::atomic<uint64_t> equalsVariableCount{0};
};
ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
  icfInputs.assign(inputs.begin(), inputs.end());
}
// ICF = Identical Code Folding
//
// We only fold __TEXT,__text, so this is really "code" folding, and not
// "COMDAT" folding. String and scalar constant literals are deduplicated
// elsewhere.
//
// Summary of segments & sections:
//
// The __TEXT segment is readonly at the MMU. Some sections are already
// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
// synthetic and inherently free of duplicates (__TEXT,__stubs &
// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
// because doing so induces many test failures.
//
// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
// thus ineligible for ICF.
//
// The __DATA_CONST segment is read/write at the MMU, but is logically const to
// the application after dyld applies fixups to pointer data. We currently
// fold only the __DATA_CONST,__cfstring section.
//
// The __DATA segment is read/write at the MMU, and as application-writeable
// data, none of its sections are eligible for ICF.
//
// Please see the large block comment in lld/ELF/ICF.cpp for an explanation
// of the segregation algorithm.
//
// FIXME(gkm): implement keep-unique attributes
// FIXME(gkm): implement address-significance tables for MachO object files
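//
// A note on the equivalence-class bookkeeping used throughout this file: each
// section carries two icfEqClass slots. Pass N reads the classes computed by
// the previous pass from icfEqClass[icfPass % 2] and writes the refined
// classes into icfEqClass[(icfPass + 1) % 2], so one slot always holds a
// stable snapshot while the other is rewritten, possibly in parallel.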
// Compare "non-moving" parts of two ConcatInputSections, namely everything
// except references to other ConcatInputSections.
bool ICF::equalsConstant(const ConcatInputSection *ia,
                         const ConcatInputSection *ib) {
  if (verboseDiagnostics)
    ++equalsConstantCount;
  // We can only fold within the same OutputSection.
  if (ia->parent != ib->parent)
    return false;
  if (ia->data.size() != ib->data.size())
    return false;
  if (ia->data != ib->data)
    return false;
  if (ia->relocs.size() != ib->relocs.size())
    return false;
  auto f = [](const Reloc &ra, const Reloc &rb) {
    if (ra.type != rb.type)
      return false;
    if (ra.pcrel != rb.pcrel)
      return false;
    if (ra.length != rb.length)
      return false;
    if (ra.offset != rb.offset)
      return false;
    if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>())
      return false;

    InputSection *isecA, *isecB;

    uint64_t valueA = 0;
    uint64_t valueB = 0;
    if (ra.referent.is<Symbol *>()) {
      const auto *sa = ra.referent.get<Symbol *>();
      const auto *sb = rb.referent.get<Symbol *>();
      if (sa->kind() != sb->kind())
        return false;
      // ICF runs before Undefineds are treated (and potentially converted into
      // DylibSymbols).
      if (isa<DylibSymbol>(sa) || isa<Undefined>(sa))
        return sa == sb && ra.addend == rb.addend;
      assert(isa<Defined>(sa));
      const auto *da = cast<Defined>(sa);
      const auto *db = cast<Defined>(sb);
      if (!da->isec() || !db->isec()) {
        assert(da->isAbsolute() && db->isAbsolute());
        return da->value + ra.addend == db->value + rb.addend;
      }
      isecA = da->isec();
      valueA = da->value;
      isecB = db->isec();
      valueB = db->value;
    } else {
      isecA = ra.referent.get<InputSection *>();
      isecB = rb.referent.get<InputSection *>();
    }

    // Typically, we should not encounter sections marked with `keepUnique` at
    // this point as they would have resulted in different hashes and therefore
    // no need for a full comparison.
    // However, in `safe_thunks` mode, it's possible for two different
    // relocations to reference identical `keepUnique` functions that will be
    // distinguished later via thunks - so we need to handle this case
    // explicitly.
    if ((isecA != isecB) && ((isecA->keepUnique && isCodeSection(isecA)) ||
                             (isecB->keepUnique && isCodeSection(isecB))))
      return false;

    if (isecA->parent != isecB->parent)
      return false;
    // Sections with identical parents should be of the same kind.
    assert(isecA->kind() == isecB->kind());
    // We will compare ConcatInputSection contents in equalsVariable.
    if (isa<ConcatInputSection>(isecA))
      return ra.addend == rb.addend;
    // Else we have two literal sections. References to them are equal iff their
    // offsets in the output section are equal.
    if (ra.referent.is<Symbol *>())
      // For symbol relocs, we compare the contents at the symbol address. We
      // don't do `getOffset(value + addend)` because value + addend may not be
      // a valid offset in the literal section.
      return isecA->getOffset(valueA) == isecB->getOffset(valueB) &&
             ra.addend == rb.addend;

    assert(valueA == 0 && valueB == 0);
    // For section relocs, we compare the content at the section offset.
    return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
  };
  return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
                    f);
}
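// Note on equalsConstant() above: references from one ConcatInputSection to
// another are deliberately compared only by addend here; whether the referent
// sections themselves are equivalent is checked by equalsVariable() via the
// icfEqClass slots, which is why the segregation below iterates until it
// reaches a fixed point.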
// Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
// handled by equalsConstant().
bool ICF::equalsVariable(const ConcatInputSection *ia,
                         const ConcatInputSection *ib) {
  if (verboseDiagnostics)
    ++equalsVariableCount;
  assert(ia->relocs.size() == ib->relocs.size());
  auto f = [this](const Reloc &ra, const Reloc &rb) {
    // We already filtered out mismatching values/addends in equalsConstant.
    if (ra.referent == rb.referent)
      return true;
    const ConcatInputSection *isecA, *isecB;
    if (ra.referent.is<Symbol *>()) {
      // Matching DylibSymbols are already filtered out by the
      // identical-referent check above. Non-matching DylibSymbols were filtered
      // out in equalsConstant(). So we can safely cast to Defined here.
      const auto *da = cast<Defined>(ra.referent.get<Symbol *>());
      const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
      if (da->isAbsolute())
        return true;
      isecA = dyn_cast<ConcatInputSection>(da->isec());
      if (!isecA)
        return true; // literal sections were checked in equalsConstant.
      isecB = cast<ConcatInputSection>(db->isec());
    } else {
      const auto *sa = ra.referent.get<InputSection *>();
      const auto *sb = rb.referent.get<InputSection *>();
      isecA = dyn_cast<ConcatInputSection>(sa);
      if (!isecA)
        return true;
      isecB = cast<ConcatInputSection>(sb);
    }
    return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
  };
  if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
    return false;

  // If there are symbols with associated unwind info, check that the unwind
  // info matches. For simplicity, we only handle the case where there are only
  // symbols at offset zero within the section (which is typically the case with
  // .subsections_via_symbols.)
  auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; };
  const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
  const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
  if (itA == ia->symbols.end())
    return itB == ib->symbols.end();
  if (itB == ib->symbols.end())
    return false;
  const Defined *da = *itA;
  const Defined *db = *itB;
  if (da->unwindEntry()->icfEqClass[icfPass % 2] !=
          db->unwindEntry()->icfEqClass[icfPass % 2] ||
      da->value != 0 || db->value != 0)
    return false;
  auto isZero = [](Defined *d) { return d->value == 0; };
  return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
             ia->symbols.end() &&
         std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
             ib->symbols.end();
}
// Find the first InputSection after BEGIN whose equivalence class differs
size_t ICF::findBoundary(size_t begin, size_t end) {
  uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2];
  for (size_t i = begin + 1; i < end; ++i)
    if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2])
      return i;
  return end;
}
// Invoke FUNC on subranges with matching equivalence class
void ICF::forEachClassRange(size_t begin, size_t end,
                            llvm::function_ref<void(size_t, size_t)> func) {
  while (begin < end) {
    size_t mid = findBoundary(begin, end);
    func(begin, mid);
    begin = mid;
  }
}
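// For example, if the current classes of icfInputs are [x, x, y, y, z], then
// forEachClassRange(0, 5, func) calls func(0, 2), func(2, 4), and func(4, 5).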
// Given a range of identical icfInputs, replace address-significant functions
// with a thunk that is just a direct branch to the first function in the
// series. This way we keep only one main body of the function, but we still
// retain the address uniqueness of the relevant functions by having them be a
// direct branch thunk rather than containing a full copy of the actual function
// body.
void ICF::applySafeThunksToRange(size_t begin, size_t end) {
  // If the functions we're dealing with are smaller than the thunk size, then
  // just leave them all as-is - creating thunks would be a net loss.
  uint32_t thunkSize = target->getICFSafeThunkSize();
  if (icfInputs[begin]->data.size() <= thunkSize)
    return;

  // When creating a unique ICF thunk, use the first section as the section that
  // all thunks will branch to.
  ConcatInputSection *masterIsec = icfInputs[begin];

  for (size_t i = begin + 1; i < end; ++i) {
    ConcatInputSection *isec = icfInputs[i];
    // When we're done processing keepUnique entries, we can stop. Sorting
    // guarantees that all keepUnique entries will be at the front.
    if (!isec->keepUnique)
      break;

    ConcatInputSection *thunk =
        makeSyntheticInputSection(isec->getSegName(), isec->getName());
    addInputSection(thunk);

    target->initICFSafeThunkBody(thunk, masterIsec);
    thunk->foldIdentical(isec, Symbol::ICFFoldKind::Thunk);

    // Since we're folding the target function into a thunk, we need to adjust
    // the symbols that now got relocated from the target function to the thunk.
    // Since the thunk is only one branch, we move all symbols to offset 0 and
    // make sure that the size of all non-zero-size symbols is equal to the size
    // of the branch.
    for (auto *sym : thunk->symbols) {
      sym->value = 0;
      if (sym->size != 0)
        sym->size = thunkSize;
    }
  }
}
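// Illustration of applySafeThunksToRange() above: if an equivalence class
// contains identical keepUnique functions f, g, and h (sorted to the front of
// the range), f keeps its full body while g and h are each replaced by a
// synthetic section holding a single branch to f; their symbols are moved to
// offset 0, and the non-zero-size ones are resized to the thunk size.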
// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
// with matching equivalence class
void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
  // Only use threads when the benefits outweigh the overhead.
  const size_t threadingThreshold = 1024;
  if (icfInputs.size() < threadingThreshold) {
    forEachClassRange(0, icfInputs.size(), func);
    ++icfPass;
    return;
  }

  // Shard into non-overlapping intervals, and call FUNC in parallel. The
  // sharding must be completed before any calls to FUNC are made so that FUNC
  // can modify the InputSection in its shard without causing data races.
  const size_t shards = 256;
  size_t step = icfInputs.size() / shards;
  size_t boundaries[shards + 1];
  boundaries[0] = 0;
  boundaries[shards] = icfInputs.size();
  parallelFor(1, shards, [&](size_t i) {
    boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
  });
  parallelFor(1, shards + 1, [&](size_t i) {
    if (boundaries[i - 1] < boundaries[i]) {
      forEachClassRange(boundaries[i - 1], boundaries[i], func);
    }
  });
  ++icfPass;
}
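// Note on forEachClass() above: if one equivalence class spans an entire
// shard, adjacent entries in `boundaries` end up equal; the
// `boundaries[i - 1] < boundaries[i]` check simply skips those empty ranges.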
void ICF::run() {
  // Into each origin-section hash, combine all reloc referent section hashes.
  for (icfPass = 0; icfPass < 2; ++icfPass) {
    parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
      uint32_t hash = isec->icfEqClass[icfPass % 2];
      for (const Reloc &r : isec->relocs) {
        if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
          if (auto *defined = dyn_cast<Defined>(sym)) {
            if (defined->isec()) {
              if (auto *referentIsec =
                      dyn_cast<ConcatInputSection>(defined->isec()))
                hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
              else
                hash += defined->isec()->kind() +
                        defined->isec()->getOffset(defined->value);
            } else {
              hash += defined->value;
            }
          } else {
            // ICF runs before Undefined diags
            assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym));
          }
        }
      }
      // Set MSB to 1 to avoid collisions with non-hashed classes.
      isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31);
    });
  }

  llvm::stable_sort(
      icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
        // When using safe_thunks, ensure that we first sort by icfEqClass and
        // then by keepUnique (descending). This guarantees that within an
        // equivalence class, the keepUnique inputs are always first.
        if (config->icfLevel == ICFLevel::safe_thunks)
          if (a->icfEqClass[0] == b->icfEqClass[0])
            return a->keepUnique > b->keepUnique;
        return a->icfEqClass[0] < b->icfEqClass[0];
      });
  forEachClass([&](size_t begin, size_t end) {
    segregate(begin, end, &ICF::equalsConstant);
  });

  // Split equivalence groups by comparing relocations until convergence
  do {
    icfRepeat = false;
    forEachClass([&](size_t begin, size_t end) {
      segregate(begin, end, &ICF::equalsVariable);
    });
  } while (icfRepeat);
  log("ICF needed " + Twine(icfPass) + " iterations");
  if (verboseDiagnostics) {
    log("equalsConstant() called " + Twine(equalsConstantCount) + " times");
    log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
  }

  // When using safe_thunks, we need to create thunks for all keepUnique
  // functions that can be deduplicated. Since we're creating / adding new
  // InputSections, we can't parallelize this.
  if (config->icfLevel == ICFLevel::safe_thunks)
    forEachClassRange(0, icfInputs.size(), [&](size_t begin, size_t end) {
      applySafeThunksToRange(begin, end);
    });

  // Fold sections within equivalence classes
  forEachClass([&](size_t begin, size_t end) {
    if (end - begin < 2)
      return;
    bool useSafeThunks = config->icfLevel == ICFLevel::safe_thunks;

    // For ICF level safe_thunks, replace keepUnique function bodies with
    // thunks. For all other ICF levels, directly merge the functions.

    ConcatInputSection *beginIsec = icfInputs[begin];
    for (size_t i = begin + 1; i < end; ++i) {
      // Skip keepUnique inputs when using safe_thunks (already handled above).
      if (useSafeThunks && icfInputs[i]->keepUnique) {
        // Assert keepUnique sections are either small or replaced with thunks.
        assert(!icfInputs[i]->live ||
               icfInputs[i]->data.size() <= target->getICFSafeThunkSize());
        assert(!icfInputs[i]->replacement ||
               icfInputs[i]->replacement->data.size() ==
                   target->getICFSafeThunkSize());
        continue;
      }
      beginIsec->foldIdentical(icfInputs[i]);
    }
  });
}
// Split an equivalence class into smaller classes.
void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
  while (begin < end) {
    // Divide [begin, end) into two. Let mid be the start index of the
    // second group.
    auto bound = std::stable_partition(
        icfInputs.begin() + begin + 1, icfInputs.begin() + end,
        [&](ConcatInputSection *isec) {
          return (this->*equals)(icfInputs[begin], isec);
        });
    size_t mid = bound - icfInputs.begin();

    // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
    // equivalence class ID because every group ends with a unique index.
    for (size_t i = begin; i < mid; ++i)
      icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;

    // If we created a group, we need to iterate the main loop again.
    if (mid != end)
      icfRepeat = true;

    begin = mid;
  }
}
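// Worked example for segregate() above: with classes [A, A, B, A, B] and
// begin = 0, end = 5, the stable_partition over [1, 5) moves the sections
// equal to icfInputs[0] forward, giving [A, A, A, B, B] with mid = 3; indices
// [0, 3) are assigned class 3, icfRepeat is set because mid != end, and the
// loop continues with begin = 3, which assigns [3, 5) class 5.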
void macho::markSymAsAddrSig(Symbol *s) {
  if (auto *d = dyn_cast_or_null<Defined>(s))
    if (d->isec())
      d->isec()->keepUnique = true;
}
void macho::markAddrSigSymbols() {
  TimeTraceScope timeScope("Mark addrsig symbols");
  for (InputFile *file : inputFiles) {
    ObjFile *obj = dyn_cast<ObjFile>(file);
    if (!obj)
      continue;

    Section *addrSigSection = obj->addrSigSection;
    if (!addrSigSection)
      continue;
    assert(addrSigSection->subsections.size() == 1);

    const InputSection *isec = addrSigSection->subsections[0].isec;

    for (const Reloc &r : isec->relocs) {
      if (auto *sym = r.referent.dyn_cast<Symbol *>())
        markSymAsAddrSig(sym);
      else
        error(toString(isec) + ": unexpected section relocation");
    }
  }
}
// Given a symbol that was folded into a thunk, return the symbol pointing to
// the actual body of the function. We use this approach rather than storing the
// needed info in the Defined itself in order to minimize memory usage.
Defined *macho::getBodyForThunkFoldedSym(Defined *foldedSym) {
  assert(isa<ConcatInputSection>(foldedSym->originalIsec) &&
         "thunk-folded ICF symbol expected to be on a ConcatInputSection");
  // foldedSec is the InputSection that was marked as deleted upon fold.
  ConcatInputSection *foldedSec =
      cast<ConcatInputSection>(foldedSym->originalIsec);

  // thunkBody is the actual live thunk, containing the code that branches to
  // the actual body of the function.
  InputSection *thunkBody = foldedSec->replacement;

  // The actual (merged) body of the function that the thunk jumps to. This will
  // end up in the final binary.
  InputSection *functionBody = target->getThunkBranchTarget(thunkBody);

  for (Symbol *sym : functionBody->symbols) {
    Defined *d = dyn_cast<Defined>(sym);
    // The symbol needs to be at the start of the InputSection.
    if (d && d->value == 0)
      return d;
  }
  llvm_unreachable("could not find body symbol for ICF-generated thunk");
}
void macho::foldIdenticalSections(bool onlyCfStrings) {
  TimeTraceScope timeScope("Fold Identical Code Sections");
  // The ICF equivalence-class segregation algorithm relies on pre-computed
  // hashes of InputSection::data for the ConcatOutputSection::inputs and all
  // sections referenced by their relocs. We could recursively traverse the
  // relocs to find every referenced InputSection, but that precludes easy
  // parallelization. Therefore, we hash every InputSection here where we have
  // them all accessible as simple vectors.

  // If an InputSection is ineligible for ICF, we give it a unique ID to force
  // it into an unfoldable singleton equivalence class. Begin the unique-ID
  // space at inputSections.size(), so that it will never intersect with
  // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
  // coexist with equivalence-class IDs, this is not necessary, but it might
  // help someone keep the numbers straight in case we ever need to debug
  // ICF::segregate().
  std::vector<ConcatInputSection *> foldable;
  uint64_t icfUniqueID = inputSections.size();
  for (ConcatInputSection *isec : inputSections) {
    bool isFoldableWithAddendsRemoved = isCfStringSection(isec) ||
                                        isClassRefsSection(isec) ||
                                        isSelRefsSection(isec);
    // NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we
    // can still fold it.
    bool hasFoldableFlags = (isSelRefsSection(isec) ||
                             sectionType(isec->getFlags()) == MachO::S_REGULAR);

    bool isCodeSec = isCodeSection(isec);

    // When keepUnique is true, the section is not foldable, unless we are at
    // ICF level safe_thunks, in which case we still want to fold code sections.
    // When using safe_thunks, we'll apply the safe_thunks logic at merge time
    // based on the 'keepUnique' flag.
    bool noUniqueRequirement =
        !isec->keepUnique ||
        ((config->icfLevel == ICFLevel::safe_thunks) && isCodeSec);

    // FIXME: consider non-code __text sections as foldable?
    bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
                      (isCodeSec || isFoldableWithAddendsRemoved ||
                       isGccExceptTabSection(isec)) &&
                      noUniqueRequirement && !isec->hasAltEntry &&
                      !isec->shouldOmitFromOutput() && hasFoldableFlags;
    if (isFoldable) {
      foldable.push_back(isec);
      for (Defined *d : isec->symbols)
        if (d->unwindEntry())
          foldable.push_back(d->unwindEntry());

      // Some sections have embedded addends that foil ICF's hashing / equality
      // checks. (We can ignore embedded addends when doing ICF because the same
      // information gets recorded in our Reloc structs.) We therefore create a
      // mutable copy of the section data and zero out the embedded addends
      // before performing any hashing / equality checks.
      if (isFoldableWithAddendsRemoved) {
        // We have to do this copying serially as the BumpPtrAllocator is not
        // thread-safe. FIXME: Make a thread-safe allocator.
        MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc());
        for (const Reloc &r : isec->relocs)
          target->relocateOne(copy.data() + r.offset, r, /*va=*/0,
                              /*relocVA=*/0);
        isec->data = copy;
      }
    } else if (!isEhFrameSection(isec)) {
      // EH frames are gathered as foldables from unwindEntry above; give a
      // unique ID to everything else.
      isec->icfEqClass[0] = ++icfUniqueID;
    }
  }
  parallelForEach(foldable, [](ConcatInputSection *isec) {
    assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
    // Turn on the top bit to guarantee that valid hashes have no collisions
    // with the small-integer unique IDs for ICF-ineligible sections.
    isec->icfEqClass[0] = xxh3_64bits(isec->data) | (1ull << 31);
  });
  // Now that every input section is either hashed or marked as unique, run the
  // segregation algorithm to detect foldable subsections.
  ICF(inputSections).run();
}
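
// A minimal usage sketch (hypothetical call site; the real ones live elsewhere
// in lld/MachO):
//
//   markAddrSigSymbols();                            // mark addrsig'd sections keepUnique
//   foldIdenticalSections(/*onlyCfStrings=*/false);  // hash, segregate, fold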