1 //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the IdenticalCodeFolding class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/IdenticalCodeFolding.h"
14 #include "bolt/Core/HashUtilities.h"
15 #include "bolt/Core/ParallelUtilities.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/ThreadPool.h"
19 #include "llvm/Support/Timer.h"
24 #include <unordered_map>
26 #define DEBUG_TYPE "bolt-icf"
33 extern cl::OptionCategory BoltOptCategory
;
36 ICFUseDFS("icf-dfs", cl::desc("use DFS ordering when using -icf option"),
37 cl::ReallyHidden
, cl::cat(BoltOptCategory
));
41 cl::desc("time icf steps"),
44 cl::cat(BoltOptCategory
));
47 /// Compare two jump tables in 2 functions. The function relies on consistent
48 /// ordering of basic blocks in both binary functions (e.g. DFS).
49 static bool equalJumpTables(const JumpTable
&JumpTableA
,
50 const JumpTable
&JumpTableB
,
51 const BinaryFunction
&FunctionA
,
52 const BinaryFunction
&FunctionB
) {
53 if (JumpTableA
.EntrySize
!= JumpTableB
.EntrySize
)
56 if (JumpTableA
.Type
!= JumpTableB
.Type
)
59 if (JumpTableA
.getSize() != JumpTableB
.getSize())
62 for (uint64_t Index
= 0; Index
< JumpTableA
.Entries
.size(); ++Index
) {
63 const MCSymbol
*LabelA
= JumpTableA
.Entries
[Index
];
64 const MCSymbol
*LabelB
= JumpTableB
.Entries
[Index
];
66 const BinaryBasicBlock
*TargetA
= FunctionA
.getBasicBlockForLabel(LabelA
);
67 const BinaryBasicBlock
*TargetB
= FunctionB
.getBasicBlockForLabel(LabelB
);
69 if (!TargetA
|| !TargetB
) {
70 assert((TargetA
|| LabelA
== FunctionA
.getFunctionEndLabel()) &&
71 "no target basic block found");
72 assert((TargetB
|| LabelB
== FunctionB
.getFunctionEndLabel()) &&
73 "no target basic block found");
75 if (TargetA
!= TargetB
)
81 assert(TargetA
&& TargetB
&& "cannot locate target block(s)");
83 if (TargetA
->getLayoutIndex() != TargetB
->getLayoutIndex())
90 /// Helper function that compares an instruction of this function to the
91 /// given instruction of the given function. The functions should have
93 template <class Compare
>
94 static bool isInstrEquivalentWith(const MCInst
&InstA
,
95 const BinaryBasicBlock
&BBA
,
97 const BinaryBasicBlock
&BBB
, Compare Comp
) {
98 if (InstA
.getOpcode() != InstB
.getOpcode())
101 const BinaryContext
&BC
= BBA
.getFunction()->getBinaryContext();
103 // In this function we check for special conditions:
105 // * instructions with landing pads
107 // Most of the common cases should be handled by MCPlus::equals()
108 // that compares regular instruction operands.
110 // NB: there's no need to compare jump table indirect jump instructions
111 // separately as jump tables are handled by comparing corresponding
113 const std::optional
<MCPlus::MCLandingPad
> EHInfoA
= BC
.MIB
->getEHInfo(InstA
);
114 const std::optional
<MCPlus::MCLandingPad
> EHInfoB
= BC
.MIB
->getEHInfo(InstB
);
116 if (EHInfoA
|| EHInfoB
) {
117 if (!EHInfoA
&& (EHInfoB
->first
|| EHInfoB
->second
))
120 if (!EHInfoB
&& (EHInfoA
->first
|| EHInfoA
->second
))
123 if (EHInfoA
&& EHInfoB
) {
124 // Action indices should match.
125 if (EHInfoA
->second
!= EHInfoB
->second
)
128 if (!EHInfoA
->first
!= !EHInfoB
->first
)
131 if (EHInfoA
->first
&& EHInfoB
->first
) {
132 const BinaryBasicBlock
*LPA
= BBA
.getLandingPad(EHInfoA
->first
);
133 const BinaryBasicBlock
*LPB
= BBB
.getLandingPad(EHInfoB
->first
);
134 assert(LPA
&& LPB
&& "cannot locate landing pad(s)");
136 if (LPA
->getLayoutIndex() != LPB
->getLayoutIndex())
142 return BC
.MIB
->equals(InstA
, InstB
, Comp
);
145 /// Returns true if this function has identical code and CFG with
146 /// the given function \p BF.
148 /// If \p CongruentSymbols is set to true, then symbolic operands that reference
149 /// potentially identical but different functions are ignored during the
151 static bool isIdenticalWith(const BinaryFunction
&A
, const BinaryFunction
&B
,
152 bool CongruentSymbols
) {
153 assert(A
.hasCFG() && B
.hasCFG() && "both functions should have CFG");
155 // Compare the two functions, one basic block at a time.
156 // Currently we require two identical basic blocks to have identical
157 // instruction sequences and the same index in their corresponding
158 // functions. The latter is important for CFG equality.
160 if (A
.getLayout().block_size() != B
.getLayout().block_size())
163 // Comparing multi-entry functions could be non-trivial.
164 if (A
.isMultiEntry() || B
.isMultiEntry())
167 if (A
.hasIslandsInfo() || B
.hasIslandsInfo())
170 // Process both functions in either DFS or existing order.
171 SmallVector
<const BinaryBasicBlock
*, 0> OrderA
;
172 SmallVector
<const BinaryBasicBlock
*, 0> OrderB
;
173 if (opts::ICFUseDFS
) {
174 copy(A
.dfs(), std::back_inserter(OrderA
));
175 copy(B
.dfs(), std::back_inserter(OrderB
));
177 copy(A
.getLayout().blocks(), std::back_inserter(OrderA
));
178 copy(B
.getLayout().blocks(), std::back_inserter(OrderB
));
181 const BinaryContext
&BC
= A
.getBinaryContext();
183 auto BBI
= OrderB
.begin();
184 for (const BinaryBasicBlock
*BB
: OrderA
) {
185 const BinaryBasicBlock
*OtherBB
= *BBI
;
187 if (BB
->getLayoutIndex() != OtherBB
->getLayoutIndex())
190 // Compare successor basic blocks.
191 // NOTE: the comparison for jump tables is only partially verified here.
192 if (BB
->succ_size() != OtherBB
->succ_size())
195 auto SuccBBI
= OtherBB
->succ_begin();
196 for (const BinaryBasicBlock
*SuccBB
: BB
->successors()) {
197 const BinaryBasicBlock
*SuccOtherBB
= *SuccBBI
;
198 if (SuccBB
->getLayoutIndex() != SuccOtherBB
->getLayoutIndex())
203 // Compare all instructions including pseudos.
204 auto I
= BB
->begin(), E
= BB
->end();
205 auto OtherI
= OtherBB
->begin(), OtherE
= OtherBB
->end();
206 while (I
!= E
&& OtherI
!= OtherE
) {
208 auto AreSymbolsIdentical
= [&](const MCSymbol
*SymbolA
,
209 const MCSymbol
*SymbolB
) {
210 if (SymbolA
== SymbolB
)
213 // All local symbols are considered identical since they affect a
214 // control flow and we check the control flow separately.
215 // If a local symbol is escaped, then the function (potentially) has
216 // multiple entry points and we exclude such functions from
218 if (SymbolA
->isTemporary() && SymbolB
->isTemporary())
221 // Compare symbols as functions.
222 uint64_t EntryIDA
= 0;
223 uint64_t EntryIDB
= 0;
224 const BinaryFunction
*FunctionA
=
225 BC
.getFunctionForSymbol(SymbolA
, &EntryIDA
);
226 const BinaryFunction
*FunctionB
=
227 BC
.getFunctionForSymbol(SymbolB
, &EntryIDB
);
228 if (FunctionA
&& EntryIDA
)
230 if (FunctionB
&& EntryIDB
)
232 if (FunctionA
&& FunctionB
) {
233 // Self-referencing functions and recursive calls.
234 if (FunctionA
== &A
&& FunctionB
== &B
)
237 // Functions with different hash values can never become identical,
238 // hence A and B are different.
239 if (CongruentSymbols
)
240 return FunctionA
->getHash() == FunctionB
->getHash();
242 return FunctionA
== FunctionB
;
245 // One of the symbols represents a function, the other one does not.
246 if (FunctionA
!= FunctionB
)
249 // Check if symbols are jump tables.
250 const BinaryData
*SIA
= BC
.getBinaryDataByName(SymbolA
->getName());
253 const BinaryData
*SIB
= BC
.getBinaryDataByName(SymbolB
->getName());
257 assert((SIA
->getAddress() != SIB
->getAddress()) &&
258 "different symbols should not have the same value");
260 const JumpTable
*JumpTableA
=
261 A
.getJumpTableContainingAddress(SIA
->getAddress());
265 const JumpTable
*JumpTableB
=
266 B
.getJumpTableContainingAddress(SIB
->getAddress());
270 if ((SIA
->getAddress() - JumpTableA
->getAddress()) !=
271 (SIB
->getAddress() - JumpTableB
->getAddress()))
274 return equalJumpTables(*JumpTableA
, *JumpTableB
, A
, B
);
277 if (!isInstrEquivalentWith(*I
, *BB
, *OtherI
, *OtherBB
,
278 AreSymbolsIdentical
))
285 // One of the identical blocks may have a trailing unconditional jump that
286 // is ignored for CFG purposes.
287 const MCInst
*TrailingInstr
=
288 (I
!= E
? &(*I
) : (OtherI
!= OtherE
? &(*OtherI
) : nullptr));
289 if (TrailingInstr
&& !BC
.MIB
->isUnconditionalBranch(*TrailingInstr
))
295 // Compare exceptions action tables.
296 if (A
.getLSDAActionTable() != B
.getLSDAActionTable() ||
297 A
.getLSDATypeTable() != B
.getLSDATypeTable() ||
298 A
.getLSDATypeIndexTable() != B
.getLSDATypeIndexTable())
304 // This hash table is used to identify identical functions. It maps
305 // a function to a bucket of functions identical to it.
307 size_t operator()(const BinaryFunction
*F
) const { return F
->getHash(); }
310 /// Identify two congruent functions. Two functions are considered congruent,
311 /// if they are identical/equal except for some of their instruction operands
312 /// that reference potentially identical functions, i.e. functions that could
313 /// be folded later. Congruent functions are candidates for folding in our
314 /// iterative ICF algorithm.
316 /// Congruent functions are required to have identical hash.
317 struct KeyCongruent
{
318 bool operator()(const BinaryFunction
*A
, const BinaryFunction
*B
) const {
321 return isIdenticalWith(*A
, *B
, /*CongruentSymbols=*/true);
326 bool operator()(const BinaryFunction
*A
, const BinaryFunction
*B
) const {
329 return isIdenticalWith(*A
, *B
, /*CongruentSymbols=*/false);
333 typedef std::unordered_map
<BinaryFunction
*, std::set
<BinaryFunction
*>,
334 KeyHash
, KeyCongruent
>
337 typedef std::unordered_map
<BinaryFunction
*, std::vector
<BinaryFunction
*>,
344 Error
IdenticalCodeFolding::runOnFunctions(BinaryContext
&BC
) {
345 const size_t OriginalFunctionCount
= BC
.getBinaryFunctions().size();
346 uint64_t NumFunctionsFolded
= 0;
347 std::atomic
<uint64_t> NumJTFunctionsFolded
{0};
348 std::atomic
<uint64_t> BytesSavedEstimate
{0};
349 std::atomic
<uint64_t> NumCalled
{0};
350 std::atomic
<uint64_t> NumFoldedLastIteration
{0};
351 CongruentBucketsMap CongruentBuckets
;
353 // Hash all the functions
354 auto hashFunctions
= [&]() {
355 NamedRegionTimer
HashFunctionsTimer("hashing", "hashing", "ICF breakdown",
356 "ICF breakdown", opts::TimeICF
);
357 ParallelUtilities::WorkFuncTy WorkFun
= [&](BinaryFunction
&BF
) {
358 // Make sure indices are in-order.
360 BF
.getLayout().updateLayoutIndices(BF
.dfs());
362 BF
.getLayout().updateLayoutIndices();
364 // Pre-compute hash before pushing into hashtable.
365 // Hash instruction operands to minimize hash collisions.
367 opts::ICFUseDFS
, HashFunction::Default
,
368 [&BC
](const MCOperand
&Op
) { return hashInstOperand(BC
, Op
); });
371 ParallelUtilities::PredicateTy SkipFunc
= [&](const BinaryFunction
&BF
) {
372 return !shouldOptimize(BF
);
375 ParallelUtilities::runOnEachFunction(
376 BC
, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL
, WorkFun
, SkipFunc
,
377 "hashFunctions", /*ForceSequential*/ false, 2);
380 // Creates buckets with congruent functions - functions that potentially
382 auto createCongruentBuckets
= [&]() {
383 NamedRegionTimer
CongruentBucketsTimer("congruent buckets",
384 "congruent buckets", "ICF breakdown",
385 "ICF breakdown", opts::TimeICF
);
386 for (auto &BFI
: BC
.getBinaryFunctions()) {
387 BinaryFunction
&BF
= BFI
.second
;
388 if (!this->shouldOptimize(BF
))
390 CongruentBuckets
[&BF
].emplace(&BF
);
394 // Partition each set of congruent functions into sets of identical functions
396 auto performFoldingPass
= [&]() {
397 NamedRegionTimer
FoldingPassesTimer("folding passes", "folding passes",
398 "ICF breakdown", "ICF breakdown",
400 Timer
SinglePass("single fold pass", "single fold pass");
401 LLVM_DEBUG(SinglePass
.startTimer());
403 ThreadPoolInterface
*ThPool
;
404 if (!opts::NoThreads
)
405 ThPool
= &ParallelUtilities::getThreadPool();
407 // Fold identical functions within a single congruent bucket
408 auto processSingleBucket
= [&](std::set
<BinaryFunction
*> &Candidates
) {
409 Timer
T("folding single congruent list", "folding single congruent list");
410 LLVM_DEBUG(T
.startTimer());
412 // Identical functions go into the same bucket.
413 IdenticalBucketsMap IdenticalBuckets
;
414 for (BinaryFunction
*BF
: Candidates
) {
415 IdenticalBuckets
[BF
].emplace_back(BF
);
418 for (auto &IBI
: IdenticalBuckets
) {
419 // Functions identified as identical.
420 std::vector
<BinaryFunction
*> &Twins
= IBI
.second
;
421 if (Twins
.size() < 2)
424 // Fold functions. Keep the order consistent across invocations with
425 // different options.
427 Twins
, [](const BinaryFunction
*A
, const BinaryFunction
*B
) {
428 return A
->getFunctionNumber() < B
->getFunctionNumber();
431 BinaryFunction
*ParentBF
= Twins
[0];
432 if (!ParentBF
->hasFunctionsFoldedInto())
433 NumCalled
+= ParentBF
->getKnownExecutionCount();
434 for (unsigned I
= 1; I
< Twins
.size(); ++I
) {
435 BinaryFunction
*ChildBF
= Twins
[I
];
436 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF
<< " into "
437 << *ParentBF
<< '\n');
439 // Remove child function from the list of candidates.
440 auto FI
= Candidates
.find(ChildBF
);
441 assert(FI
!= Candidates
.end() &&
442 "function expected to be in the set");
443 Candidates
.erase(FI
);
445 // Fold the function and remove from the list of processed functions.
446 BytesSavedEstimate
+= ChildBF
->getSize();
447 if (!ChildBF
->hasFunctionsFoldedInto())
448 NumCalled
+= ChildBF
->getKnownExecutionCount();
449 BC
.foldFunction(*ChildBF
, *ParentBF
);
451 ++NumFoldedLastIteration
;
453 if (ParentBF
->hasJumpTables())
454 ++NumJTFunctionsFolded
;
458 LLVM_DEBUG(T
.stopTimer());
461 // Create a task for each congruent bucket
462 for (auto &Entry
: CongruentBuckets
) {
463 std::set
<BinaryFunction
*> &Bucket
= Entry
.second
;
464 if (Bucket
.size() < 2)
468 processSingleBucket(Bucket
);
470 ThPool
->async(processSingleBucket
, std::ref(Bucket
));
473 if (!opts::NoThreads
)
476 LLVM_DEBUG(SinglePass
.stopTimer());
480 createCongruentBuckets();
482 unsigned Iteration
= 1;
483 // We repeat the pass until no new modifications happen.
485 NumFoldedLastIteration
= 0;
486 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration
<< "...\n");
488 performFoldingPass();
490 NumFunctionsFolded
+= NumFoldedLastIteration
;
493 } while (NumFoldedLastIteration
> 0);
496 // Print functions that are congruent but not identical.
497 for (auto &CBI
: CongruentBuckets
) {
498 std::set
<BinaryFunction
*> &Candidates
= CBI
.second
;
499 if (Candidates
.size() < 2)
501 dbgs() << "BOLT-DEBUG: the following " << Candidates
.size()
502 << " functions (each of size " << (*Candidates
.begin())->getSize()
503 << " bytes) are congruent but not identical:\n";
504 for (BinaryFunction
*BF
: Candidates
) {
505 dbgs() << " " << *BF
;
506 if (BF
->getKnownExecutionCount())
507 dbgs() << " (executed " << BF
->getKnownExecutionCount() << " times)";
513 if (NumFunctionsFolded
)
514 BC
.outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded
<< " out of "
515 << OriginalFunctionCount
<< " functions in " << Iteration
516 << " passes. " << NumJTFunctionsFolded
517 << " functions had jump tables.\n"
518 << "BOLT-INFO: Removing all identical functions will save "
519 << format("%.2lf", (double)BytesSavedEstimate
/ 1024)
520 << " KB of code space. Folded functions were called " << NumCalled
521 << " times based on profile.\n";
523 return Error::success();