1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements ReorderFunctions class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/ReorderFunctions.h"
14 #include "bolt/Passes/HFSort.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Transforms/Utils/CodeLayout.h"
21 #define DEBUG_TYPE "hfsort"
27 extern cl::OptionCategory BoltOptCategory
;
28 extern cl::opt
<unsigned> Verbosity
;
29 extern cl::opt
<uint32_t> RandomSeed
;
31 extern size_t padFunction(const bolt::BinaryFunction
&Function
);
33 extern cl::opt
<bolt::ReorderFunctions::ReorderType
> ReorderFunctions
;
34 cl::opt
<bolt::ReorderFunctions::ReorderType
> ReorderFunctions(
36 cl::desc("reorder and cluster functions (works only with relocations)"),
37 cl::init(bolt::ReorderFunctions::RT_NONE
),
38 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE
, "none",
39 "do not reorder functions"),
40 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT
, "exec-count",
41 "order by execution count"),
42 clEnumValN(bolt::ReorderFunctions::RT_HFSORT
, "hfsort",
43 "use hfsort algorithm"),
44 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS
, "hfsort+",
45 "use cache-directed sort"),
46 clEnumValN(bolt::ReorderFunctions::RT_CDSORT
, "cdsort",
47 "use cache-directed sort"),
48 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN
,
49 "pettis-hansen", "use Pettis-Hansen algorithm"),
50 clEnumValN(bolt::ReorderFunctions::RT_RANDOM
, "random",
51 "reorder functions randomly"),
52 clEnumValN(bolt::ReorderFunctions::RT_USER
, "user",
53 "use function order specified by -function-order")),
54 cl::ZeroOrMore
, cl::cat(BoltOptCategory
),
55 cl::callback([](const bolt::ReorderFunctions::ReorderType
&option
) {
56 if (option
== bolt::ReorderFunctions::RT_HFSORT_PLUS
) {
57 errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated,"
58 << " please use '-reorder-functions=cdsort' instead\n";
59 ReorderFunctions
= bolt::ReorderFunctions::RT_CDSORT
;
63 static cl::opt
<bool> ReorderFunctionsUseHotSize(
64 "reorder-functions-use-hot-size",
65 cl::desc("use a function's hot size when doing clustering"), cl::init(true),
66 cl::cat(BoltOptCategory
));
68 static cl::opt
<std::string
> FunctionOrderFile(
70 cl::desc("file containing an ordered list of functions to use for function "
72 cl::cat(BoltOptCategory
));
74 static cl::opt
<std::string
> GenerateFunctionOrderFile(
75 "generate-function-order",
76 cl::desc("file to dump the ordered list of functions to use for function "
78 cl::cat(BoltOptCategory
));
80 static cl::opt
<std::string
> LinkSectionsFile(
81 "generate-link-sections",
82 cl::desc("generate a list of function sections in a format suitable for "
83 "inclusion in a linker script"),
84 cl::cat(BoltOptCategory
));
87 UseEdgeCounts("use-edge-counts",
88 cl::desc("use edge count data when doing clustering"),
89 cl::init(true), cl::cat(BoltOptCategory
));
91 static cl::opt
<bool> CgFromPerfData(
93 cl::desc("use perf data directly when constructing the call graph"
94 " for stale functions"),
95 cl::init(true), cl::ZeroOrMore
, cl::cat(BoltOptCategory
));
97 static cl::opt
<bool> CgIgnoreRecursiveCalls(
98 "cg-ignore-recursive-calls",
99 cl::desc("ignore recursive calls when constructing the call graph"),
100 cl::init(true), cl::cat(BoltOptCategory
));
102 static cl::opt
<bool> CgUseSplitHotSize(
103 "cg-use-split-hot-size",
104 cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
105 "call graph functions"),
106 cl::init(false), cl::ZeroOrMore
, cl::cat(BoltOptCategory
));
113 using NodeId
= CallGraph::NodeId
;
114 using Arc
= CallGraph::Arc
;
115 using Node
= CallGraph::Node
;
117 void ReorderFunctions::reorder(BinaryContext
&BC
,
118 std::vector
<Cluster
> &&Clusters
,
119 std::map
<uint64_t, BinaryFunction
> &BFs
) {
120 std::vector
<uint64_t> FuncAddr(Cg
.numNodes()); // Just for computing stats
121 uint64_t TotalSize
= 0;
124 // Set order of hot functions based on clusters.
125 for (const Cluster
&Cluster
: Clusters
) {
126 for (const NodeId FuncId
: Cluster
.targets()) {
127 Cg
.nodeIdToFunc(FuncId
)->setIndex(Index
++);
128 FuncAddr
[FuncId
] = TotalSize
;
129 TotalSize
+= Cg
.size(FuncId
);
133 // Assign valid index for functions with valid profile.
134 for (auto &It
: BFs
) {
135 BinaryFunction
&BF
= It
.second
;
136 if (!BF
.hasValidIndex() && BF
.hasValidProfile())
137 BF
.setIndex(Index
++);
140 if (opts::ReorderFunctions
== RT_NONE
)
143 printStats(BC
, Clusters
, FuncAddr
);
146 void ReorderFunctions::printStats(BinaryContext
&BC
,
147 const std::vector
<Cluster
> &Clusters
,
148 const std::vector
<uint64_t> &FuncAddr
) {
149 if (opts::Verbosity
== 0) {
151 if (!DebugFlag
|| !isCurrentDebugType("hfsort"))
158 bool PrintDetailed
= opts::Verbosity
> 1;
161 (DebugFlag
&& isCurrentDebugType("hfsort") && opts::Verbosity
> 0);
163 uint64_t TotalSize
= 0;
164 uint64_t CurPage
= 0;
165 uint64_t Hotfuncs
= 0;
166 double TotalDistance
= 0;
167 double TotalCalls
= 0;
168 double TotalCalls64B
= 0;
169 double TotalCalls4KB
= 0;
170 double TotalCalls2MB
= 0;
172 BC
.outs() << "BOLT-INFO: Function reordering page layout\n"
173 << "BOLT-INFO: ============== page 0 ==============\n";
174 for (const Cluster
&Cluster
: Clusters
) {
177 "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
178 Cluster
.density(), Cluster
.samples(), Cluster
.size());
180 for (NodeId FuncId
: Cluster
.targets()) {
181 if (Cg
.samples(FuncId
) > 0) {
185 BC
.outs() << "BOLT-INFO: hot func " << *Cg
.nodeIdToFunc(FuncId
)
186 << " (" << Cg
.size(FuncId
) << ")\n";
190 for (NodeId Dst
: Cg
.successors(FuncId
)) {
191 if (FuncId
== Dst
) // ignore recursive calls in stats
193 const Arc
&Arc
= *Cg
.findArc(FuncId
, Dst
);
194 const auto D
= std::abs(FuncAddr
[Arc
.dst()] -
195 (FuncAddr
[FuncId
] + Arc
.avgCallOffset()));
196 const double W
= Arc
.weight();
197 if (D
< 64 && PrintDetailed
&& opts::Verbosity
> 2)
198 BC
.outs() << "BOLT-INFO: short (" << D
<< "B) call:\n"
199 << "BOLT-INFO: Src: " << *Cg
.nodeIdToFunc(FuncId
)
201 << "BOLT-INFO: Dst: " << *Cg
.nodeIdToFunc(Dst
) << "\n"
202 << "BOLT-INFO: Weight = " << W
<< "\n"
203 << "BOLT-INFO: AvgOffset = " << Arc
.avgCallOffset()
212 Dist
+= Arc
.weight() * D
;
214 BC
.outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
215 "weight = %.0lf, callDist = %f\n",
216 Arc
.src(), FuncAddr
[Arc
.src()],
217 Arc
.avgCallOffset(), Arc
.dst(),
218 FuncAddr
[Arc
.dst()], Arc
.weight(), D
);
221 TotalDistance
+= Dist
;
222 TotalSize
+= Cg
.size(FuncId
);
225 BC
.outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
226 TotalSize
, Calls
? Dist
/ Calls
: 0)
227 << Cg
.nodeIdToFunc(FuncId
)->getPrintName() << '\n';
228 const uint64_t NewPage
= TotalSize
/ HugePageSize
;
229 if (NewPage
!= CurPage
) {
232 "BOLT-INFO: ============== page %u ==============\n", CurPage
);
238 BC
.outs() << "BOLT-INFO: Function reordering stats\n"
239 << format("BOLT-INFO: Number of hot functions: %u\n"
240 "BOLT-INFO: Number of clusters: %lu\n",
241 Hotfuncs
, Clusters
.size())
242 << format("BOLT-INFO: Final average call distance = %.1lf "
244 TotalCalls
? TotalDistance
/ TotalCalls
: 0,
245 TotalDistance
, TotalCalls
)
246 << format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls
);
249 << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n",
250 TotalCalls64B
, 100 * TotalCalls64B
/ TotalCalls
)
251 << format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n",
252 TotalCalls4KB
, 100 * TotalCalls4KB
/ TotalCalls
)
253 << format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n",
254 TotalCalls2MB
, 100 * TotalCalls2MB
/ TotalCalls
);
257 Error
ReorderFunctions::readFunctionOrderFile(
258 std::vector
<std::string
> &FunctionNames
) {
259 std::ifstream
FuncsFile(opts::FunctionOrderFile
, std::ios::in
);
261 return createFatalBOLTError(Twine("Ordered functions file \"") +
262 Twine(opts::FunctionOrderFile
) +
263 Twine("\" can't be opened."));
265 std::string FuncName
;
266 while (std::getline(FuncsFile
, FuncName
))
267 FunctionNames
.push_back(FuncName
);
268 return Error::success();
271 Error
ReorderFunctions::runOnFunctions(BinaryContext
&BC
) {
272 auto &BFs
= BC
.getBinaryFunctions();
273 if (opts::ReorderFunctions
!= RT_NONE
&&
274 opts::ReorderFunctions
!= RT_EXEC_COUNT
&&
275 opts::ReorderFunctions
!= RT_USER
) {
278 [](const BinaryFunction
&BF
) {
279 if (!BF
.hasProfile())
281 if (BF
.getState() != BinaryFunction::State::CFG
)
285 opts::CgFromPerfData
,
286 /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize
,
287 opts::CgUseSplitHotSize
, opts::UseEdgeCounts
,
288 opts::CgIgnoreRecursiveCalls
);
289 Cg
.normalizeArcWeights();
292 std::vector
<Cluster
> Clusters
;
294 switch (opts::ReorderFunctions
) {
297 case RT_EXEC_COUNT
: {
298 std::vector
<BinaryFunction
*> SortedFunctions(BFs
.size());
299 llvm::transform(llvm::make_second_range(BFs
), SortedFunctions
.begin(),
300 [](BinaryFunction
&BF
) { return &BF
; });
301 llvm::stable_sort(SortedFunctions
,
302 [&](const BinaryFunction
*A
, const BinaryFunction
*B
) {
307 const size_t PadA
= opts::padFunction(*A
);
308 const size_t PadB
= opts::padFunction(*B
);
309 if (!PadA
|| !PadB
) {
315 if (!A
->hasProfile())
317 if (!B
->hasProfile())
319 return A
->getExecutionCount() > B
->getExecutionCount();
322 for (BinaryFunction
*BF
: SortedFunctions
)
323 if (BF
->hasProfile()) {
324 BF
->setIndex(Index
++);
325 LLVM_DEBUG(if (opts::Verbosity
> 1) {
326 dbgs() << "BOLT-INFO: hot func " << BF
->getPrintName() << " ("
327 << BF
->getExecutionCount() << ")\n";
332 Clusters
= clusterize(Cg
);
335 // It is required that the sum of incoming arc weights is not greater
336 // than the number of samples for every function. Ensuring the call graph
337 // obeys the property before running the algorithm.
338 Cg
.adjustArcWeights();
340 // Initialize CFG nodes and their data
341 std::vector
<uint64_t> FuncSizes
;
342 std::vector
<uint64_t> FuncCounts
;
343 std::vector
<codelayout::EdgeCount
> CallCounts
;
344 std::vector
<uint64_t> CallOffsets
;
345 for (NodeId F
= 0; F
< Cg
.numNodes(); ++F
) {
346 FuncSizes
.push_back(Cg
.size(F
));
347 FuncCounts
.push_back(Cg
.samples(F
));
348 for (NodeId Succ
: Cg
.successors(F
)) {
349 const Arc
&Arc
= *Cg
.findArc(F
, Succ
);
350 CallCounts
.push_back({F
, Succ
, uint64_t(Arc
.weight())});
351 CallOffsets
.push_back(uint64_t(Arc
.avgCallOffset()));
355 // Run the layout algorithm.
356 std::vector
<uint64_t> Result
= codelayout::computeCacheDirectedLayout(
357 FuncSizes
, FuncCounts
, CallCounts
, CallOffsets
);
359 // Create a single cluster from the computed order of hot functions.
360 std::vector
<CallGraph::NodeId
> NodeOrder(Result
.begin(), Result
.end());
361 Clusters
.emplace_back(Cluster(NodeOrder
, Cg
));
363 case RT_PETTIS_HANSEN
:
364 Clusters
= pettisAndHansen(Cg
);
367 std::srand(opts::RandomSeed
);
368 Clusters
= randomClusters(Cg
);
371 // Build LTOCommonNameMap
372 StringMap
<std::vector
<uint64_t>> LTOCommonNameMap
;
373 for (const BinaryFunction
&BF
: llvm::make_second_range(BFs
))
374 for (StringRef Name
: BF
.getNames())
375 if (std::optional
<StringRef
> LTOCommonName
= getLTOCommonName(Name
))
376 LTOCommonNameMap
[*LTOCommonName
].push_back(BF
.getAddress());
379 uint32_t InvalidEntries
= 0;
380 std::vector
<std::string
> FunctionNames
;
381 if (Error E
= readFunctionOrderFile(FunctionNames
))
382 return Error(std::move(E
));
384 for (const std::string
&Function
: FunctionNames
) {
385 std::vector
<uint64_t> FuncAddrs
;
387 BinaryData
*BD
= BC
.getBinaryDataByName(Function
);
389 // If we can't find the main symbol name, look for alternates.
390 uint32_t LocalID
= 1;
392 const std::string FuncName
= Function
+ "/" + std::to_string(LocalID
);
393 BD
= BC
.getBinaryDataByName(FuncName
);
395 FuncAddrs
.push_back(BD
->getAddress());
400 // Strip LTO suffixes
401 if (std::optional
<StringRef
> CommonName
= getLTOCommonName(Function
))
402 if (LTOCommonNameMap
.contains(*CommonName
))
403 llvm::append_range(FuncAddrs
, LTOCommonNameMap
[*CommonName
]);
405 FuncAddrs
.push_back(BD
->getAddress());
408 if (FuncAddrs
.empty()) {
409 if (opts::Verbosity
>= 1)
410 BC
.errs() << "BOLT-WARNING: Reorder functions: can't find function "
411 << "for " << Function
<< "\n";
416 for (const uint64_t FuncAddr
: FuncAddrs
) {
417 const BinaryData
*FuncBD
= BC
.getBinaryDataAtAddress(FuncAddr
);
420 BinaryFunction
*BF
= BC
.getFunctionForSymbol(FuncBD
->getSymbol());
422 if (opts::Verbosity
>= 1)
423 BC
.errs() << "BOLT-WARNING: Reorder functions: can't find function "
424 << "for " << Function
<< "\n";
428 if (!BF
->hasValidIndex())
429 BF
->setIndex(Index
++);
430 else if (opts::Verbosity
> 0)
431 BC
.errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
436 BC
.errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
437 << InvalidEntries
<< " entries in -function-order list\n";
441 llvm_unreachable("unexpected layout type");
444 reorder(BC
, std::move(Clusters
), BFs
);
446 BC
.HasFinalizedFunctionOrder
= true;
448 std::unique_ptr
<std::ofstream
> FuncsFile
;
449 if (!opts::GenerateFunctionOrderFile
.empty()) {
450 FuncsFile
= std::make_unique
<std::ofstream
>(opts::GenerateFunctionOrderFile
,
453 BC
.errs() << "BOLT-ERROR: ordered functions file "
454 << opts::GenerateFunctionOrderFile
<< " cannot be opened\n";
455 return createFatalBOLTError("");
459 std::unique_ptr
<std::ofstream
> LinkSectionsFile
;
460 if (!opts::LinkSectionsFile
.empty()) {
462 std::make_unique
<std::ofstream
>(opts::LinkSectionsFile
, std::ios::out
);
463 if (!LinkSectionsFile
) {
464 BC
.errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
465 << " cannot be opened\n";
466 return createFatalBOLTError("");
470 if (FuncsFile
|| LinkSectionsFile
) {
471 std::vector
<BinaryFunction
*> SortedFunctions(BFs
.size());
472 llvm::transform(llvm::make_second_range(BFs
), SortedFunctions
.begin(),
473 [](BinaryFunction
&BF
) { return &BF
; });
475 // Sort functions by index.
476 llvm::stable_sort(SortedFunctions
,
477 [](const BinaryFunction
*A
, const BinaryFunction
*B
) {
478 if (A
->hasValidIndex() && B
->hasValidIndex())
479 return A
->getIndex() < B
->getIndex();
480 if (A
->hasValidIndex() && !B
->hasValidIndex())
482 if (!A
->hasValidIndex() && B
->hasValidIndex())
484 return A
->getAddress() < B
->getAddress();
487 for (const BinaryFunction
*Func
: SortedFunctions
) {
488 if (!Func
->hasValidIndex())
490 if (Func
->isPLTFunction())
494 *FuncsFile
<< Func
->getOneName().str() << '\n';
496 if (LinkSectionsFile
) {
497 const char *Indent
= "";
498 std::vector
<StringRef
> AllNames
= Func
->getNames();
499 llvm::sort(AllNames
);
500 for (StringRef Name
: AllNames
) {
501 const size_t SlashPos
= Name
.find('/');
502 if (SlashPos
!= std::string::npos
) {
503 // Avoid duplicates for local functions.
504 if (Name
.find('/', SlashPos
+ 1) != std::string::npos
)
506 Name
= Name
.substr(0, SlashPos
);
508 *LinkSectionsFile
<< Indent
<< ".text." << Name
.str() << '\n';
516 BC
.outs() << "BOLT-INFO: dumped function order to "
517 << opts::GenerateFunctionOrderFile
<< '\n';
520 if (LinkSectionsFile
) {
521 LinkSectionsFile
->close();
522 BC
.outs() << "BOLT-INFO: dumped linker section order to "
523 << opts::LinkSectionsFile
<< '\n';
526 return Error::success();