Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Passes / ReorderFunctions.cpp
blobd656499cada37abe04c5e91a550c131659474112
1 //===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderFunctions class.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/ReorderFunctions.h"
14 #include "bolt/Passes/HFSort.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Transforms/Utils/CodeLayout.h"
19 #include <fstream>
21 #define DEBUG_TYPE "hfsort"
23 using namespace llvm;
25 namespace opts {
27 extern cl::OptionCategory BoltOptCategory;
28 extern cl::opt<unsigned> Verbosity;
29 extern cl::opt<uint32_t> RandomSeed;
31 extern size_t padFunction(const bolt::BinaryFunction &Function);
33 cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
34 "reorder-functions",
35 cl::desc("reorder and cluster functions (works only with relocations)"),
36 cl::init(bolt::ReorderFunctions::RT_NONE),
37 cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none",
38 "do not reorder functions"),
39 clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count",
40 "order by execution count"),
41 clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort",
42 "use hfsort algorithm"),
43 clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+",
44 "use hfsort+ algorithm"),
45 clEnumValN(bolt::ReorderFunctions::RT_CDS, "cds",
46 "use cache-directed sort"),
47 clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
48 "pettis-hansen", "use Pettis-Hansen algorithm"),
49 clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random",
50 "reorder functions randomly"),
51 clEnumValN(bolt::ReorderFunctions::RT_USER, "user",
52 "use function order specified by -function-order")),
53 cl::ZeroOrMore, cl::cat(BoltOptCategory));
55 static cl::opt<bool> ReorderFunctionsUseHotSize(
56 "reorder-functions-use-hot-size",
57 cl::desc("use a function's hot size when doing clustering"), cl::init(true),
58 cl::cat(BoltOptCategory));
60 static cl::opt<std::string> FunctionOrderFile(
61 "function-order",
62 cl::desc("file containing an ordered list of functions to use for function "
63 "reordering"),
64 cl::cat(BoltOptCategory));
66 static cl::opt<std::string> GenerateFunctionOrderFile(
67 "generate-function-order",
68 cl::desc("file to dump the ordered list of functions to use for function "
69 "reordering"),
70 cl::cat(BoltOptCategory));
72 static cl::opt<std::string> LinkSectionsFile(
73 "generate-link-sections",
74 cl::desc("generate a list of function sections in a format suitable for "
75 "inclusion in a linker script"),
76 cl::cat(BoltOptCategory));
78 static cl::opt<bool>
79 UseEdgeCounts("use-edge-counts",
80 cl::desc("use edge count data when doing clustering"),
81 cl::init(true), cl::cat(BoltOptCategory));
83 static cl::opt<bool> CgFromPerfData(
84 "cg-from-perf-data",
85 cl::desc("use perf data directly when constructing the call graph"
86 " for stale functions"),
87 cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
89 static cl::opt<bool> CgIgnoreRecursiveCalls(
90 "cg-ignore-recursive-calls",
91 cl::desc("ignore recursive calls when constructing the call graph"),
92 cl::init(true), cl::cat(BoltOptCategory));
94 static cl::opt<bool> CgUseSplitHotSize(
95 "cg-use-split-hot-size",
96 cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
97 "call graph functions"),
98 cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
100 } // namespace opts
102 namespace llvm {
103 namespace bolt {
105 using NodeId = CallGraph::NodeId;
106 using Arc = CallGraph::Arc;
107 using Node = CallGraph::Node;
109 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
110 std::map<uint64_t, BinaryFunction> &BFs) {
111 std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
112 uint64_t TotalSize = 0;
113 uint32_t Index = 0;
115 // Set order of hot functions based on clusters.
116 for (const Cluster &Cluster : Clusters) {
117 for (const NodeId FuncId : Cluster.targets()) {
118 Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
119 FuncAddr[FuncId] = TotalSize;
120 TotalSize += Cg.size(FuncId);
124 // Assign valid index for functions with valid profile.
125 for (auto &It : BFs) {
126 BinaryFunction &BF = It.second;
127 if (!BF.hasValidIndex() && BF.hasValidProfile())
128 BF.setIndex(Index++);
131 if (opts::ReorderFunctions == RT_NONE)
132 return;
134 printStats(Clusters, FuncAddr);
137 void ReorderFunctions::printStats(const std::vector<Cluster> &Clusters,
138 const std::vector<uint64_t> &FuncAddr) {
139 if (opts::Verbosity == 0) {
140 #ifndef NDEBUG
141 if (!DebugFlag || !isCurrentDebugType("hfsort"))
142 return;
143 #else
144 return;
145 #endif
148 bool PrintDetailed = opts::Verbosity > 1;
149 #ifndef NDEBUG
150 PrintDetailed |=
151 (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
152 #endif
153 uint64_t TotalSize = 0;
154 uint64_t CurPage = 0;
155 uint64_t Hotfuncs = 0;
156 double TotalDistance = 0;
157 double TotalCalls = 0;
158 double TotalCalls64B = 0;
159 double TotalCalls4KB = 0;
160 double TotalCalls2MB = 0;
161 if (PrintDetailed)
162 outs() << "BOLT-INFO: Function reordering page layout\n"
163 << "BOLT-INFO: ============== page 0 ==============\n";
164 for (const Cluster &Cluster : Clusters) {
165 if (PrintDetailed)
166 outs() << format(
167 "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
168 Cluster.density(), Cluster.samples(), Cluster.size());
170 for (NodeId FuncId : Cluster.targets()) {
171 if (Cg.samples(FuncId) > 0) {
172 Hotfuncs++;
174 if (PrintDetailed)
175 outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId) << " ("
176 << Cg.size(FuncId) << ")\n";
178 uint64_t Dist = 0;
179 uint64_t Calls = 0;
180 for (NodeId Dst : Cg.successors(FuncId)) {
181 if (FuncId == Dst) // ignore recursive calls in stats
182 continue;
183 const Arc &Arc = *Cg.findArc(FuncId, Dst);
184 const auto D = std::abs(FuncAddr[Arc.dst()] -
185 (FuncAddr[FuncId] + Arc.avgCallOffset()));
186 const double W = Arc.weight();
187 if (D < 64 && PrintDetailed && opts::Verbosity > 2)
188 outs() << "BOLT-INFO: short (" << D << "B) call:\n"
189 << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) << "\n"
190 << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
191 << "BOLT-INFO: Weight = " << W << "\n"
192 << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset()
193 << "\n";
194 Calls += W;
195 if (D < 64)
196 TotalCalls64B += W;
197 if (D < 4096)
198 TotalCalls4KB += W;
199 if (D < (2 << 20))
200 TotalCalls2MB += W;
201 Dist += Arc.weight() * D;
202 if (PrintDetailed)
203 outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
204 "weight = %.0lf, callDist = %f\n",
205 Arc.src(), FuncAddr[Arc.src()],
206 Arc.avgCallOffset(), Arc.dst(),
207 FuncAddr[Arc.dst()], Arc.weight(), D);
209 TotalCalls += Calls;
210 TotalDistance += Dist;
211 TotalSize += Cg.size(FuncId);
213 if (PrintDetailed) {
214 outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
215 TotalSize, Calls ? Dist / Calls : 0)
216 << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
217 const uint64_t NewPage = TotalSize / HugePageSize;
218 if (NewPage != CurPage) {
219 CurPage = NewPage;
220 outs() << format(
221 "BOLT-INFO: ============== page %u ==============\n", CurPage);
227 outs() << "BOLT-INFO: Function reordering stats\n"
228 << format("BOLT-INFO: Number of hot functions: %u\n"
229 "BOLT-INFO: Number of clusters: %lu\n",
230 Hotfuncs, Clusters.size())
231 << format("BOLT-INFO: Final average call distance = %.1lf "
232 "(%.0lf / %.0lf)\n",
233 TotalCalls ? TotalDistance / TotalCalls : 0, TotalDistance,
234 TotalCalls)
235 << format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls);
236 if (TotalCalls)
237 outs() << format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n",
238 TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
239 << format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n",
240 TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
241 << format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n",
242 TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
245 std::vector<std::string> ReorderFunctions::readFunctionOrderFile() {
246 std::vector<std::string> FunctionNames;
247 std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
248 if (!FuncsFile) {
249 errs() << "Ordered functions file \"" << opts::FunctionOrderFile
250 << "\" can't be opened.\n";
251 exit(1);
253 std::string FuncName;
254 while (std::getline(FuncsFile, FuncName))
255 FunctionNames.push_back(FuncName);
256 return FunctionNames;
259 void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
260 auto &BFs = BC.getBinaryFunctions();
261 if (opts::ReorderFunctions != RT_NONE &&
262 opts::ReorderFunctions != RT_EXEC_COUNT &&
263 opts::ReorderFunctions != RT_USER) {
264 Cg = buildCallGraph(
266 [](const BinaryFunction &BF) {
267 if (!BF.hasProfile())
268 return true;
269 if (BF.getState() != BinaryFunction::State::CFG)
270 return true;
271 return false;
273 opts::CgFromPerfData,
274 /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize,
275 opts::CgUseSplitHotSize, opts::UseEdgeCounts,
276 opts::CgIgnoreRecursiveCalls);
277 Cg.normalizeArcWeights();
280 std::vector<Cluster> Clusters;
282 switch (opts::ReorderFunctions) {
283 case RT_NONE:
284 break;
285 case RT_EXEC_COUNT: {
286 std::vector<BinaryFunction *> SortedFunctions(BFs.size());
287 llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
288 [](BinaryFunction &BF) { return &BF; });
289 llvm::stable_sort(SortedFunctions,
290 [&](const BinaryFunction *A, const BinaryFunction *B) {
291 if (A->isIgnored())
292 return false;
293 if (B->isIgnored())
294 return true;
295 const size_t PadA = opts::padFunction(*A);
296 const size_t PadB = opts::padFunction(*B);
297 if (!PadA || !PadB) {
298 if (PadA)
299 return true;
300 if (PadB)
301 return false;
303 if (!A->hasProfile())
304 return false;
305 if (!B->hasProfile())
306 return true;
307 return A->getExecutionCount() > B->getExecutionCount();
309 uint32_t Index = 0;
310 for (BinaryFunction *BF : SortedFunctions)
311 if (BF->hasProfile()) {
312 BF->setIndex(Index++);
313 LLVM_DEBUG(if (opts::Verbosity > 1) {
314 dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " ("
315 << BF->getExecutionCount() << ")\n";
318 } break;
319 case RT_HFSORT:
320 Clusters = clusterize(Cg);
321 break;
322 case RT_HFSORT_PLUS:
323 Clusters = hfsortPlus(Cg);
324 break;
325 case RT_CDS: {
326 // It is required that the sum of incoming arc weights is not greater
327 // than the number of samples for every function. Ensuring the call graph
328 // obeys the property before running the algorithm.
329 Cg.adjustArcWeights();
331 // Initialize CFG nodes and their data
332 std::vector<uint64_t> FuncSizes;
333 std::vector<uint64_t> FuncCounts;
334 std::vector<codelayout::EdgeCount> CallCounts;
335 std::vector<uint64_t> CallOffsets;
336 for (NodeId F = 0; F < Cg.numNodes(); ++F) {
337 FuncSizes.push_back(Cg.size(F));
338 FuncCounts.push_back(Cg.samples(F));
339 for (NodeId Succ : Cg.successors(F)) {
340 const Arc &Arc = *Cg.findArc(F, Succ);
341 CallCounts.push_back({F, Succ, uint64_t(Arc.weight())});
342 CallOffsets.push_back(uint64_t(Arc.avgCallOffset()));
346 // Run the layout algorithm.
347 std::vector<uint64_t> Result = codelayout::computeCacheDirectedLayout(
348 FuncSizes, FuncCounts, CallCounts, CallOffsets);
350 // Create a single cluster from the computed order of hot functions.
351 std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end());
352 Clusters.emplace_back(Cluster(NodeOrder, Cg));
353 } break;
354 case RT_PETTIS_HANSEN:
355 Clusters = pettisAndHansen(Cg);
356 break;
357 case RT_RANDOM:
358 std::srand(opts::RandomSeed);
359 Clusters = randomClusters(Cg);
360 break;
361 case RT_USER: {
362 // Build LTOCommonNameMap
363 StringMap<std::vector<uint64_t>> LTOCommonNameMap;
364 for (const BinaryFunction &BF : llvm::make_second_range(BFs))
365 for (StringRef Name : BF.getNames())
366 if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
367 LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress());
369 uint32_t Index = 0;
370 uint32_t InvalidEntries = 0;
371 for (const std::string &Function : readFunctionOrderFile()) {
372 std::vector<uint64_t> FuncAddrs;
374 BinaryData *BD = BC.getBinaryDataByName(Function);
375 if (!BD) {
376 // If we can't find the main symbol name, look for alternates.
377 uint32_t LocalID = 1;
378 while (true) {
379 const std::string FuncName = Function + "/" + std::to_string(LocalID);
380 BD = BC.getBinaryDataByName(FuncName);
381 if (BD)
382 FuncAddrs.push_back(BD->getAddress());
383 else
384 break;
385 LocalID++;
387 // Strip LTO suffixes
388 if (std::optional<StringRef> CommonName = getLTOCommonName(Function))
389 if (LTOCommonNameMap.contains(*CommonName))
390 llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]);
391 } else {
392 FuncAddrs.push_back(BD->getAddress());
395 if (FuncAddrs.empty()) {
396 if (opts::Verbosity >= 1)
397 errs() << "BOLT-WARNING: Reorder functions: can't find function "
398 << "for " << Function << "\n";
399 ++InvalidEntries;
400 continue;
403 for (const uint64_t FuncAddr : FuncAddrs) {
404 const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
405 assert(FuncBD);
407 BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
408 if (!BF) {
409 if (opts::Verbosity >= 1)
410 errs() << "BOLT-WARNING: Reorder functions: can't find function "
411 << "for " << Function << "\n";
412 ++InvalidEntries;
413 break;
415 if (!BF->hasValidIndex())
416 BF->setIndex(Index++);
417 else if (opts::Verbosity > 0)
418 errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
419 << "\n";
422 if (InvalidEntries)
423 errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
424 << InvalidEntries << " entries in -function-order list\n";
425 } break;
428 reorder(std::move(Clusters), BFs);
430 std::unique_ptr<std::ofstream> FuncsFile;
431 if (!opts::GenerateFunctionOrderFile.empty()) {
432 FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
433 std::ios::out);
434 if (!FuncsFile) {
435 errs() << "BOLT-ERROR: ordered functions file "
436 << opts::GenerateFunctionOrderFile << " cannot be opened\n";
437 exit(1);
441 std::unique_ptr<std::ofstream> LinkSectionsFile;
442 if (!opts::LinkSectionsFile.empty()) {
443 LinkSectionsFile =
444 std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
445 if (!LinkSectionsFile) {
446 errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
447 << " cannot be opened\n";
448 exit(1);
452 if (FuncsFile || LinkSectionsFile) {
453 std::vector<BinaryFunction *> SortedFunctions(BFs.size());
454 llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
455 [](BinaryFunction &BF) { return &BF; });
457 // Sort functions by index.
458 llvm::stable_sort(SortedFunctions,
459 [](const BinaryFunction *A, const BinaryFunction *B) {
460 if (A->hasValidIndex() && B->hasValidIndex())
461 return A->getIndex() < B->getIndex();
462 if (A->hasValidIndex() && !B->hasValidIndex())
463 return true;
464 if (!A->hasValidIndex() && B->hasValidIndex())
465 return false;
466 return A->getAddress() < B->getAddress();
469 for (const BinaryFunction *Func : SortedFunctions) {
470 if (!Func->hasValidIndex())
471 break;
472 if (Func->isPLTFunction())
473 continue;
475 if (FuncsFile)
476 *FuncsFile << Func->getOneName().str() << '\n';
478 if (LinkSectionsFile) {
479 const char *Indent = "";
480 std::vector<StringRef> AllNames = Func->getNames();
481 llvm::sort(AllNames);
482 for (StringRef Name : AllNames) {
483 const size_t SlashPos = Name.find('/');
484 if (SlashPos != std::string::npos) {
485 // Avoid duplicates for local functions.
486 if (Name.find('/', SlashPos + 1) != std::string::npos)
487 continue;
488 Name = Name.substr(0, SlashPos);
490 *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
491 Indent = " ";
496 if (FuncsFile) {
497 FuncsFile->close();
498 outs() << "BOLT-INFO: dumped function order to "
499 << opts::GenerateFunctionOrderFile << '\n';
502 if (LinkSectionsFile) {
503 LinkSectionsFile->close();
504 outs() << "BOLT-INFO: dumped linker section order to "
505 << opts::LinkSectionsFile << '\n';
510 } // namespace bolt
511 } // namespace llvm