Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Passes / ReorderData.cpp
blob6e1f9b6d77512e12572ad2a6198bb261de95ad81
1 //===- bolt/Passes/ReorderSection.cpp - Reordering of section data --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements ReorderData class.
11 //===----------------------------------------------------------------------===//
13 // TODO:
14 // - make sure writeable data isn't put on same cache line unless temporally
15 // local
16 // - estimate temporal locality by looking at CFG?
18 #include "bolt/Passes/ReorderData.h"
19 #include "llvm/ADT/MapVector.h"
20 #include <algorithm>
22 #undef DEBUG_TYPE
23 #define DEBUG_TYPE "reorder-data"
25 using namespace llvm;
26 using namespace bolt;
28 namespace opts {
29 extern cl::OptionCategory BoltCategory;
30 extern cl::OptionCategory BoltOptCategory;
31 extern cl::opt<JumpTableSupportLevel> JumpTables;
33 static cl::opt<bool>
34 PrintReorderedData("print-reordered-data",
35 cl::desc("print section contents after reordering"),
36 cl::Hidden, cl::cat(BoltCategory));
38 cl::list<std::string>
39 ReorderData("reorder-data",
40 cl::CommaSeparated,
41 cl::desc("list of sections to reorder"),
42 cl::value_desc("section1,section2,section3,..."),
43 cl::cat(BoltOptCategory));
45 enum ReorderAlgo : char {
46 REORDER_COUNT = 0,
47 REORDER_FUNCS = 1
50 static cl::opt<ReorderAlgo>
51 ReorderAlgorithm("reorder-data-algo",
52 cl::desc("algorithm used to reorder data sections"),
53 cl::init(REORDER_COUNT),
54 cl::values(
55 clEnumValN(REORDER_COUNT,
56 "count",
57 "sort hot data by read counts"),
58 clEnumValN(REORDER_FUNCS,
59 "funcs",
60 "sort hot data by hot function usage and count")),
61 cl::ZeroOrMore,
62 cl::cat(BoltOptCategory));
64 static cl::opt<unsigned>
65 ReorderDataMaxSymbols("reorder-data-max-symbols",
66 cl::desc("maximum number of symbols to reorder"),
67 cl::init(std::numeric_limits<unsigned>::max()),
68 cl::cat(BoltOptCategory));
70 static cl::opt<unsigned> ReorderDataMaxBytes(
71 "reorder-data-max-bytes", cl::desc("maximum number of bytes to reorder"),
72 cl::init(std::numeric_limits<unsigned>::max()), cl::cat(BoltOptCategory));
74 static cl::list<std::string>
75 ReorderSymbols("reorder-symbols",
76 cl::CommaSeparated,
77 cl::desc("list of symbol names that can be reordered"),
78 cl::value_desc("symbol1,symbol2,symbol3,..."),
79 cl::Hidden,
80 cl::cat(BoltCategory));
82 static cl::list<std::string>
83 SkipSymbols("reorder-skip-symbols",
84 cl::CommaSeparated,
85 cl::desc("list of symbol names that cannot be reordered"),
86 cl::value_desc("symbol1,symbol2,symbol3,..."),
87 cl::Hidden,
88 cl::cat(BoltCategory));
90 static cl::opt<bool> ReorderInplace("reorder-data-inplace",
91 cl::desc("reorder data sections in place"),
93 cl::cat(BoltOptCategory));
96 namespace llvm {
97 namespace bolt {
99 namespace {
101 static constexpr uint16_t MinAlignment = 16;
103 bool isSupported(const BinarySection &BS) { return BS.isData() && !BS.isTLS(); }
105 bool filterSymbol(const BinaryData *BD) {
106 if (!BD->isAtomic() || BD->isJumpTable() || !BD->isMoveable())
107 return false;
109 bool IsValid = true;
111 if (!opts::ReorderSymbols.empty()) {
112 IsValid = llvm::any_of(opts::ReorderSymbols, [&](const std::string &Name) {
113 return BD->hasName(Name);
117 if (!IsValid)
118 return false;
120 if (!opts::SkipSymbols.empty()) {
121 for (const std::string &Name : opts::SkipSymbols) {
122 if (BD->hasName(Name)) {
123 IsValid = false;
124 break;
129 return IsValid;
132 } // namespace
134 using DataOrder = ReorderData::DataOrder;
136 void ReorderData::printOrder(const BinarySection &Section,
137 DataOrder::const_iterator Begin,
138 DataOrder::const_iterator End) const {
139 uint64_t TotalSize = 0;
140 bool PrintHeader = false;
141 while (Begin != End) {
142 const BinaryData *BD = Begin->first;
144 if (!PrintHeader) {
145 outs() << "BOLT-INFO: Hot global symbols for " << Section.getName()
146 << ":\n";
147 PrintHeader = true;
150 outs() << "BOLT-INFO: " << *BD << ", moveable=" << BD->isMoveable()
151 << format(", weight=%.5f\n", double(Begin->second) / BD->getSize());
153 TotalSize += BD->getSize();
154 ++Begin;
156 if (TotalSize)
157 outs() << "BOLT-INFO: Total hot symbol size = " << TotalSize << "\n";
160 DataOrder ReorderData::baseOrder(BinaryContext &BC,
161 const BinarySection &Section) const {
162 DataOrder Order;
163 for (auto &Entry : BC.getBinaryDataForSection(Section)) {
164 BinaryData *BD = Entry.second;
165 if (!BD->isAtomic()) // skip sub-symbols
166 continue;
167 auto BDCI = BinaryDataCounts.find(BD);
168 uint64_t BDCount = BDCI == BinaryDataCounts.end() ? 0 : BDCI->second;
169 Order.emplace_back(BD, BDCount);
171 return Order;
174 void ReorderData::assignMemData(BinaryContext &BC) {
175 // Map of sections (or heap/stack) to count/size.
176 MapVector<StringRef, uint64_t> Counts;
177 MapVector<StringRef, uint64_t> JumpTableCounts;
178 uint64_t TotalCount = 0;
179 for (auto &BFI : BC.getBinaryFunctions()) {
180 const BinaryFunction &BF = BFI.second;
181 if (!BF.hasMemoryProfile())
182 continue;
184 for (const BinaryBasicBlock &BB : BF) {
185 for (const MCInst &Inst : BB) {
186 auto ErrorOrMemAccessProfile =
187 BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>(
188 Inst, "MemoryAccessProfile");
189 if (!ErrorOrMemAccessProfile)
190 continue;
192 const MemoryAccessProfile &MemAccessProfile =
193 ErrorOrMemAccessProfile.get();
194 for (const AddressAccess &AccessInfo :
195 MemAccessProfile.AddressAccessInfo) {
196 if (BinaryData *BD = AccessInfo.MemoryObject) {
197 BinaryDataCounts[BD->getAtomicRoot()] += AccessInfo.Count;
198 Counts[BD->getSectionName()] += AccessInfo.Count;
199 if (BD->getAtomicRoot()->isJumpTable())
200 JumpTableCounts[BD->getSectionName()] += AccessInfo.Count;
201 } else {
202 Counts["Heap/stack"] += AccessInfo.Count;
204 TotalCount += AccessInfo.Count;
210 if (!Counts.empty()) {
211 outs() << "BOLT-INFO: Memory stats breakdown:\n";
212 for (const auto &KV : Counts) {
213 StringRef Section = KV.first;
214 const uint64_t Count = KV.second;
215 outs() << "BOLT-INFO: " << Section << " = " << Count
216 << format(" (%.1f%%)\n", 100.0 * Count / TotalCount);
217 if (JumpTableCounts.count(Section) != 0) {
218 const uint64_t JTCount = JumpTableCounts[Section];
219 outs() << "BOLT-INFO: jump tables = " << JTCount
220 << format(" (%.1f%%)\n", 100.0 * JTCount / Count);
223 outs() << "BOLT-INFO: Total memory events: " << TotalCount << "\n";
227 /// Only consider moving data that is used by the hottest functions with
228 /// valid profiles.
229 std::pair<DataOrder, unsigned>
230 ReorderData::sortedByFunc(BinaryContext &BC, const BinarySection &Section,
231 std::map<uint64_t, BinaryFunction> &BFs) const {
232 std::map<BinaryData *, std::set<BinaryFunction *>> BDtoFunc;
233 std::map<BinaryData *, uint64_t> BDtoFuncCount;
235 auto dataUses = [&BC](const BinaryFunction &BF, bool OnlyHot) {
236 std::set<BinaryData *> Uses;
237 for (const BinaryBasicBlock &BB : BF) {
238 if (OnlyHot && BB.isCold())
239 continue;
241 for (const MCInst &Inst : BB) {
242 auto ErrorOrMemAccessProfile =
243 BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>(
244 Inst, "MemoryAccessProfile");
245 if (!ErrorOrMemAccessProfile)
246 continue;
248 const MemoryAccessProfile &MemAccessProfile =
249 ErrorOrMemAccessProfile.get();
250 for (const AddressAccess &AccessInfo :
251 MemAccessProfile.AddressAccessInfo) {
252 if (AccessInfo.MemoryObject)
253 Uses.insert(AccessInfo.MemoryObject);
257 return Uses;
260 for (auto &Entry : BFs) {
261 BinaryFunction &BF = Entry.second;
262 if (BF.hasValidProfile()) {
263 for (BinaryData *BD : dataUses(BF, true)) {
264 if (!BC.getFunctionForSymbol(BD->getSymbol())) {
265 BDtoFunc[BD->getAtomicRoot()].insert(&BF);
266 BDtoFuncCount[BD->getAtomicRoot()] += BF.getKnownExecutionCount();
272 DataOrder Order = baseOrder(BC, Section);
273 unsigned SplitPoint = Order.size();
275 llvm::sort(
276 Order,
277 [&](const DataOrder::value_type &A, const DataOrder::value_type &B) {
278 // Total execution counts of functions referencing BD.
279 const uint64_t ACount = BDtoFuncCount[A.first];
280 const uint64_t BCount = BDtoFuncCount[B.first];
281 // Weight by number of loads/data size.
282 const double AWeight = double(A.second) / A.first->getSize();
283 const double BWeight = double(B.second) / B.first->getSize();
284 return (ACount > BCount ||
285 (ACount == BCount &&
286 (AWeight > BWeight ||
287 (AWeight == BWeight &&
288 A.first->getAddress() < B.first->getAddress()))));
291 for (unsigned Idx = 0; Idx < Order.size(); ++Idx) {
292 if (!BDtoFuncCount[Order[Idx].first]) {
293 SplitPoint = Idx;
294 break;
298 return std::make_pair(Order, SplitPoint);
301 std::pair<DataOrder, unsigned>
302 ReorderData::sortedByCount(BinaryContext &BC,
303 const BinarySection &Section) const {
304 DataOrder Order = baseOrder(BC, Section);
305 unsigned SplitPoint = Order.size();
307 llvm::sort(Order, [](const DataOrder::value_type &A,
308 const DataOrder::value_type &B) {
309 // Weight by number of loads/data size.
310 const double AWeight = double(A.second) / A.first->getSize();
311 const double BWeight = double(B.second) / B.first->getSize();
312 return (AWeight > BWeight ||
313 (AWeight == BWeight &&
314 (A.first->getSize() < B.first->getSize() ||
315 (A.first->getSize() == B.first->getSize() &&
316 A.first->getAddress() < B.first->getAddress()))));
319 for (unsigned Idx = 0; Idx < Order.size(); ++Idx) {
320 if (!Order[Idx].second) {
321 SplitPoint = Idx;
322 break;
326 return std::make_pair(Order, SplitPoint);
329 // TODO
330 // add option for cache-line alignment (or just use cache-line when section
331 // is writeable)?
332 void ReorderData::setSectionOrder(BinaryContext &BC,
333 BinarySection &OutputSection,
334 DataOrder::iterator Begin,
335 DataOrder::iterator End) {
336 std::vector<BinaryData *> NewOrder;
337 unsigned NumReordered = 0;
338 uint64_t Offset = 0;
339 uint64_t Count = 0;
341 // Get the total count just for stats
342 uint64_t TotalCount = 0;
343 for (auto Itr = Begin; Itr != End; ++Itr)
344 TotalCount += Itr->second;
346 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: setSectionOrder for "
347 << OutputSection.getName() << "\n");
349 for (; Begin != End; ++Begin) {
350 BinaryData *BD = Begin->first;
352 // We can't move certain symbols.
353 if (!filterSymbol(BD))
354 continue;
356 ++NumReordered;
357 if (NumReordered > opts::ReorderDataMaxSymbols) {
358 if (!NewOrder.empty())
359 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on symbol "
360 << *NewOrder.back() << "\n");
361 break;
364 uint16_t Alignment = std::max(BD->getAlignment(), MinAlignment);
365 Offset = alignTo(Offset, Alignment);
367 if ((Offset + BD->getSize()) > opts::ReorderDataMaxBytes) {
368 if (!NewOrder.empty())
369 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on symbol "
370 << *NewOrder.back() << "\n");
371 break;
374 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ 0x"
375 << Twine::utohexstr(Offset) << "\n");
377 BD->setOutputLocation(OutputSection, Offset);
379 // reorder sub-symbols
380 for (std::pair<const uint64_t, BinaryData *> &SubBD :
381 BC.getSubBinaryData(BD)) {
382 if (!SubBD.second->isJumpTable()) {
383 uint64_t SubOffset =
384 Offset + SubBD.second->getAddress() - BD->getAddress();
385 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: SubBD " << SubBD.second->getName()
386 << " @ " << SubOffset << "\n");
387 SubBD.second->setOutputLocation(OutputSection, SubOffset);
391 Offset += BD->getSize();
392 Count += Begin->second;
393 NewOrder.push_back(BD);
396 OutputSection.reorderContents(NewOrder, opts::ReorderInplace);
398 outs() << "BOLT-INFO: reorder-data: " << Count << "/" << TotalCount
399 << format(" (%.1f%%)", 100.0 * Count / TotalCount) << " events, "
400 << Offset << " hot bytes\n";
403 bool ReorderData::markUnmoveableSymbols(BinaryContext &BC,
404 BinarySection &Section) const {
405 // Private symbols currently can't be moved because data can "leak" across
406 // the boundary of one symbol to the next, e.g. a string that has a common
407 // suffix might start in one private symbol and end with the common
408 // suffix in another.
409 auto isPrivate = [&](const BinaryData *BD) {
410 auto Prefix = std::string("PG") + BC.AsmInfo->getPrivateGlobalPrefix();
411 return BD->getName().startswith(Prefix.str());
413 auto Range = BC.getBinaryDataForSection(Section);
414 bool FoundUnmoveable = false;
415 for (auto Itr = Range.begin(); Itr != Range.end(); ++Itr) {
416 BinaryData *Next =
417 std::next(Itr) != Range.end() ? std::next(Itr)->second : nullptr;
418 if (Itr->second->getName().startswith("PG.")) {
419 BinaryData *Prev =
420 Itr != Range.begin() ? std::prev(Itr)->second : nullptr;
421 bool PrevIsPrivate = Prev && isPrivate(Prev);
422 bool NextIsPrivate = Next && isPrivate(Next);
423 if (isPrivate(Itr->second) && (PrevIsPrivate || NextIsPrivate))
424 Itr->second->setIsMoveable(false);
425 } else {
426 // check for overlapping symbols.
427 if (Next && Itr->second->getEndAddress() != Next->getAddress() &&
428 Next->containsAddress(Itr->second->getEndAddress())) {
429 Itr->second->setIsMoveable(false);
430 Next->setIsMoveable(false);
433 FoundUnmoveable |= !Itr->second->isMoveable();
435 return FoundUnmoveable;
438 void ReorderData::runOnFunctions(BinaryContext &BC) {
439 static const char *DefaultSections[] = {".rodata", ".data", ".bss", nullptr};
441 if (!BC.HasRelocations || opts::ReorderData.empty())
442 return;
444 // For now
445 if (opts::JumpTables > JTS_BASIC) {
446 outs() << "BOLT-WARNING: jump table support must be basic for "
447 << "data reordering to work.\n";
448 return;
451 assignMemData(BC);
453 std::vector<BinarySection *> Sections;
455 for (const std::string &SectionName : opts::ReorderData) {
456 if (SectionName == "default") {
457 for (unsigned I = 0; DefaultSections[I]; ++I)
458 if (ErrorOr<BinarySection &> Section =
459 BC.getUniqueSectionByName(DefaultSections[I]))
460 Sections.push_back(&*Section);
461 continue;
464 ErrorOr<BinarySection &> Section = BC.getUniqueSectionByName(SectionName);
465 if (!Section) {
466 outs() << "BOLT-WARNING: Section " << SectionName
467 << " not found, skipping.\n";
468 continue;
471 if (!isSupported(*Section)) {
472 outs() << "BOLT-ERROR: Section " << SectionName << " not supported.\n";
473 exit(1);
476 Sections.push_back(&*Section);
479 for (BinarySection *Section : Sections) {
480 const bool FoundUnmoveable = markUnmoveableSymbols(BC, *Section);
482 DataOrder Order;
483 unsigned SplitPointIdx;
485 if (opts::ReorderAlgorithm == opts::ReorderAlgo::REORDER_COUNT) {
486 outs() << "BOLT-INFO: reorder-sections: ordering data by count\n";
487 std::tie(Order, SplitPointIdx) = sortedByCount(BC, *Section);
488 } else {
489 outs() << "BOLT-INFO: reorder-sections: ordering data by funcs\n";
490 std::tie(Order, SplitPointIdx) =
491 sortedByFunc(BC, *Section, BC.getBinaryFunctions());
493 auto SplitPoint = Order.begin() + SplitPointIdx;
495 if (opts::PrintReorderedData)
496 printOrder(*Section, Order.begin(), SplitPoint);
498 if (!opts::ReorderInplace || FoundUnmoveable) {
499 if (opts::ReorderInplace && FoundUnmoveable)
500 outs() << "BOLT-INFO: Found unmoveable symbols in "
501 << Section->getName() << " falling back to splitting "
502 << "instead of in-place reordering.\n";
504 // Rename sections.
505 BinarySection &Hot =
506 BC.registerSection(Section->getName() + ".hot", *Section);
507 Hot.setOutputName(Section->getName());
508 Section->setOutputName(".bolt.org" + Section->getName());
510 // Reorder contents of original section.
511 setSectionOrder(BC, Hot, Order.begin(), SplitPoint);
513 // This keeps the original data from thinking it has been moved.
514 for (std::pair<const uint64_t, BinaryData *> &Entry :
515 BC.getBinaryDataForSection(*Section)) {
516 if (!Entry.second->isMoved()) {
517 Entry.second->setSection(*Section);
518 Entry.second->setOutputSection(*Section);
521 } else {
522 outs() << "BOLT-WARNING: Inplace section reordering not supported yet.\n";
523 setSectionOrder(BC, *Section, Order.begin(), Order.end());
528 } // namespace bolt
529 } // namespace llvm