1 //===- BPSectionOrderer.cpp--------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "BPSectionOrderer.h"
10 #include "InputSection.h"
11 #include "lld/Common/ErrorHandler.h"
12 #include "llvm/ADT/DenseMap.h"
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/ProfileData/InstrProfReader.h"
15 #include "llvm/Support/BalancedPartitioning.h"
16 #include "llvm/Support/TimeProfiler.h"
17 #include "llvm/Support/VirtualFileSystem.h"
18 #include "llvm/Support/xxhash.h"
20 #define DEBUG_TYPE "bp-section-orderer"
22 using namespace lld::macho
;
24 using UtilityNodes
= SmallVector
<BPFunctionNode::UtilityNodeT
>;
26 /// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
27 /// "yyyy" are numbers that could change between builds. We need to use the root
28 /// symbol name before this suffix so these symbols can be matched with profiles
29 /// which may have different suffixes.
30 static StringRef
getRootSymbol(StringRef Name
) {
31 auto [P0
, S0
] = Name
.rsplit(".llvm.");
32 auto [P1
, S1
] = P0
.rsplit(".__uniq.");
36 static uint64_t getRelocHash(StringRef kind
, uint64_t sectionIdx
,
37 uint64_t offset
, uint64_t addend
) {
38 return xxHash64((kind
+ ": " + Twine::utohexstr(sectionIdx
) + " + " +
39 Twine::utohexstr(offset
) + " + " + Twine::utohexstr(addend
))
44 getRelocHash(const Reloc
&reloc
,
45 const DenseMap
<const InputSection
*, uint64_t> §ionToIdx
) {
46 auto *isec
= reloc
.getReferentInputSection();
47 std::optional
<uint64_t> sectionIdx
;
48 auto sectionIdxIt
= sectionToIdx
.find(isec
);
49 if (sectionIdxIt
!= sectionToIdx
.end())
50 sectionIdx
= sectionIdxIt
->getSecond();
53 kind
= ("Section " + Twine(static_cast<uint8_t>(isec
->kind()))).str();
54 if (auto *sym
= reloc
.referent
.dyn_cast
<Symbol
*>()) {
55 kind
+= (" Symbol " + Twine(static_cast<uint8_t>(sym
->kind()))).str();
56 if (auto *d
= dyn_cast
<Defined
>(sym
))
57 return getRelocHash(kind
, sectionIdx
.value_or(0), d
->value
, reloc
.addend
);
59 return getRelocHash(kind
, sectionIdx
.value_or(0), 0, reloc
.addend
);
62 /// Given \p sectionIdxs, a list of section indexes, return a list of utility
63 /// nodes for each section index. If \p duplicateSectionIdx is provided,
64 /// populate it with nearly identical sections. Increment \p maxUN to be the
65 /// largest utility node we have used so far.
66 static SmallVector
<std::pair
<unsigned, UtilityNodes
>> getUnsForCompression(
67 ArrayRef
<const InputSection
*> sections
,
68 const DenseMap
<const InputSection
*, uint64_t> §ionToIdx
,
69 ArrayRef
<unsigned> sectionIdxs
,
70 DenseMap
<unsigned, SmallVector
<unsigned>> *duplicateSectionIdxs
,
71 BPFunctionNode::UtilityNodeT
&maxUN
) {
72 TimeTraceScope
timeScope("Build nodes for compression");
74 SmallVector
<std::pair
<unsigned, SmallVector
<uint64_t>>> sectionHashes
;
75 sectionHashes
.reserve(sectionIdxs
.size());
76 SmallVector
<uint64_t> hashes
;
77 for (unsigned sectionIdx
: sectionIdxs
) {
78 const auto *isec
= sections
[sectionIdx
];
79 constexpr unsigned windowSize
= 4;
81 for (size_t i
= 0; i
< isec
->data
.size(); i
++) {
82 auto window
= isec
->data
.drop_front(i
).take_front(windowSize
);
83 hashes
.push_back(xxHash64(window
));
85 for (const auto &r
: isec
->relocs
) {
86 if (r
.length
== 0 || r
.referent
.isNull() || r
.offset
>= isec
->data
.size())
88 uint64_t relocHash
= getRelocHash(r
, sectionToIdx
);
89 uint32_t start
= (r
.offset
< windowSize
) ? 0 : r
.offset
- windowSize
+ 1;
90 for (uint32_t i
= start
; i
< r
.offset
+ r
.length
; i
++) {
91 auto window
= isec
->data
.drop_front(i
).take_front(windowSize
);
92 hashes
.push_back(xxHash64(window
) + relocHash
);
97 hashes
.erase(std::unique(hashes
.begin(), hashes
.end()), hashes
.end());
99 sectionHashes
.emplace_back(sectionIdx
, hashes
);
103 DenseMap
<uint64_t, unsigned> hashFrequency
;
104 for (auto &[sectionIdx
, hashes
] : sectionHashes
)
105 for (auto hash
: hashes
)
106 ++hashFrequency
[hash
];
108 if (duplicateSectionIdxs
) {
109 // Merge section that are nearly identical
110 SmallVector
<std::pair
<unsigned, SmallVector
<uint64_t>>> newSectionHashes
;
111 DenseMap
<uint64_t, unsigned> wholeHashToSectionIdx
;
112 for (auto &[sectionIdx
, hashes
] : sectionHashes
) {
113 uint64_t wholeHash
= 0;
114 for (auto hash
: hashes
)
115 if (hashFrequency
[hash
] > 5)
117 auto [it
, wasInserted
] =
118 wholeHashToSectionIdx
.insert(std::make_pair(wholeHash
, sectionIdx
));
120 newSectionHashes
.emplace_back(sectionIdx
, hashes
);
122 (*duplicateSectionIdxs
)[it
->getSecond()].push_back(sectionIdx
);
125 sectionHashes
= newSectionHashes
;
127 // Recompute hash frequencies
128 hashFrequency
.clear();
129 for (auto &[sectionIdx
, hashes
] : sectionHashes
)
130 for (auto hash
: hashes
)
131 ++hashFrequency
[hash
];
134 // Filter rare and common hashes and assign each a unique utility node that
135 // doesn't conflict with the trace utility nodes
136 DenseMap
<uint64_t, BPFunctionNode::UtilityNodeT
> hashToUN
;
137 for (auto &[hash
, frequency
] : hashFrequency
) {
138 if (frequency
<= 1 || frequency
* 2 > sectionHashes
.size())
140 hashToUN
[hash
] = ++maxUN
;
143 SmallVector
<std::pair
<unsigned, UtilityNodes
>> sectionUns
;
144 for (auto &[sectionIdx
, hashes
] : sectionHashes
) {
146 for (auto &hash
: hashes
) {
147 auto it
= hashToUN
.find(hash
);
148 if (it
!= hashToUN
.end())
149 uns
.push_back(it
->second
);
151 sectionUns
.emplace_back(sectionIdx
, uns
);
156 DenseMap
<const InputSection
*, size_t> lld::macho::runBalancedPartitioning(
157 size_t &highestAvailablePriority
, StringRef profilePath
,
158 bool forFunctionCompression
, bool forDataCompression
,
159 bool compressionSortStartupFunctions
, bool verbose
) {
161 SmallVector
<const InputSection
*> sections
;
162 DenseMap
<const InputSection
*, uint64_t> sectionToIdx
;
163 StringMap
<DenseSet
<unsigned>> symbolToSectionIdxs
;
164 for (const auto *file
: inputFiles
) {
165 for (auto *sec
: file
->sections
) {
166 for (auto &subsec
: sec
->subsections
) {
167 auto *isec
= subsec
.isec
;
168 if (!isec
|| isec
->data
.empty() || !isec
->data
.data())
170 unsigned sectionIdx
= sections
.size();
171 sectionToIdx
.try_emplace(isec
, sectionIdx
);
172 sections
.push_back(isec
);
173 for (Symbol
*sym
: isec
->symbols
)
174 if (auto *d
= dyn_cast_or_null
<Defined
>(sym
))
175 symbolToSectionIdxs
[d
->getName()].insert(sectionIdx
);
180 StringMap
<DenseSet
<unsigned>> rootSymbolToSectionIdxs
;
181 for (auto &entry
: symbolToSectionIdxs
) {
182 StringRef name
= entry
.getKey();
183 auto §ionIdxs
= entry
.getValue();
184 name
= getRootSymbol(name
);
185 rootSymbolToSectionIdxs
[name
].insert(sectionIdxs
.begin(),
187 // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
188 // Mangler::getNameWithPrefix() for details.
189 if (name
.consume_front("_") || name
.consume_front("l_"))
190 rootSymbolToSectionIdxs
[name
].insert(sectionIdxs
.begin(),
194 BPFunctionNode::UtilityNodeT maxUN
= 0;
195 DenseMap
<unsigned, UtilityNodes
> startupSectionIdxUNs
;
196 // Used to define the initial order for startup functions.
197 DenseMap
<unsigned, size_t> sectionIdxToTimestamp
;
198 std::unique_ptr
<InstrProfReader
> reader
;
199 if (!profilePath
.empty()) {
200 auto fs
= vfs::getRealFileSystem();
201 auto readerOrErr
= InstrProfReader::create(profilePath
, *fs
);
202 lld::checkError(readerOrErr
.takeError());
204 reader
= std::move(readerOrErr
.get());
205 for (auto &entry
: *reader
) {
209 auto &traces
= reader
->getTemporalProfTraces();
211 DenseMap
<unsigned, BPFunctionNode::UtilityNodeT
> sectionIdxToFirstUN
;
212 for (size_t traceIdx
= 0; traceIdx
< traces
.size(); traceIdx
++) {
213 uint64_t currentSize
= 0, cutoffSize
= 1;
214 size_t cutoffTimestamp
= 1;
215 auto &trace
= traces
[traceIdx
].FunctionNameRefs
;
216 for (size_t timestamp
= 0; timestamp
< trace
.size(); timestamp
++) {
217 auto [Filename
, ParsedFuncName
] = getParsedIRPGOName(
218 reader
->getSymtab().getFuncOrVarName(trace
[timestamp
]));
219 ParsedFuncName
= getRootSymbol(ParsedFuncName
);
221 auto sectionIdxsIt
= rootSymbolToSectionIdxs
.find(ParsedFuncName
);
222 if (sectionIdxsIt
== rootSymbolToSectionIdxs
.end())
224 auto §ionIdxs
= sectionIdxsIt
->getValue();
225 // If the same symbol is found in multiple sections, they might be
226 // identical, so we arbitrarily use the size from the first section.
227 currentSize
+= sections
[*sectionIdxs
.begin()]->getSize();
229 // Since BalancedPartitioning is sensitive to the initial order, we need
230 // to explicitly define it to be ordered by earliest timestamp.
231 for (unsigned sectionIdx
: sectionIdxs
) {
232 auto [it
, wasInserted
] =
233 sectionIdxToTimestamp
.try_emplace(sectionIdx
, timestamp
);
235 it
->getSecond() = std::min
<size_t>(it
->getSecond(), timestamp
);
238 if (timestamp
>= cutoffTimestamp
|| currentSize
>= cutoffSize
) {
240 cutoffSize
= 2 * currentSize
;
241 cutoffTimestamp
= 2 * cutoffTimestamp
;
243 for (unsigned sectionIdx
: sectionIdxs
)
244 sectionIdxToFirstUN
.try_emplace(sectionIdx
, maxUN
);
246 for (auto &[sectionIdx
, firstUN
] : sectionIdxToFirstUN
)
247 for (auto un
= firstUN
; un
<= maxUN
; ++un
)
248 startupSectionIdxUNs
[sectionIdx
].push_back(un
);
250 sectionIdxToFirstUN
.clear();
254 SmallVector
<unsigned> sectionIdxsForFunctionCompression
,
255 sectionIdxsForDataCompression
;
256 for (unsigned sectionIdx
= 0; sectionIdx
< sections
.size(); sectionIdx
++) {
257 if (startupSectionIdxUNs
.count(sectionIdx
))
259 const auto *isec
= sections
[sectionIdx
];
260 if (isCodeSection(isec
)) {
261 if (forFunctionCompression
)
262 sectionIdxsForFunctionCompression
.push_back(sectionIdx
);
264 if (forDataCompression
)
265 sectionIdxsForDataCompression
.push_back(sectionIdx
);
269 if (compressionSortStartupFunctions
) {
270 SmallVector
<unsigned> startupIdxs
;
271 for (auto &[sectionIdx
, uns
] : startupSectionIdxUNs
)
272 startupIdxs
.push_back(sectionIdx
);
273 auto unsForStartupFunctionCompression
=
274 getUnsForCompression(sections
, sectionToIdx
, startupIdxs
,
275 /*duplicateSectionIdxs=*/nullptr, maxUN
);
276 for (auto &[sectionIdx
, compressionUns
] :
277 unsForStartupFunctionCompression
) {
278 auto &uns
= startupSectionIdxUNs
[sectionIdx
];
279 uns
.append(compressionUns
);
281 uns
.erase(std::unique(uns
.begin(), uns
.end()), uns
.end());
285 // Map a section index (order directly) to a list of duplicate section indices
286 // (not ordered directly).
287 DenseMap
<unsigned, SmallVector
<unsigned>> duplicateSectionIdxs
;
288 auto unsForFunctionCompression
= getUnsForCompression(
289 sections
, sectionToIdx
, sectionIdxsForFunctionCompression
,
290 &duplicateSectionIdxs
, maxUN
);
291 auto unsForDataCompression
= getUnsForCompression(
292 sections
, sectionToIdx
, sectionIdxsForDataCompression
,
293 &duplicateSectionIdxs
, maxUN
);
295 std::vector
<BPFunctionNode
> nodesForStartup
, nodesForFunctionCompression
,
296 nodesForDataCompression
;
297 for (auto &[sectionIdx
, uns
] : startupSectionIdxUNs
)
298 nodesForStartup
.emplace_back(sectionIdx
, uns
);
299 for (auto &[sectionIdx
, uns
] : unsForFunctionCompression
)
300 nodesForFunctionCompression
.emplace_back(sectionIdx
, uns
);
301 for (auto &[sectionIdx
, uns
] : unsForDataCompression
)
302 nodesForDataCompression
.emplace_back(sectionIdx
, uns
);
304 // Use the first timestamp to define the initial order for startup nodes.
305 llvm::sort(nodesForStartup
, [§ionIdxToTimestamp
](auto &L
, auto &R
) {
306 return std::make_pair(sectionIdxToTimestamp
[L
.Id
], L
.Id
) <
307 std::make_pair(sectionIdxToTimestamp
[R
.Id
], R
.Id
);
309 // Sort compression nodes by their Id (which is the section index) because the
310 // input linker order tends to be not bad.
311 llvm::sort(nodesForFunctionCompression
,
312 [](auto &L
, auto &R
) { return L
.Id
< R
.Id
; });
313 llvm::sort(nodesForDataCompression
,
314 [](auto &L
, auto &R
) { return L
.Id
< R
.Id
; });
317 TimeTraceScope
timeScope("Balanced Partitioning");
318 BalancedPartitioningConfig config
;
319 BalancedPartitioning
bp(config
);
320 bp
.run(nodesForStartup
);
321 bp
.run(nodesForFunctionCompression
);
322 bp
.run(nodesForDataCompression
);
325 unsigned numStartupSections
= 0;
326 unsigned numCodeCompressionSections
= 0;
327 unsigned numDuplicateCodeSections
= 0;
328 unsigned numDataCompressionSections
= 0;
329 unsigned numDuplicateDataSections
= 0;
330 SetVector
<const InputSection
*> orderedSections
;
331 // Order startup functions,
332 for (auto &node
: nodesForStartup
) {
333 const auto *isec
= sections
[node
.Id
];
334 if (orderedSections
.insert(isec
))
335 ++numStartupSections
;
337 // then functions for compression,
338 for (auto &node
: nodesForFunctionCompression
) {
339 const auto *isec
= sections
[node
.Id
];
340 if (orderedSections
.insert(isec
))
341 ++numCodeCompressionSections
;
343 auto It
= duplicateSectionIdxs
.find(node
.Id
);
344 if (It
== duplicateSectionIdxs
.end())
346 for (auto dupSecIdx
: It
->getSecond()) {
347 const auto *dupIsec
= sections
[dupSecIdx
];
348 if (orderedSections
.insert(dupIsec
))
349 ++numDuplicateCodeSections
;
352 // then data for compression.
353 for (auto &node
: nodesForDataCompression
) {
354 const auto *isec
= sections
[node
.Id
];
355 if (orderedSections
.insert(isec
))
356 ++numDataCompressionSections
;
357 auto It
= duplicateSectionIdxs
.find(node
.Id
);
358 if (It
== duplicateSectionIdxs
.end())
360 for (auto dupSecIdx
: It
->getSecond()) {
361 const auto *dupIsec
= sections
[dupSecIdx
];
362 if (orderedSections
.insert(dupIsec
))
363 ++numDuplicateDataSections
;
368 unsigned numTotalOrderedSections
=
369 numStartupSections
+ numCodeCompressionSections
+
370 numDuplicateCodeSections
+ numDataCompressionSections
+
371 numDuplicateDataSections
;
373 << "Ordered " << numTotalOrderedSections
374 << " sections using balanced partitioning:\n Functions for startup: "
375 << numStartupSections
376 << "\n Functions for compression: " << numCodeCompressionSections
377 << "\n Duplicate functions: " << numDuplicateCodeSections
378 << "\n Data for compression: " << numDataCompressionSections
379 << "\n Duplicate data: " << numDuplicateDataSections
<< "\n";
381 if (!profilePath
.empty()) {
382 // Evaluate this function order for startup
383 StringMap
<std::pair
<uint64_t, uint64_t>> symbolToPageNumbers
;
384 const uint64_t pageSize
= (1 << 14);
385 uint64_t currentAddress
= 0;
386 for (const auto *isec
: orderedSections
) {
387 for (Symbol
*sym
: isec
->symbols
) {
388 if (auto *d
= dyn_cast_or_null
<Defined
>(sym
)) {
389 uint64_t startAddress
= currentAddress
+ d
->value
;
390 uint64_t endAddress
= startAddress
+ d
->size
;
391 uint64_t firstPage
= startAddress
/ pageSize
;
392 // I think the kernel might pull in a few pages when one it touched,
393 // so it might be more accurate to force lastPage to be aligned by
395 uint64_t lastPage
= endAddress
/ pageSize
;
396 StringRef rootSymbol
= d
->getName();
397 rootSymbol
= getRootSymbol(rootSymbol
);
398 symbolToPageNumbers
.try_emplace(rootSymbol
, firstPage
, lastPage
);
399 if (rootSymbol
.consume_front("_") || rootSymbol
.consume_front("l_"))
400 symbolToPageNumbers
.try_emplace(rootSymbol
, firstPage
, lastPage
);
404 currentAddress
+= isec
->getSize();
407 // The area under the curve F where F(t) is the total number of page
410 for (auto &trace
: reader
->getTemporalProfTraces()) {
411 SmallSet
<uint64_t, 0> touchedPages
;
412 for (unsigned step
= 0; step
< trace
.FunctionNameRefs
.size(); step
++) {
413 auto traceId
= trace
.FunctionNameRefs
[step
];
414 auto [Filename
, ParsedFuncName
] =
415 getParsedIRPGOName(reader
->getSymtab().getFuncOrVarName(traceId
));
416 ParsedFuncName
= getRootSymbol(ParsedFuncName
);
417 auto it
= symbolToPageNumbers
.find(ParsedFuncName
);
418 if (it
!= symbolToPageNumbers
.end()) {
419 auto &[firstPage
, lastPage
] = it
->getValue();
420 for (uint64_t i
= firstPage
; i
<= lastPage
; i
++)
421 touchedPages
.insert(i
);
423 area
+= touchedPages
.size();
426 dbgs() << "Total area under the page fault curve: " << (float)area
431 DenseMap
<const InputSection
*, size_t> sectionPriorities
;
432 for (const auto *isec
: orderedSections
)
433 sectionPriorities
[isec
] = --highestAvailablePriority
;
434 return sectionPriorities
;