1 //===- SectionPriorities.cpp ----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
10 /// about the algorithm.
12 //===----------------------------------------------------------------------===//
14 #include "SectionPriorities.h"
15 #include "BPSectionOrderer.h"
17 #include "InputFiles.h"
21 #include "lld/Common/Args.h"
22 #include "lld/Common/CommonLinkerContext.h"
23 #include "lld/Common/ErrorHandler.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/MapVector.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/TimeProfiler.h"
28 #include "llvm/Support/raw_ostream.h"
33 using namespace llvm::MachO
;
34 using namespace llvm::sys
;
36 using namespace lld::macho
;
38 PriorityBuilder
macho::priorityBuilder
;
42 size_t highestAvailablePriority
= std::numeric_limits
<size_t>::max();
50 Cluster(int sec
, size_t s
) : next(sec
), prev(sec
), size(s
) {}
52 double getDensity() const {
55 return double(weight
) / double(size
);
62 uint64_t initialWeight
= 0;
63 Edge bestPred
= {-1, 0};
68 CallGraphSort(const MapVector
<SectionPair
, uint64_t> &profile
);
70 DenseMap
<const InputSection
*, size_t> run();
73 std::vector
<Cluster
> clusters
;
74 std::vector
<const InputSection
*> sections
;
76 // Maximum amount the combined cluster density can be worse than the original
77 // cluster to consider merging.
78 constexpr int MAX_DENSITY_DEGRADATION
= 8;
79 } // end anonymous namespace
81 // Take the edge list in callGraphProfile, resolve symbol names to Symbols, and
82 // generate a graph between InputSections with the provided weights.
83 CallGraphSort::CallGraphSort(const MapVector
<SectionPair
, uint64_t> &profile
) {
84 DenseMap
<const InputSection
*, int> secToCluster
;
86 auto getOrCreateCluster
= [&](const InputSection
*isec
) -> int {
87 auto res
= secToCluster
.try_emplace(isec
, clusters
.size());
89 sections
.push_back(isec
);
90 clusters
.emplace_back(clusters
.size(), isec
->getSize());
92 return res
.first
->second
;
96 for (const std::pair
<SectionPair
, uint64_t> &c
: profile
) {
97 const auto fromSec
= c
.first
.first
->canonical();
98 const auto toSec
= c
.first
.second
->canonical();
99 uint64_t weight
= c
.second
;
100 // Ignore edges between input sections belonging to different output
101 // sections. This is done because otherwise we would end up with clusters
102 // containing input sections that can't actually be placed adjacently in the
103 // output. This messes with the cluster size and density calculations. We
104 // would also end up moving input sections in other output sections without
105 // moving them closer to what calls them.
106 if (fromSec
->parent
!= toSec
->parent
)
109 int from
= getOrCreateCluster(fromSec
);
110 int to
= getOrCreateCluster(toSec
);
112 clusters
[to
].weight
+= weight
;
117 // Remember the best edge.
118 Cluster
&toC
= clusters
[to
];
119 if (toC
.bestPred
.from
== -1 || toC
.bestPred
.weight
< weight
) {
120 toC
.bestPred
.from
= from
;
121 toC
.bestPred
.weight
= weight
;
124 for (Cluster
&c
: clusters
)
125 c
.initialWeight
= c
.weight
;
128 // It's bad to merge clusters which would degrade the density too much.
129 static bool isNewDensityBad(Cluster
&a
, Cluster
&b
) {
130 double newDensity
= double(a
.weight
+ b
.weight
) / double(a
.size
+ b
.size
);
131 return newDensity
< a
.getDensity() / MAX_DENSITY_DEGRADATION
;
134 // Find the leader of V's belonged cluster (represented as an equivalence
135 // class). We apply union-find path-halving technique (simple to implement) in
136 // the meantime as it decreases depths and the time complexity.
137 static int getLeader(std::vector
<int> &leaders
, int v
) {
138 while (leaders
[v
] != v
) {
139 leaders
[v
] = leaders
[leaders
[v
]];
145 static void mergeClusters(std::vector
<Cluster
> &cs
, Cluster
&into
, int intoIdx
,
146 Cluster
&from
, int fromIdx
) {
147 int tail1
= into
.prev
, tail2
= from
.prev
;
149 cs
[tail2
].next
= intoIdx
;
151 cs
[tail1
].next
= fromIdx
;
152 into
.size
+= from
.size
;
153 into
.weight
+= from
.weight
;
158 // Group InputSections into clusters using the Call-Chain Clustering heuristic
159 // then sort the clusters by density.
160 DenseMap
<const InputSection
*, size_t> CallGraphSort::run() {
161 const uint64_t maxClusterSize
= target
->getPageSize();
163 // Cluster indices sorted by density.
164 std::vector
<int> sorted(clusters
.size());
166 std::vector
<int> leaders(clusters
.size());
168 std::iota(leaders
.begin(), leaders
.end(), 0);
169 std::iota(sorted
.begin(), sorted
.end(), 0);
171 llvm::stable_sort(sorted
, [&](int a
, int b
) {
172 return clusters
[a
].getDensity() > clusters
[b
].getDensity();
175 for (int l
: sorted
) {
176 // The cluster index is the same as the index of its leader here because
177 // clusters[L] has not been merged into another cluster yet.
178 Cluster
&c
= clusters
[l
];
180 // Don't consider merging if the edge is unlikely.
181 if (c
.bestPred
.from
== -1 || c
.bestPred
.weight
* 10 <= c
.initialWeight
)
184 int predL
= getLeader(leaders
, c
.bestPred
.from
);
185 // Already in the same cluster.
189 Cluster
*predC
= &clusters
[predL
];
190 if (c
.size
+ predC
->size
> maxClusterSize
)
193 if (isNewDensityBad(*predC
, c
))
197 mergeClusters(clusters
, *predC
, predL
, c
, l
);
199 // Sort remaining non-empty clusters by density.
201 for (int i
= 0, e
= (int)clusters
.size(); i
!= e
; ++i
)
202 if (clusters
[i
].size
> 0)
204 llvm::stable_sort(sorted
, [&](int a
, int b
) {
205 return clusters
[a
].getDensity() > clusters
[b
].getDensity();
208 DenseMap
<const InputSection
*, size_t> orderMap
;
210 // Sections will be sorted by decreasing order. Absent sections will have
211 // priority 0 and be placed at the end of sections.
212 // NB: This is opposite from COFF/ELF to be compatible with the existing
214 int curOrder
= highestAvailablePriority
;
215 for (int leader
: sorted
) {
216 for (int i
= leader
;;) {
217 orderMap
[sections
[i
]] = curOrder
--;
218 i
= clusters
[i
].next
;
223 if (!config
->printSymbolOrder
.empty()) {
225 raw_fd_ostream
os(config
->printSymbolOrder
, ec
, sys::fs::OF_None
);
227 error("cannot open " + config
->printSymbolOrder
+ ": " + ec
.message());
230 // Print the symbols ordered by C3, in the order of decreasing curOrder
231 // Instead of sorting all the orderMap, just repeat the loops above.
232 for (int leader
: sorted
)
233 for (int i
= leader
;;) {
234 const InputSection
*isec
= sections
[i
];
235 // Search all the symbols in the file of the section
236 // and find out a Defined symbol with name that is within the
238 for (Symbol
*sym
: isec
->getFile()->symbols
) {
239 if (auto *d
= dyn_cast_or_null
<Defined
>(sym
)) {
240 if (d
->isec() == isec
)
241 os
<< sym
->getName() << "\n";
244 i
= clusters
[i
].next
;
253 std::optional
<size_t>
254 macho::PriorityBuilder::getSymbolPriority(const Defined
*sym
) {
255 if (sym
->isAbsolute())
258 auto it
= priorities
.find(sym
->getName());
259 if (it
== priorities
.end())
261 const SymbolPriorityEntry
&entry
= it
->second
;
262 const InputFile
*f
= sym
->isec()->getFile();
264 return entry
.anyObjectFile
;
265 // We don't use toString(InputFile *) here because it returns the full path
266 // for object files, and we only want the basename.
268 if (f
->archiveName
.empty())
269 filename
= path::filename(f
->getName());
271 filename
= saver().save(path::filename(f
->archiveName
) + "(" +
272 path::filename(f
->getName()) + ")");
273 return std::max(entry
.objectFiles
.lookup(filename
), entry
.anyObjectFile
);
276 void macho::PriorityBuilder::extractCallGraphProfile() {
277 TimeTraceScope
timeScope("Extract call graph profile");
278 bool hasOrderFile
= !priorities
.empty();
279 for (const InputFile
*file
: inputFiles
) {
280 auto *obj
= dyn_cast_or_null
<ObjFile
>(file
);
283 for (const CallGraphEntry
&entry
: obj
->callGraph
) {
284 assert(entry
.fromIndex
< obj
->symbols
.size() &&
285 entry
.toIndex
< obj
->symbols
.size());
286 auto *fromSym
= dyn_cast_or_null
<Defined
>(obj
->symbols
[entry
.fromIndex
]);
287 auto *toSym
= dyn_cast_or_null
<Defined
>(obj
->symbols
[entry
.toIndex
]);
288 if (fromSym
&& toSym
&&
290 (!getSymbolPriority(fromSym
) && !getSymbolPriority(toSym
))))
291 callGraphProfile
[{fromSym
->isec(), toSym
->isec()}] += entry
.count
;
296 void macho::PriorityBuilder::parseOrderFile(StringRef path
) {
297 assert(callGraphProfile
.empty() &&
298 "Order file must be parsed before call graph profile is processed");
299 std::optional
<MemoryBufferRef
> buffer
= readFile(path
);
301 error("Could not read order file at " + path
);
305 MemoryBufferRef mbref
= *buffer
;
306 for (StringRef line
: args::getLines(mbref
)) {
307 StringRef objectFile
, symbol
;
308 line
= line
.take_until([](char c
) { return c
== '#'; }); // ignore comments
311 CPUType cpuType
= StringSwitch
<CPUType
>(line
)
312 .StartsWith("i386:", CPU_TYPE_I386
)
313 .StartsWith("x86_64:", CPU_TYPE_X86_64
)
314 .StartsWith("arm:", CPU_TYPE_ARM
)
315 .StartsWith("arm64:", CPU_TYPE_ARM64
)
316 .StartsWith("ppc:", CPU_TYPE_POWERPC
)
317 .StartsWith("ppc64:", CPU_TYPE_POWERPC64
)
318 .Default(CPU_TYPE_ANY
);
320 if (cpuType
!= CPU_TYPE_ANY
&& cpuType
!= target
->cpuType
)
323 // Drop the CPU type as well as the colon
324 if (cpuType
!= CPU_TYPE_ANY
)
325 line
= line
.drop_until([](char c
) { return c
== ':'; }).drop_front();
327 constexpr std::array
<StringRef
, 2> fileEnds
= {".o:", ".o):"};
328 for (StringRef fileEnd
: fileEnds
) {
329 size_t pos
= line
.find(fileEnd
);
330 if (pos
!= StringRef::npos
) {
331 // Split the string around the colon
332 objectFile
= line
.take_front(pos
+ fileEnd
.size() - 1);
333 line
= line
.drop_front(pos
+ fileEnd
.size());
337 symbol
= line
.trim();
339 if (!symbol
.empty()) {
340 SymbolPriorityEntry
&entry
= priorities
[symbol
];
341 if (!objectFile
.empty())
342 entry
.objectFiles
.insert(
343 std::make_pair(objectFile
, highestAvailablePriority
));
345 entry
.anyObjectFile
=
346 std::max(entry
.anyObjectFile
, highestAvailablePriority
);
349 --highestAvailablePriority
;
353 DenseMap
<const InputSection
*, size_t>
354 macho::PriorityBuilder::buildInputSectionPriorities() {
355 DenseMap
<const InputSection
*, size_t> sectionPriorities
;
356 if (!config
->irpgoProfileSortProfilePath
.empty() ||
357 config
->functionOrderForCompression
|| config
->dataOrderForCompression
) {
358 TimeTraceScope
timeScope("Balanced Partitioning Section Orderer");
359 sectionPriorities
= runBalancedPartitioning(
360 highestAvailablePriority
, config
->irpgoProfileSortProfilePath
,
361 config
->functionOrderForCompression
, config
->dataOrderForCompression
,
362 config
->compressionSortStartupFunctions
,
363 config
->verboseBpSectionOrderer
);
364 } else if (config
->callGraphProfileSort
) {
365 // Sort sections by the profile data provided by __LLVM,__cg_profile
368 // This first builds a call graph based on the profile data then merges
369 // sections according to the C³ heuristic. All clusters are then sorted by a
370 // density metric to further improve locality.
371 TimeTraceScope
timeScope("Call graph profile sort");
372 sectionPriorities
= CallGraphSort(callGraphProfile
).run();
375 if (priorities
.empty())
376 return sectionPriorities
;
378 auto addSym
= [&](const Defined
*sym
) {
379 std::optional
<size_t> symbolPriority
= getSymbolPriority(sym
);
382 size_t &priority
= sectionPriorities
[sym
->isec()];
383 priority
= std::max(priority
, *symbolPriority
);
386 // TODO: Make sure this handles weak symbols correctly.
387 for (const InputFile
*file
: inputFiles
) {
388 if (isa
<ObjFile
>(file
))
389 for (Symbol
*sym
: file
->symbols
)
390 if (auto *d
= dyn_cast_or_null
<Defined
>(sym
))
394 return sectionPriorities
;