1 //===- SectionPriorities.cpp ----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
10 /// about the algorithm.
12 //===----------------------------------------------------------------------===//
14 #include "SectionPriorities.h"
16 #include "InputFiles.h"
20 #include "lld/Common/Args.h"
21 #include "lld/Common/CommonLinkerContext.h"
22 #include "lld/Common/ErrorHandler.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/MapVector.h"
25 #include "llvm/Support/Path.h"
26 #include "llvm/Support/TimeProfiler.h"
27 #include "llvm/Support/raw_ostream.h"
32 using namespace llvm::MachO
;
33 using namespace llvm::sys
;
35 using namespace lld::macho
;
37 PriorityBuilder
macho::priorityBuilder
;
41 size_t highestAvailablePriority
= std::numeric_limits
<size_t>::max();
49 Cluster(int sec
, size_t s
) : next(sec
), prev(sec
), size(s
) {}
51 double getDensity() const {
54 return double(weight
) / double(size
);
61 uint64_t initialWeight
= 0;
62 Edge bestPred
= {-1, 0};
67 CallGraphSort(const MapVector
<SectionPair
, uint64_t> &profile
);
69 DenseMap
<const InputSection
*, size_t> run();
72 std::vector
<Cluster
> clusters
;
73 std::vector
<const InputSection
*> sections
;
75 // Maximum amount the combined cluster density can be worse than the original
76 // cluster to consider merging.
77 constexpr int MAX_DENSITY_DEGRADATION
= 8;
78 } // end anonymous namespace
80 // Take the edge list in callGraphProfile, resolve symbol names to Symbols, and
81 // generate a graph between InputSections with the provided weights.
82 CallGraphSort::CallGraphSort(const MapVector
<SectionPair
, uint64_t> &profile
) {
83 DenseMap
<const InputSection
*, int> secToCluster
;
85 auto getOrCreateCluster
= [&](const InputSection
*isec
) -> int {
86 auto res
= secToCluster
.try_emplace(isec
, clusters
.size());
88 sections
.push_back(isec
);
89 clusters
.emplace_back(clusters
.size(), isec
->getSize());
91 return res
.first
->second
;
95 for (const std::pair
<SectionPair
, uint64_t> &c
: profile
) {
96 const auto fromSec
= c
.first
.first
->canonical();
97 const auto toSec
= c
.first
.second
->canonical();
98 uint64_t weight
= c
.second
;
99 // Ignore edges between input sections belonging to different output
100 // sections. This is done because otherwise we would end up with clusters
101 // containing input sections that can't actually be placed adjacently in the
102 // output. This messes with the cluster size and density calculations. We
103 // would also end up moving input sections in other output sections without
104 // moving them closer to what calls them.
105 if (fromSec
->parent
!= toSec
->parent
)
108 int from
= getOrCreateCluster(fromSec
);
109 int to
= getOrCreateCluster(toSec
);
111 clusters
[to
].weight
+= weight
;
116 // Remember the best edge.
117 Cluster
&toC
= clusters
[to
];
118 if (toC
.bestPred
.from
== -1 || toC
.bestPred
.weight
< weight
) {
119 toC
.bestPred
.from
= from
;
120 toC
.bestPred
.weight
= weight
;
123 for (Cluster
&c
: clusters
)
124 c
.initialWeight
= c
.weight
;
127 // It's bad to merge clusters which would degrade the density too much.
128 static bool isNewDensityBad(Cluster
&a
, Cluster
&b
) {
129 double newDensity
= double(a
.weight
+ b
.weight
) / double(a
.size
+ b
.size
);
130 return newDensity
< a
.getDensity() / MAX_DENSITY_DEGRADATION
;
133 // Find the leader of V's belonged cluster (represented as an equivalence
134 // class). We apply union-find path-halving technique (simple to implement) in
135 // the meantime as it decreases depths and the time complexity.
136 static int getLeader(std::vector
<int> &leaders
, int v
) {
137 while (leaders
[v
] != v
) {
138 leaders
[v
] = leaders
[leaders
[v
]];
144 static void mergeClusters(std::vector
<Cluster
> &cs
, Cluster
&into
, int intoIdx
,
145 Cluster
&from
, int fromIdx
) {
146 int tail1
= into
.prev
, tail2
= from
.prev
;
148 cs
[tail2
].next
= intoIdx
;
150 cs
[tail1
].next
= fromIdx
;
151 into
.size
+= from
.size
;
152 into
.weight
+= from
.weight
;
157 // Group InputSections into clusters using the Call-Chain Clustering heuristic
158 // then sort the clusters by density.
159 DenseMap
<const InputSection
*, size_t> CallGraphSort::run() {
160 const uint64_t maxClusterSize
= target
->getPageSize();
162 // Cluster indices sorted by density.
163 std::vector
<int> sorted(clusters
.size());
165 std::vector
<int> leaders(clusters
.size());
167 std::iota(leaders
.begin(), leaders
.end(), 0);
168 std::iota(sorted
.begin(), sorted
.end(), 0);
170 llvm::stable_sort(sorted
, [&](int a
, int b
) {
171 return clusters
[a
].getDensity() > clusters
[b
].getDensity();
174 for (int l
: sorted
) {
175 // The cluster index is the same as the index of its leader here because
176 // clusters[L] has not been merged into another cluster yet.
177 Cluster
&c
= clusters
[l
];
179 // Don't consider merging if the edge is unlikely.
180 if (c
.bestPred
.from
== -1 || c
.bestPred
.weight
* 10 <= c
.initialWeight
)
183 int predL
= getLeader(leaders
, c
.bestPred
.from
);
184 // Already in the same cluster.
188 Cluster
*predC
= &clusters
[predL
];
189 if (c
.size
+ predC
->size
> maxClusterSize
)
192 if (isNewDensityBad(*predC
, c
))
196 mergeClusters(clusters
, *predC
, predL
, c
, l
);
198 // Sort remaining non-empty clusters by density.
200 for (int i
= 0, e
= (int)clusters
.size(); i
!= e
; ++i
)
201 if (clusters
[i
].size
> 0)
203 llvm::stable_sort(sorted
, [&](int a
, int b
) {
204 return clusters
[a
].getDensity() > clusters
[b
].getDensity();
207 DenseMap
<const InputSection
*, size_t> orderMap
;
209 // Sections will be sorted by decreasing order. Absent sections will have
210 // priority 0 and be placed at the end of sections.
211 // NB: This is opposite from COFF/ELF to be compatible with the existing
213 int curOrder
= highestAvailablePriority
;
214 for (int leader
: sorted
) {
215 for (int i
= leader
;;) {
216 orderMap
[sections
[i
]] = curOrder
--;
217 i
= clusters
[i
].next
;
222 if (!config
->printSymbolOrder
.empty()) {
224 raw_fd_ostream
os(config
->printSymbolOrder
, ec
, sys::fs::OF_None
);
226 error("cannot open " + config
->printSymbolOrder
+ ": " + ec
.message());
229 // Print the symbols ordered by C3, in the order of decreasing curOrder
230 // Instead of sorting all the orderMap, just repeat the loops above.
231 for (int leader
: sorted
)
232 for (int i
= leader
;;) {
233 const InputSection
*isec
= sections
[i
];
234 // Search all the symbols in the file of the section
235 // and find out a Defined symbol with name that is within the
237 for (Symbol
*sym
: isec
->getFile()->symbols
) {
238 if (auto *d
= dyn_cast_or_null
<Defined
>(sym
)) {
240 os
<< sym
->getName() << "\n";
243 i
= clusters
[i
].next
;
252 std::optional
<size_t>
253 macho::PriorityBuilder::getSymbolPriority(const Defined
*sym
) {
254 if (sym
->isAbsolute())
257 auto it
= priorities
.find(sym
->getName());
258 if (it
== priorities
.end())
260 const SymbolPriorityEntry
&entry
= it
->second
;
261 const InputFile
*f
= sym
->isec
->getFile();
263 return entry
.anyObjectFile
;
264 // We don't use toString(InputFile *) here because it returns the full path
265 // for object files, and we only want the basename.
267 if (f
->archiveName
.empty())
268 filename
= path::filename(f
->getName());
270 filename
= saver().save(path::filename(f
->archiveName
) + "(" +
271 path::filename(f
->getName()) + ")");
272 return std::max(entry
.objectFiles
.lookup(filename
), entry
.anyObjectFile
);
275 void macho::PriorityBuilder::extractCallGraphProfile() {
276 TimeTraceScope
timeScope("Extract call graph profile");
277 bool hasOrderFile
= !priorities
.empty();
278 for (const InputFile
*file
: inputFiles
) {
279 auto *obj
= dyn_cast_or_null
<ObjFile
>(file
);
282 for (const CallGraphEntry
&entry
: obj
->callGraph
) {
283 assert(entry
.fromIndex
< obj
->symbols
.size() &&
284 entry
.toIndex
< obj
->symbols
.size());
285 auto *fromSym
= dyn_cast_or_null
<Defined
>(obj
->symbols
[entry
.fromIndex
]);
286 auto *toSym
= dyn_cast_or_null
<Defined
>(obj
->symbols
[entry
.toIndex
]);
287 if (fromSym
&& toSym
&&
289 (!getSymbolPriority(fromSym
) && !getSymbolPriority(toSym
))))
290 callGraphProfile
[{fromSym
->isec
, toSym
->isec
}] += entry
.count
;
295 void macho::PriorityBuilder::parseOrderFile(StringRef path
) {
296 assert(callGraphProfile
.empty() &&
297 "Order file must be parsed before call graph profile is processed");
298 std::optional
<MemoryBufferRef
> buffer
= readFile(path
);
300 error("Could not read order file at " + path
);
304 MemoryBufferRef mbref
= *buffer
;
305 for (StringRef line
: args::getLines(mbref
)) {
306 StringRef objectFile
, symbol
;
307 line
= line
.take_until([](char c
) { return c
== '#'; }); // ignore comments
310 CPUType cpuType
= StringSwitch
<CPUType
>(line
)
311 .StartsWith("i386:", CPU_TYPE_I386
)
312 .StartsWith("x86_64:", CPU_TYPE_X86_64
)
313 .StartsWith("arm:", CPU_TYPE_ARM
)
314 .StartsWith("arm64:", CPU_TYPE_ARM64
)
315 .StartsWith("ppc:", CPU_TYPE_POWERPC
)
316 .StartsWith("ppc64:", CPU_TYPE_POWERPC64
)
317 .Default(CPU_TYPE_ANY
);
319 if (cpuType
!= CPU_TYPE_ANY
&& cpuType
!= target
->cpuType
)
322 // Drop the CPU type as well as the colon
323 if (cpuType
!= CPU_TYPE_ANY
)
324 line
= line
.drop_until([](char c
) { return c
== ':'; }).drop_front();
326 constexpr std::array
<StringRef
, 2> fileEnds
= {".o:", ".o):"};
327 for (StringRef fileEnd
: fileEnds
) {
328 size_t pos
= line
.find(fileEnd
);
329 if (pos
!= StringRef::npos
) {
330 // Split the string around the colon
331 objectFile
= line
.take_front(pos
+ fileEnd
.size() - 1);
332 line
= line
.drop_front(pos
+ fileEnd
.size());
336 symbol
= line
.trim();
338 if (!symbol
.empty()) {
339 SymbolPriorityEntry
&entry
= priorities
[symbol
];
340 if (!objectFile
.empty())
341 entry
.objectFiles
.insert(
342 std::make_pair(objectFile
, highestAvailablePriority
));
344 entry
.anyObjectFile
=
345 std::max(entry
.anyObjectFile
, highestAvailablePriority
);
348 --highestAvailablePriority
;
352 DenseMap
<const InputSection
*, size_t>
353 macho::PriorityBuilder::buildInputSectionPriorities() {
354 DenseMap
<const InputSection
*, size_t> sectionPriorities
;
355 if (config
->callGraphProfileSort
) {
356 // Sort sections by the profile data provided by __LLVM,__cg_profile
359 // This first builds a call graph based on the profile data then merges
360 // sections according to the C³ heuristic. All clusters are then sorted by a
361 // density metric to further improve locality.
362 TimeTraceScope
timeScope("Call graph profile sort");
363 sectionPriorities
= CallGraphSort(callGraphProfile
).run();
366 if (priorities
.empty())
367 return sectionPriorities
;
369 auto addSym
= [&](const Defined
*sym
) {
370 std::optional
<size_t> symbolPriority
= getSymbolPriority(sym
);
373 size_t &priority
= sectionPriorities
[sym
->isec
];
374 priority
= std::max(priority
, *symbolPriority
);
377 // TODO: Make sure this handles weak symbols correctly.
378 for (const InputFile
*file
: inputFiles
) {
379 if (isa
<ObjFile
>(file
))
380 for (Symbol
*sym
: file
->symbols
)
381 if (auto *d
= dyn_cast_or_null
<Defined
>(sym
))
385 return sectionPriorities
;