1 //===- SectionPriorities.cpp ----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
10 /// about the algorithm.
12 //===----------------------------------------------------------------------===//
14 #include "SectionPriorities.h"
16 #include "InputFiles.h"
20 #include "lld/Common/Args.h"
21 #include "lld/Common/CommonLinkerContext.h"
22 #include "lld/Common/ErrorHandler.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/MapVector.h"
25 #include "llvm/ADT/Optional.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/TimeProfiler.h"
28 #include "llvm/Support/raw_ostream.h"
32 using namespace llvm::MachO
;
33 using namespace llvm::sys
;
35 using namespace lld::macho
;
37 PriorityBuilder
macho::priorityBuilder
;
41 size_t highestAvailablePriority
= std::numeric_limits
<size_t>::max();
49 Cluster(int sec
, size_t s
) : next(sec
), prev(sec
), size(s
) {}
51 double getDensity() const {
54 return double(weight
) / double(size
);
61 uint64_t initialWeight
= 0;
62 Edge bestPred
= {-1, 0};
67 CallGraphSort(const MapVector
<SectionPair
, uint64_t> &profile
);
69 DenseMap
<const InputSection
*, size_t> run();
72 std::vector
<Cluster
> clusters
;
73 std::vector
<const InputSection
*> sections
;
75 // Maximum amount the combined cluster density can be worse than the original
76 // cluster to consider merging.
77 constexpr int MAX_DENSITY_DEGRADATION
= 8;
78 } // end anonymous namespace
80 // Take the edge list in callGraphProfile, resolve symbol names to Symbols, and
81 // generate a graph between InputSections with the provided weights.
82 CallGraphSort::CallGraphSort(const MapVector
<SectionPair
, uint64_t> &profile
) {
83 DenseMap
<const InputSection
*, int> secToCluster
;
85 auto getOrCreateCluster
= [&](const InputSection
*isec
) -> int {
86 auto res
= secToCluster
.try_emplace(isec
, clusters
.size());
88 sections
.push_back(isec
);
89 clusters
.emplace_back(clusters
.size(), isec
->getSize());
91 return res
.first
->second
;
95 for (const std::pair
<SectionPair
, uint64_t> &c
: profile
) {
96 const auto fromSec
= c
.first
.first
->canonical();
97 const auto toSec
= c
.first
.second
->canonical();
98 uint64_t weight
= c
.second
;
99 // Ignore edges between input sections belonging to different output
100 // sections. This is done because otherwise we would end up with clusters
101 // containing input sections that can't actually be placed adjacently in the
102 // output. This messes with the cluster size and density calculations. We
103 // would also end up moving input sections in other output sections without
104 // moving them closer to what calls them.
105 if (fromSec
->parent
!= toSec
->parent
)
108 int from
= getOrCreateCluster(fromSec
);
109 int to
= getOrCreateCluster(toSec
);
111 clusters
[to
].weight
+= weight
;
116 // Remember the best edge.
117 Cluster
&toC
= clusters
[to
];
118 if (toC
.bestPred
.from
== -1 || toC
.bestPred
.weight
< weight
) {
119 toC
.bestPred
.from
= from
;
120 toC
.bestPred
.weight
= weight
;
123 for (Cluster
&c
: clusters
)
124 c
.initialWeight
= c
.weight
;
127 // It's bad to merge clusters which would degrade the density too much.
128 static bool isNewDensityBad(Cluster
&a
, Cluster
&b
) {
129 double newDensity
= double(a
.weight
+ b
.weight
) / double(a
.size
+ b
.size
);
130 return newDensity
< a
.getDensity() / MAX_DENSITY_DEGRADATION
;
133 // Find the leader of V's belonged cluster (represented as an equivalence
134 // class). We apply union-find path-halving technique (simple to implement) in
135 // the meantime as it decreases depths and the time complexity.
136 static int getLeader(std::vector
<int> &leaders
, int v
) {
137 while (leaders
[v
] != v
) {
138 leaders
[v
] = leaders
[leaders
[v
]];
144 static void mergeClusters(std::vector
<Cluster
> &cs
, Cluster
&into
, int intoIdx
,
145 Cluster
&from
, int fromIdx
) {
146 int tail1
= into
.prev
, tail2
= from
.prev
;
148 cs
[tail2
].next
= intoIdx
;
150 cs
[tail1
].next
= fromIdx
;
151 into
.size
+= from
.size
;
152 into
.weight
+= from
.weight
;
157 // Group InputSections into clusters using the Call-Chain Clustering heuristic
158 // then sort the clusters by density.
159 DenseMap
<const InputSection
*, size_t> CallGraphSort::run() {
160 const uint64_t maxClusterSize
= target
->getPageSize();
162 // Cluster indices sorted by density.
163 std::vector
<int> sorted(clusters
.size());
165 std::vector
<int> leaders(clusters
.size());
167 std::iota(leaders
.begin(), leaders
.end(), 0);
168 std::iota(sorted
.begin(), sorted
.end(), 0);
170 llvm::stable_sort(sorted
, [&](int a
, int b
) {
171 return clusters
[a
].getDensity() > clusters
[b
].getDensity();
174 for (int l
: sorted
) {
175 // The cluster index is the same as the index of its leader here because
176 // clusters[L] has not been merged into another cluster yet.
177 Cluster
&c
= clusters
[l
];
179 // Don't consider merging if the edge is unlikely.
180 if (c
.bestPred
.from
== -1 || c
.bestPred
.weight
* 10 <= c
.initialWeight
)
183 int predL
= getLeader(leaders
, c
.bestPred
.from
);
184 // Already in the same cluster.
188 Cluster
*predC
= &clusters
[predL
];
189 if (c
.size
+ predC
->size
> maxClusterSize
)
192 if (isNewDensityBad(*predC
, c
))
196 mergeClusters(clusters
, *predC
, predL
, c
, l
);
198 // Sort remaining non-empty clusters by density.
200 for (int i
= 0, e
= (int)clusters
.size(); i
!= e
; ++i
)
201 if (clusters
[i
].size
> 0)
203 llvm::stable_sort(sorted
, [&](int a
, int b
) {
204 return clusters
[a
].getDensity() > clusters
[b
].getDensity();
207 DenseMap
<const InputSection
*, size_t> orderMap
;
209 // Sections will be sorted by decreasing order. Absent sections will have
210 // priority 0 and be placed at the end of sections.
211 // NB: This is opposite from COFF/ELF to be compatible with the existing
213 int curOrder
= highestAvailablePriority
;
214 for (int leader
: sorted
) {
215 for (int i
= leader
;;) {
216 orderMap
[sections
[i
]] = curOrder
--;
217 i
= clusters
[i
].next
;
222 if (!config
->printSymbolOrder
.empty()) {
224 raw_fd_ostream
os(config
->printSymbolOrder
, ec
, sys::fs::OF_None
);
226 error("cannot open " + config
->printSymbolOrder
+ ": " + ec
.message());
229 // Print the symbols ordered by C3, in the order of decreasing curOrder
230 // Instead of sorting all the orderMap, just repeat the loops above.
231 for (int leader
: sorted
)
232 for (int i
= leader
;;) {
233 const InputSection
*isec
= sections
[i
];
234 // Search all the symbols in the file of the section
235 // and find out a Defined symbol with name that is within the
237 for (Symbol
*sym
: isec
->getFile()->symbols
) {
238 if (auto *d
= dyn_cast_or_null
<Defined
>(sym
)) {
240 os
<< sym
->getName() << "\n";
243 i
= clusters
[i
].next
;
252 Optional
<size_t> macho::PriorityBuilder::getSymbolPriority(const Defined
*sym
) {
253 if (sym
->isAbsolute())
256 auto it
= priorities
.find(sym
->getName());
257 if (it
== priorities
.end())
259 const SymbolPriorityEntry
&entry
= it
->second
;
260 const InputFile
*f
= sym
->isec
->getFile();
262 return entry
.anyObjectFile
;
263 // We don't use toString(InputFile *) here because it returns the full path
264 // for object files, and we only want the basename.
266 if (f
->archiveName
.empty())
267 filename
= path::filename(f
->getName());
269 filename
= saver().save(path::filename(f
->archiveName
) + "(" +
270 path::filename(f
->getName()) + ")");
271 return std::max(entry
.objectFiles
.lookup(filename
), entry
.anyObjectFile
);
274 void macho::PriorityBuilder::extractCallGraphProfile() {
275 TimeTraceScope
timeScope("Extract call graph profile");
276 bool hasOrderFile
= !priorities
.empty();
277 for (const InputFile
*file
: inputFiles
) {
278 auto *obj
= dyn_cast_or_null
<ObjFile
>(file
);
281 for (const CallGraphEntry
&entry
: obj
->callGraph
) {
282 assert(entry
.fromIndex
< obj
->symbols
.size() &&
283 entry
.toIndex
< obj
->symbols
.size());
284 auto *fromSym
= dyn_cast_or_null
<Defined
>(obj
->symbols
[entry
.fromIndex
]);
285 auto *toSym
= dyn_cast_or_null
<Defined
>(obj
->symbols
[entry
.toIndex
]);
286 if (fromSym
&& toSym
&&
288 (!getSymbolPriority(fromSym
) && !getSymbolPriority(toSym
))))
289 callGraphProfile
[{fromSym
->isec
, toSym
->isec
}] += entry
.count
;
294 void macho::PriorityBuilder::parseOrderFile(StringRef path
) {
295 assert(callGraphProfile
.empty() &&
296 "Order file must be parsed before call graph profile is processed");
297 Optional
<MemoryBufferRef
> buffer
= readFile(path
);
299 error("Could not read order file at " + path
);
303 MemoryBufferRef mbref
= *buffer
;
304 for (StringRef line
: args::getLines(mbref
)) {
305 StringRef objectFile
, symbol
;
306 line
= line
.take_until([](char c
) { return c
== '#'; }); // ignore comments
309 CPUType cpuType
= StringSwitch
<CPUType
>(line
)
310 .StartsWith("i386:", CPU_TYPE_I386
)
311 .StartsWith("x86_64:", CPU_TYPE_X86_64
)
312 .StartsWith("arm:", CPU_TYPE_ARM
)
313 .StartsWith("arm64:", CPU_TYPE_ARM64
)
314 .StartsWith("ppc:", CPU_TYPE_POWERPC
)
315 .StartsWith("ppc64:", CPU_TYPE_POWERPC64
)
316 .Default(CPU_TYPE_ANY
);
318 if (cpuType
!= CPU_TYPE_ANY
&& cpuType
!= target
->cpuType
)
321 // Drop the CPU type as well as the colon
322 if (cpuType
!= CPU_TYPE_ANY
)
323 line
= line
.drop_until([](char c
) { return c
== ':'; }).drop_front();
325 constexpr std::array
<StringRef
, 2> fileEnds
= {".o:", ".o):"};
326 for (StringRef fileEnd
: fileEnds
) {
327 size_t pos
= line
.find(fileEnd
);
328 if (pos
!= StringRef::npos
) {
329 // Split the string around the colon
330 objectFile
= line
.take_front(pos
+ fileEnd
.size() - 1);
331 line
= line
.drop_front(pos
+ fileEnd
.size());
335 symbol
= line
.trim();
337 if (!symbol
.empty()) {
338 SymbolPriorityEntry
&entry
= priorities
[symbol
];
339 if (!objectFile
.empty())
340 entry
.objectFiles
.insert(
341 std::make_pair(objectFile
, highestAvailablePriority
));
343 entry
.anyObjectFile
=
344 std::max(entry
.anyObjectFile
, highestAvailablePriority
);
347 --highestAvailablePriority
;
351 DenseMap
<const InputSection
*, size_t>
352 macho::PriorityBuilder::buildInputSectionPriorities() {
353 DenseMap
<const InputSection
*, size_t> sectionPriorities
;
354 if (config
->callGraphProfileSort
) {
355 // Sort sections by the profile data provided by __LLVM,__cg_profile
358 // This first builds a call graph based on the profile data then merges
359 // sections according to the C³ heuristic. All clusters are then sorted by a
360 // density metric to further improve locality.
361 TimeTraceScope
timeScope("Call graph profile sort");
362 sectionPriorities
= CallGraphSort(callGraphProfile
).run();
365 if (priorities
.empty())
366 return sectionPriorities
;
368 auto addSym
= [&](const Defined
*sym
) {
369 Optional
<size_t> symbolPriority
= getSymbolPriority(sym
);
370 if (!symbolPriority
.hasValue())
372 size_t &priority
= sectionPriorities
[sym
->isec
];
373 priority
= std::max(priority
, symbolPriority
.getValue());
376 // TODO: Make sure this handles weak symbols correctly.
377 for (const InputFile
*file
: inputFiles
) {
378 if (isa
<ObjFile
>(file
))
379 for (Symbol
*sym
: file
->symbols
)
380 if (auto *d
= dyn_cast_or_null
<Defined
>(sym
))
384 return sectionPriorities
;