1 //===- CallGraphSort.cpp --------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
10 /// about the algorithm.
12 //===----------------------------------------------------------------------===//
14 #include "CallGraphSort.h"
15 #include "COFFLinkerContext.h"
16 #include "InputFiles.h"
17 #include "SymbolTable.h"
19 #include "lld/Common/ErrorHandler.h"
25 using namespace lld::coff
;
34 Cluster(int sec
, size_t s
) : next(sec
), prev(sec
), size(s
) {}
36 double getDensity() const {
39 return double(weight
) / double(size
);
46 uint64_t initialWeight
= 0;
47 Edge bestPred
= {-1, 0};
52 CallGraphSort(const COFFLinkerContext
&ctx
);
54 DenseMap
<const SectionChunk
*, int> run();
57 std::vector
<Cluster
> clusters
;
58 std::vector
<const SectionChunk
*> sections
;
61 // Maximum amount the combined cluster density can be worse than the original
62 // cluster to consider merging.
63 constexpr int MAX_DENSITY_DEGRADATION
= 8;
65 // Maximum cluster size in bytes.
66 constexpr uint64_t MAX_CLUSTER_SIZE
= 1024 * 1024;
67 } // end anonymous namespace
69 using SectionPair
= std::pair
<const SectionChunk
*, const SectionChunk
*>;
71 // Take the edge list in Config->CallGraphProfile, resolve symbol names to
72 // Symbols, and generate a graph between InputSections with the provided
74 CallGraphSort::CallGraphSort(const COFFLinkerContext
&ctx
) {
75 MapVector
<SectionPair
, uint64_t> &profile
= config
->callGraphProfile
;
76 DenseMap
<const SectionChunk
*, int> secToCluster
;
78 auto getOrCreateNode
= [&](const SectionChunk
*isec
) -> int {
79 auto res
= secToCluster
.try_emplace(isec
, clusters
.size());
81 sections
.push_back(isec
);
82 clusters
.emplace_back(clusters
.size(), isec
->getSize());
84 return res
.first
->second
;
88 for (std::pair
<SectionPair
, uint64_t> &c
: profile
) {
89 const auto *fromSec
= cast
<SectionChunk
>(c
.first
.first
->repl
);
90 const auto *toSec
= cast
<SectionChunk
>(c
.first
.second
->repl
);
91 uint64_t weight
= c
.second
;
93 // Ignore edges between input sections belonging to different output
94 // sections. This is done because otherwise we would end up with clusters
95 // containing input sections that can't actually be placed adjacently in the
96 // output. This messes with the cluster size and density calculations. We
97 // would also end up moving input sections in other output sections without
98 // moving them closer to what calls them.
99 if (ctx
.getOutputSection(fromSec
) != ctx
.getOutputSection(toSec
))
102 int from
= getOrCreateNode(fromSec
);
103 int to
= getOrCreateNode(toSec
);
105 clusters
[to
].weight
+= weight
;
110 // Remember the best edge.
111 Cluster
&toC
= clusters
[to
];
112 if (toC
.bestPred
.from
== -1 || toC
.bestPred
.weight
< weight
) {
113 toC
.bestPred
.from
= from
;
114 toC
.bestPred
.weight
= weight
;
117 for (Cluster
&c
: clusters
)
118 c
.initialWeight
= c
.weight
;
121 // It's bad to merge clusters which would degrade the density too much.
122 static bool isNewDensityBad(Cluster
&a
, Cluster
&b
) {
123 double newDensity
= double(a
.weight
+ b
.weight
) / double(a
.size
+ b
.size
);
124 return newDensity
< a
.getDensity() / MAX_DENSITY_DEGRADATION
;
127 // Find the leader of V's belonged cluster (represented as an equivalence
128 // class). We apply union-find path-halving technique (simple to implement) in
129 // the meantime as it decreases depths and the time complexity.
130 static int getLeader(std::vector
<int> &leaders
, int v
) {
131 while (leaders
[v
] != v
) {
132 leaders
[v
] = leaders
[leaders
[v
]];
138 static void mergeClusters(std::vector
<Cluster
> &cs
, Cluster
&into
, int intoIdx
,
139 Cluster
&from
, int fromIdx
) {
140 int tail1
= into
.prev
, tail2
= from
.prev
;
142 cs
[tail2
].next
= intoIdx
;
144 cs
[tail1
].next
= fromIdx
;
145 into
.size
+= from
.size
;
146 into
.weight
+= from
.weight
;
151 // Group InputSections into clusters using the Call-Chain Clustering heuristic
152 // then sort the clusters by density.
153 DenseMap
<const SectionChunk
*, int> CallGraphSort::run() {
154 std::vector
<int> sorted(clusters
.size());
155 std::vector
<int> leaders(clusters
.size());
157 std::iota(leaders
.begin(), leaders
.end(), 0);
158 std::iota(sorted
.begin(), sorted
.end(), 0);
159 llvm::stable_sort(sorted
, [&](int a
, int b
) {
160 return clusters
[a
].getDensity() > clusters
[b
].getDensity();
163 for (int l
: sorted
) {
164 // The cluster index is the same as the index of its leader here because
165 // clusters[L] has not been merged into another cluster yet.
166 Cluster
&c
= clusters
[l
];
168 // Don't consider merging if the edge is unlikely.
169 if (c
.bestPred
.from
== -1 || c
.bestPred
.weight
* 10 <= c
.initialWeight
)
172 int predL
= getLeader(leaders
, c
.bestPred
.from
);
176 Cluster
*predC
= &clusters
[predL
];
177 if (c
.size
+ predC
->size
> MAX_CLUSTER_SIZE
)
180 if (isNewDensityBad(*predC
, c
))
184 mergeClusters(clusters
, *predC
, predL
, c
, l
);
187 // Sort remaining non-empty clusters by density.
189 for (int i
= 0, e
= (int)clusters
.size(); i
!= e
; ++i
)
190 if (clusters
[i
].size
> 0)
192 llvm::stable_sort(sorted
, [&](int a
, int b
) {
193 return clusters
[a
].getDensity() > clusters
[b
].getDensity();
196 DenseMap
<const SectionChunk
*, int> orderMap
;
197 // Sections will be sorted by increasing order. Absent sections will have
198 // priority 0 and be placed at the end of sections.
199 int curOrder
= INT_MIN
;
200 for (int leader
: sorted
) {
201 for (int i
= leader
;;) {
202 orderMap
[sections
[i
]] = curOrder
++;
203 i
= clusters
[i
].next
;
208 if (!config
->printSymbolOrder
.empty()) {
210 raw_fd_ostream
os(config
->printSymbolOrder
, ec
, sys::fs::OF_None
);
212 error("cannot open " + config
->printSymbolOrder
+ ": " + ec
.message());
215 // Print the symbols ordered by C3, in the order of increasing curOrder
216 // Instead of sorting all the orderMap, just repeat the loops above.
217 for (int leader
: sorted
)
218 for (int i
= leader
;;) {
219 const SectionChunk
*sc
= sections
[i
];
221 // Search all the symbols in the file of the section
222 // and find out a DefinedCOFF symbol with name that is within the
224 for (Symbol
*sym
: sc
->file
->getSymbols())
225 if (auto *d
= dyn_cast_or_null
<DefinedCOFF
>(sym
))
226 // Filter out non-COMDAT symbols and section symbols.
227 if (d
->isCOMDAT
&& !d
->getCOFFSymbol().isSection() &&
229 os
<< sym
->getName() << "\n";
230 i
= clusters
[i
].next
;
239 // Sort sections by the profile data provided by /call-graph-ordering-file
241 // This first builds a call graph based on the profile data then merges sections
242 // according to the C³ heuristic. All clusters are then sorted by a density
243 // metric to further improve locality.
244 DenseMap
<const SectionChunk
*, int>
245 coff::computeCallGraphProfileOrder(const COFFLinkerContext
&ctx
) {
246 return CallGraphSort(ctx
).run();