1 //===- CallGraphSort.cpp --------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
10 /// about the algorithm.
12 //===----------------------------------------------------------------------===//
14 #include "CallGraphSort.h"
15 #include "COFFLinkerContext.h"
16 #include "InputFiles.h"
17 #include "SymbolTable.h"
19 #include "lld/Common/ErrorHandler.h"
25 using namespace lld::coff
;
34 Cluster(int sec
, size_t s
) : next(sec
), prev(sec
), size(s
) {}
36 double getDensity() const {
39 return double(weight
) / double(size
);
46 uint64_t initialWeight
= 0;
47 Edge bestPred
= {-1, 0};
52 CallGraphSort(COFFLinkerContext
&ctx
);
54 DenseMap
<const SectionChunk
*, int> run();
57 std::vector
<Cluster
> clusters
;
58 std::vector
<const SectionChunk
*> sections
;
60 COFFLinkerContext
&ctx
;
63 // Maximum amount the combined cluster density can be worse than the original
64 // cluster to consider merging.
65 constexpr int MAX_DENSITY_DEGRADATION
= 8;
67 // Maximum cluster size in bytes.
68 constexpr uint64_t MAX_CLUSTER_SIZE
= 1024 * 1024;
69 } // end anonymous namespace
71 using SectionPair
= std::pair
<const SectionChunk
*, const SectionChunk
*>;
73 // Take the edge list in Config->CallGraphProfile, resolve symbol names to
74 // Symbols, and generate a graph between InputSections with the provided
76 CallGraphSort::CallGraphSort(COFFLinkerContext
&ctx
) : ctx(ctx
) {
77 const MapVector
<SectionPair
, uint64_t> &profile
= ctx
.config
.callGraphProfile
;
78 DenseMap
<const SectionChunk
*, int> secToCluster
;
80 auto getOrCreateNode
= [&](const SectionChunk
*isec
) -> int {
81 auto res
= secToCluster
.try_emplace(isec
, clusters
.size());
83 sections
.push_back(isec
);
84 clusters
.emplace_back(clusters
.size(), isec
->getSize());
86 return res
.first
->second
;
90 for (const std::pair
<SectionPair
, uint64_t> &c
: profile
) {
91 const auto *fromSec
= cast
<SectionChunk
>(c
.first
.first
->repl
);
92 const auto *toSec
= cast
<SectionChunk
>(c
.first
.second
->repl
);
93 uint64_t weight
= c
.second
;
95 // Ignore edges between input sections belonging to different output
96 // sections. This is done because otherwise we would end up with clusters
97 // containing input sections that can't actually be placed adjacently in the
98 // output. This messes with the cluster size and density calculations. We
99 // would also end up moving input sections in other output sections without
100 // moving them closer to what calls them.
101 if (ctx
.getOutputSection(fromSec
) != ctx
.getOutputSection(toSec
))
104 int from
= getOrCreateNode(fromSec
);
105 int to
= getOrCreateNode(toSec
);
107 clusters
[to
].weight
+= weight
;
112 // Remember the best edge.
113 Cluster
&toC
= clusters
[to
];
114 if (toC
.bestPred
.from
== -1 || toC
.bestPred
.weight
< weight
) {
115 toC
.bestPred
.from
= from
;
116 toC
.bestPred
.weight
= weight
;
119 for (Cluster
&c
: clusters
)
120 c
.initialWeight
= c
.weight
;
123 // It's bad to merge clusters which would degrade the density too much.
124 static bool isNewDensityBad(Cluster
&a
, Cluster
&b
) {
125 double newDensity
= double(a
.weight
+ b
.weight
) / double(a
.size
+ b
.size
);
126 return newDensity
< a
.getDensity() / MAX_DENSITY_DEGRADATION
;
129 // Find the leader of V's belonged cluster (represented as an equivalence
130 // class). We apply union-find path-halving technique (simple to implement) in
131 // the meantime as it decreases depths and the time complexity.
132 static int getLeader(std::vector
<int> &leaders
, int v
) {
133 while (leaders
[v
] != v
) {
134 leaders
[v
] = leaders
[leaders
[v
]];
140 static void mergeClusters(std::vector
<Cluster
> &cs
, Cluster
&into
, int intoIdx
,
141 Cluster
&from
, int fromIdx
) {
142 int tail1
= into
.prev
, tail2
= from
.prev
;
144 cs
[tail2
].next
= intoIdx
;
146 cs
[tail1
].next
= fromIdx
;
147 into
.size
+= from
.size
;
148 into
.weight
+= from
.weight
;
153 // Group InputSections into clusters using the Call-Chain Clustering heuristic
154 // then sort the clusters by density.
155 DenseMap
<const SectionChunk
*, int> CallGraphSort::run() {
156 std::vector
<int> sorted(clusters
.size());
157 std::vector
<int> leaders(clusters
.size());
159 std::iota(leaders
.begin(), leaders
.end(), 0);
160 std::iota(sorted
.begin(), sorted
.end(), 0);
161 llvm::stable_sort(sorted
, [&](int a
, int b
) {
162 return clusters
[a
].getDensity() > clusters
[b
].getDensity();
165 for (int l
: sorted
) {
166 // The cluster index is the same as the index of its leader here because
167 // clusters[L] has not been merged into another cluster yet.
168 Cluster
&c
= clusters
[l
];
170 // Don't consider merging if the edge is unlikely.
171 if (c
.bestPred
.from
== -1 || c
.bestPred
.weight
* 10 <= c
.initialWeight
)
174 int predL
= getLeader(leaders
, c
.bestPred
.from
);
178 Cluster
*predC
= &clusters
[predL
];
179 if (c
.size
+ predC
->size
> MAX_CLUSTER_SIZE
)
182 if (isNewDensityBad(*predC
, c
))
186 mergeClusters(clusters
, *predC
, predL
, c
, l
);
189 // Sort remaining non-empty clusters by density.
191 for (int i
= 0, e
= (int)clusters
.size(); i
!= e
; ++i
)
192 if (clusters
[i
].size
> 0)
194 llvm::stable_sort(sorted
, [&](int a
, int b
) {
195 return clusters
[a
].getDensity() > clusters
[b
].getDensity();
198 DenseMap
<const SectionChunk
*, int> orderMap
;
199 // Sections will be sorted by increasing order. Absent sections will have
200 // priority 0 and be placed at the end of sections.
201 int curOrder
= INT_MIN
;
202 for (int leader
: sorted
) {
203 for (int i
= leader
;;) {
204 orderMap
[sections
[i
]] = curOrder
++;
205 i
= clusters
[i
].next
;
210 if (!ctx
.config
.printSymbolOrder
.empty()) {
212 raw_fd_ostream
os(ctx
.config
.printSymbolOrder
, ec
, sys::fs::OF_None
);
214 Err(ctx
) << "cannot open " << ctx
.config
.printSymbolOrder
<< ": "
218 // Print the symbols ordered by C3, in the order of increasing curOrder
219 // Instead of sorting all the orderMap, just repeat the loops above.
220 for (int leader
: sorted
)
221 for (int i
= leader
;;) {
222 const SectionChunk
*sc
= sections
[i
];
224 // Search all the symbols in the file of the section
225 // and find out a DefinedCOFF symbol with name that is within the
227 for (Symbol
*sym
: sc
->file
->getSymbols())
228 if (auto *d
= dyn_cast_or_null
<DefinedCOFF
>(sym
))
229 // Filter out non-COMDAT symbols and section symbols.
230 if (d
->isCOMDAT
&& !d
->getCOFFSymbol().isSection() &&
232 os
<< sym
->getName() << "\n";
233 i
= clusters
[i
].next
;
242 // Sort sections by the profile data provided by /call-graph-ordering-file
244 // This first builds a call graph based on the profile data then merges sections
245 // according to the C³ heuristic. All clusters are then sorted by a density
246 // metric to further improve locality.
247 DenseMap
<const SectionChunk
*, int>
248 coff::computeCallGraphProfileOrder(COFFLinkerContext
&ctx
) {
249 return CallGraphSort(ctx
).run();