1 //===- bolt/Passes/HFSort.cpp - Cluster functions by hotness --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Implementation of HFSort algorithm for function ordering:
10 // https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
12 //===----------------------------------------------------------------------===//
14 #include "bolt/Passes/HFSort.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/Debug.h"
17 #include "llvm/Support/Format.h"
18 #include "llvm/Support/raw_ostream.h"
19 #include <unordered_set>
21 #define DEBUG_TYPE "hfsort"
24 extern llvm::cl::opt
<unsigned> Verbosity
;
30 using NodeId
= CallGraph::NodeId
;
31 using Arc
= CallGraph::Arc
;
32 using Node
= CallGraph::Node
;
36 // The number of pages to reserve for the functions with highest
37 // density (samples / size). The functions put in these pages are not
38 // considered for clustering.
39 constexpr uint32_t FrozenPages
= 0;
41 // The minimum approximate probability of a callee being called from a
42 // particular arc to consider merging with the caller's cluster.
43 constexpr double MinArcProbability
= 0.1;
45 // This is a factor to determine by how much a caller cluster is
46 // willing to degrade it's density by merging a callee.
47 constexpr int CallerDegradeFactor
= 8;
51 ////////////////////////////////////////////////////////////////////////////////
53 Cluster::Cluster(NodeId Id
, const Node
&Func
)
54 : Samples(Func
.samples()), Size(Func
.size()),
55 Density((double)Samples
/ Size
) {
56 Targets
.push_back(Id
);
59 Cluster::Cluster(const std::vector
<NodeId
> &Nodes
, const CallGraph
&Cg
) {
62 for (NodeId TargetId
: Nodes
) {
63 Targets
.push_back(TargetId
);
64 Samples
+= Cg
.samples(TargetId
);
65 Size
+= Cg
.size(TargetId
);
67 Density
= (double)Samples
/ Size
;
70 std::string
Cluster::toString() const {
72 raw_string_ostream
CS(Str
);
73 bool PrintComma
= false;
75 for (const NodeId
&Target
: Targets
) {
87 void freezeClusters(const CallGraph
&Cg
, std::vector
<Cluster
> &Clusters
) {
88 uint32_t TotalSize
= 0;
89 llvm::sort(Clusters
, compareClustersDensity
);
90 for (Cluster
&C
: Clusters
) {
91 uint32_t NewSize
= TotalSize
+ C
.size();
92 if (NewSize
> FrozenPages
* HugePageSize
)
96 LLVM_DEBUG(NodeId Fid
= C
.target(0);
98 "freezing cluster for func %d, size = %u, samples = %lu)\n",
99 Fid
, Cg
.size(Fid
), Cg
.samples(Fid
)););
105 void Cluster::reverseTargets() { std::reverse(Targets
.begin(), Targets
.end()); }
107 void Cluster::merge(const Cluster
&Other
, const double Aw
) {
108 Targets
.insert(Targets
.end(), Other
.Targets
.begin(), Other
.Targets
.end());
110 Samples
+= Other
.Samples
;
111 Density
= (double)Samples
/ Size
;
114 void Cluster::merge(const Cluster
&Other
,
115 const std::vector
<CallGraph::NodeId
> &Targets_
) {
118 Samples
+= Other
.Samples
;
119 Density
= (double)Samples
/ Size
;
122 void Cluster::clear() {
131 std::vector
<Cluster
> clusterize(const CallGraph
&Cg
) {
132 std::vector
<NodeId
> SortedFuncs
;
134 // indexed by NodeId, keeps it's current cluster
135 std::vector
<Cluster
*> FuncCluster(Cg
.numNodes(), nullptr);
136 std::vector
<Cluster
> Clusters
;
137 Clusters
.reserve(Cg
.numNodes());
139 for (NodeId F
= 0; F
< Cg
.numNodes(); F
++) {
140 if (Cg
.samples(F
) == 0)
142 Clusters
.emplace_back(F
, Cg
.getNode(F
));
143 SortedFuncs
.push_back(F
);
146 freezeClusters(Cg
, Clusters
);
148 // The size and order of Clusters is fixed until we reshuffle it immediately
150 for (Cluster
&Cluster
: Clusters
)
151 FuncCluster
[Cluster
.targets().front()] = &Cluster
;
153 llvm::sort(SortedFuncs
, [&](const NodeId F1
, const NodeId F2
) {
154 const CallGraph::Node
&Func1
= Cg
.getNode(F1
);
155 const CallGraph::Node
&Func2
= Cg
.getNode(F2
);
156 return Func1
.samples() * Func2
.size() > // TODO: is this correct?
157 Func2
.samples() * Func1
.size();
160 // Process each function, and consider merging its cluster with the
161 // one containing its most likely predecessor.
162 for (const NodeId Fid
: SortedFuncs
) {
163 Cluster
*Cluster
= FuncCluster
[Fid
];
164 if (Cluster
->frozen())
167 // Find best predecessor.
168 NodeId BestPred
= CallGraph::InvalidId
;
171 for (const NodeId Src
: Cg
.predecessors(Fid
)) {
172 const Arc
&Arc
= *Cg
.findArc(Src
, Fid
);
173 if (BestPred
== CallGraph::InvalidId
||
174 Arc
.normalizedWeight() > BestProb
) {
175 BestPred
= Arc
.src();
176 BestProb
= Arc
.normalizedWeight();
180 // Check if the merge is good for the callee.
181 // Don't merge if the probability of getting to the callee from the
182 // caller is too low.
183 if (BestProb
< MinArcProbability
)
186 assert(BestPred
!= CallGraph::InvalidId
);
188 class Cluster
*PredCluster
= FuncCluster
[BestPred
];
190 // Skip if no predCluster (predecessor w/ no samples), or if same
191 // as cluster, of it's frozen.
192 if (PredCluster
== nullptr || PredCluster
== Cluster
||
193 PredCluster
->frozen())
196 // Skip if merged cluster would be bigger than the threshold.
197 if (Cluster
->size() + PredCluster
->size() > MaxClusterSize
)
200 // Check if the merge is good for the caller.
201 // Don't merge if the caller's density is significantly better
202 // than the density resulting from the merge.
203 const double NewDensity
=
204 ((double)PredCluster
->samples() + Cluster
->samples()) /
205 (PredCluster
->size() + Cluster
->size());
206 if (PredCluster
->density() > NewDensity
* CallerDegradeFactor
) {
210 LLVM_DEBUG(if (opts::Verbosity
> 1) {
211 dbgs() << format("merging %s -> %s: %u\n",
212 PredCluster
->toString().c_str(),
213 Cluster
->toString().c_str(), Cg
.samples(Fid
));
216 for (NodeId F
: Cluster
->targets())
217 FuncCluster
[F
] = PredCluster
;
219 PredCluster
->merge(*Cluster
);
223 // Return the set of Clusters that are left, which are the ones that
224 // didn't get merged (so their first func is its original func).
225 std::vector
<Cluster
> SortedClusters
;
226 std::unordered_set
<Cluster
*> Visited
;
227 for (const NodeId Func
: SortedFuncs
) {
228 Cluster
*Cluster
= FuncCluster
[Func
];
229 if (!Cluster
|| Visited
.count(Cluster
) == 1 || Cluster
->target(0) != Func
)
232 SortedClusters
.emplace_back(std::move(*Cluster
));
233 Visited
.insert(Cluster
);
236 llvm::sort(SortedClusters
, compareClustersDensity
);
238 return SortedClusters
;
241 std::vector
<Cluster
> randomClusters(const CallGraph
&Cg
) {
242 std::vector
<NodeId
> FuncIds(Cg
.numNodes(), 0);
243 std::vector
<Cluster
> Clusters
;
244 Clusters
.reserve(Cg
.numNodes());
246 for (NodeId F
= 0; F
< Cg
.numNodes(); F
++) {
247 if (Cg
.samples(F
) == 0)
249 Clusters
.emplace_back(F
, Cg
.getNode(F
));
252 llvm::sort(Clusters
, [](const Cluster
&A
, const Cluster
&B
) {
253 return A
.size() < B
.size();
256 auto pickMergeCluster
= [&Clusters
](const size_t Idx
) {
257 size_t MaxIdx
= Idx
+ 1;
259 while (MaxIdx
< Clusters
.size() &&
260 Clusters
[Idx
].size() + Clusters
[MaxIdx
].size() <= MaxClusterSize
)
263 if (MaxIdx
- Idx
> 1) {
264 size_t MergeIdx
= (std::rand() % (MaxIdx
- Idx
- 1)) + Idx
+ 1;
265 assert(Clusters
[MergeIdx
].size() + Clusters
[Idx
].size() <=
269 return Clusters
.size();
273 while (Idx
< Clusters
.size()) {
274 size_t MergeIdx
= pickMergeCluster(Idx
);
275 if (MergeIdx
== Clusters
.size()) {
278 Clusters
[Idx
].merge(Clusters
[MergeIdx
]);
279 Clusters
.erase(Clusters
.begin() + MergeIdx
);