1 //===-- Clustering.cpp ------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "Clustering.h"
10 #include "llvm/ADT/SetVector.h"
11 #include "llvm/ADT/SmallSet.h"
12 #include "llvm/ADT/SmallVector.h"
20 // The clustering problem has the following characteristics:
21 // (A) - Low dimension (dimensions are typically proc resource units,
23 // (B) - Number of points : ~thousands (points are measurements of an MCInst)
24 // (C) - Number of clusters: ~tens.
25 // (D) - The number of clusters is not known /a priory/.
26 // (E) - The amount of noise is relatively small.
27 // The problem is rather small. In terms of algorithms, (D) disqualifies
28 // k-means and makes algorithms such as DBSCAN[1] or OPTICS[2] more applicable.
30 // We've used DBSCAN here because it's simple to implement. This is a pretty
31 // straightforward and inefficient implementation of the pseudocode in [2].
33 // [1] https://en.wikipedia.org/wiki/DBSCAN
34 // [2] https://en.wikipedia.org/wiki/OPTICS_algorithm
36 // Finds the points at distance less than sqrt(EpsilonSquared) of Q (not
38 void InstructionBenchmarkClustering::rangeQuery(
39 const size_t Q
, std::vector
<size_t> &Neighbors
) const {
41 Neighbors
.reserve(Points_
.size() - 1); // The Q itself isn't a neighbor.
42 const auto &QMeasurements
= Points_
[Q
].Measurements
;
43 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
46 const auto &PMeasurements
= Points_
[P
].Measurements
;
47 if (PMeasurements
.empty()) // Error point.
49 if (isNeighbour(PMeasurements
, QMeasurements
,
50 AnalysisClusteringEpsilonSquared_
)) {
51 Neighbors
.push_back(P
);
56 // Given a set of points, checks that all the points are neighbours
57 // up to AnalysisClusteringEpsilon. This is O(2*N).
58 bool InstructionBenchmarkClustering::areAllNeighbours(
59 ArrayRef
<size_t> Pts
) const {
60 // First, get the centroid of this group of points. This is O(N).
61 SchedClassClusterCentroid G
;
62 llvm::for_each(Pts
, [this, &G
](size_t P
) {
63 assert(P
< Points_
.size());
64 ArrayRef
<BenchmarkMeasure
> Measurements
= Points_
[P
].Measurements
;
65 if (Measurements
.empty()) // Error point.
67 G
.addPoint(Measurements
);
69 const std::vector
<BenchmarkMeasure
> Centroid
= G
.getAsPoint();
71 // Since we will be comparing with the centroid, we need to halve the epsilon.
72 double AnalysisClusteringEpsilonHalvedSquared
=
73 AnalysisClusteringEpsilonSquared_
/ 4.0;
75 // And now check that every point is a neighbour of the centroid. Also O(N).
77 Pts
, [this, &Centroid
, AnalysisClusteringEpsilonHalvedSquared
](size_t P
) {
78 assert(P
< Points_
.size());
79 const auto &PMeasurements
= Points_
[P
].Measurements
;
80 if (PMeasurements
.empty()) // Error point.
81 return true; // Pretend that error point is a neighbour.
82 return isNeighbour(PMeasurements
, Centroid
,
83 AnalysisClusteringEpsilonHalvedSquared
);
87 InstructionBenchmarkClustering::InstructionBenchmarkClustering(
88 const std::vector
<InstructionBenchmark
> &Points
,
89 const double AnalysisClusteringEpsilonSquared
)
91 AnalysisClusteringEpsilonSquared_(AnalysisClusteringEpsilonSquared
),
92 NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {}
94 llvm::Error
InstructionBenchmarkClustering::validateAndSetup() {
95 ClusterIdForPoint_
.resize(Points_
.size());
96 // Mark erroneous measurements out.
97 // All points must have the same number of dimensions, in the same order.
98 const std::vector
<BenchmarkMeasure
> *LastMeasurement
= nullptr;
99 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
100 const auto &Point
= Points_
[P
];
101 if (!Point
.Error
.empty()) {
102 ClusterIdForPoint_
[P
] = ClusterId::error();
103 ErrorCluster_
.PointIndices
.push_back(P
);
106 const auto *CurMeasurement
= &Point
.Measurements
;
107 if (LastMeasurement
) {
108 if (LastMeasurement
->size() != CurMeasurement
->size()) {
109 return llvm::make_error
<llvm::StringError
>(
110 "inconsistent measurement dimensions",
111 llvm::inconvertibleErrorCode());
113 for (size_t I
= 0, E
= LastMeasurement
->size(); I
< E
; ++I
) {
114 if (LastMeasurement
->at(I
).Key
!= CurMeasurement
->at(I
).Key
) {
115 return llvm::make_error
<llvm::StringError
>(
116 "inconsistent measurement dimensions keys",
117 llvm::inconvertibleErrorCode());
121 LastMeasurement
= CurMeasurement
;
123 if (LastMeasurement
) {
124 NumDimensions_
= LastMeasurement
->size();
126 return llvm::Error::success();
129 void InstructionBenchmarkClustering::clusterizeDbScan(const size_t MinPts
) {
130 std::vector
<size_t> Neighbors
; // Persistent buffer to avoid allocs.
131 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
132 if (!ClusterIdForPoint_
[P
].isUndef())
133 continue; // Previously processed in inner loop.
134 rangeQuery(P
, Neighbors
);
135 if (Neighbors
.size() + 1 < MinPts
) { // Density check.
136 // The region around P is not dense enough to create a new cluster, mark
138 ClusterIdForPoint_
[P
] = ClusterId::noise();
142 // Create a new cluster, add P.
143 Clusters_
.emplace_back(ClusterId::makeValid(Clusters_
.size()));
144 Cluster
&CurrentCluster
= Clusters_
.back();
145 ClusterIdForPoint_
[P
] = CurrentCluster
.Id
; /* Label initial point */
146 CurrentCluster
.PointIndices
.push_back(P
);
148 // Process P's neighbors.
149 llvm::SetVector
<size_t, std::deque
<size_t>> ToProcess
;
150 ToProcess
.insert(Neighbors
.begin(), Neighbors
.end());
151 while (!ToProcess
.empty()) {
152 // Retrieve a point from the set.
153 const size_t Q
= *ToProcess
.begin();
154 ToProcess
.erase(ToProcess
.begin());
156 if (ClusterIdForPoint_
[Q
].isNoise()) {
157 // Change noise point to border point.
158 ClusterIdForPoint_
[Q
] = CurrentCluster
.Id
;
159 CurrentCluster
.PointIndices
.push_back(Q
);
162 if (!ClusterIdForPoint_
[Q
].isUndef()) {
163 continue; // Previously processed.
165 // Add Q to the current custer.
166 ClusterIdForPoint_
[Q
] = CurrentCluster
.Id
;
167 CurrentCluster
.PointIndices
.push_back(Q
);
168 // And extend to the neighbors of Q if the region is dense enough.
169 rangeQuery(Q
, Neighbors
);
170 if (Neighbors
.size() + 1 >= MinPts
) {
171 ToProcess
.insert(Neighbors
.begin(), Neighbors
.end());
175 // assert(Neighbors.capacity() == (Points_.size() - 1));
176 // ^ True, but it is not quaranteed to be true in all the cases.
178 // Add noisy points to noise cluster.
179 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
180 if (ClusterIdForPoint_
[P
].isNoise()) {
181 NoiseCluster_
.PointIndices
.push_back(P
);
186 void InstructionBenchmarkClustering::clusterizeNaive(unsigned NumOpcodes
) {
187 // Given an instruction Opcode, which are the benchmarks of this instruction?
188 std::vector
<llvm::SmallVector
<size_t, 1>> OpcodeToPoints
;
189 OpcodeToPoints
.resize(NumOpcodes
);
190 size_t NumOpcodesSeen
= 0;
191 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
192 const InstructionBenchmark
&Point
= Points_
[P
];
193 const unsigned Opcode
= Point
.keyInstruction().getOpcode();
194 assert(Opcode
< NumOpcodes
&& "NumOpcodes is incorrect (too small)");
195 llvm::SmallVectorImpl
<size_t> &PointsOfOpcode
= OpcodeToPoints
[Opcode
];
196 if (PointsOfOpcode
.empty()) // If we previously have not seen any points of
197 ++NumOpcodesSeen
; // this opcode, then naturally this is the new opcode.
198 PointsOfOpcode
.emplace_back(P
);
200 assert(OpcodeToPoints
.size() == NumOpcodes
&& "sanity check");
201 assert(NumOpcodesSeen
<= NumOpcodes
&&
202 "can't see more opcodes than there are total opcodes");
203 assert(NumOpcodesSeen
<= Points_
.size() &&
204 "can't see more opcodes than there are total points");
206 Clusters_
.reserve(NumOpcodesSeen
); // One cluster per opcode.
207 for (ArrayRef
<size_t> PointsOfOpcode
: llvm::make_filter_range(
208 OpcodeToPoints
, [](ArrayRef
<size_t> PointsOfOpcode
) {
209 return !PointsOfOpcode
.empty(); // Ignore opcodes with no points.
211 // Create a new cluster.
212 Clusters_
.emplace_back(ClusterId::makeValid(
213 Clusters_
.size(), /*IsUnstable=*/!areAllNeighbours(PointsOfOpcode
)));
214 Cluster
&CurrentCluster
= Clusters_
.back();
215 // Mark points as belonging to the new cluster.
216 llvm::for_each(PointsOfOpcode
, [this, &CurrentCluster
](size_t P
) {
217 ClusterIdForPoint_
[P
] = CurrentCluster
.Id
;
219 // And add all the points of this opcode to the new cluster.
220 CurrentCluster
.PointIndices
.reserve(PointsOfOpcode
.size());
221 CurrentCluster
.PointIndices
.assign(PointsOfOpcode
.begin(),
222 PointsOfOpcode
.end());
223 assert(CurrentCluster
.PointIndices
.size() == PointsOfOpcode
.size());
225 assert(Clusters_
.size() == NumOpcodesSeen
);
228 // Given an instruction Opcode, we can make benchmarks (measurements) of the
229 // instruction characteristics/performance. Then, to facilitate further analysis
230 // we group the benchmarks with *similar* characteristics into clusters.
231 // Now, this is all not entirely deterministic. Some instructions have variable
232 // characteristics, depending on their arguments. And thus, if we do several
233 // benchmarks of the same instruction Opcode, we may end up with *different*
234 // performance characteristics measurements. And when we then do clustering,
235 // these several benchmarks of the same instruction Opcode may end up being
236 // clustered into *different* clusters. This is not great for further analysis.
237 // We shall find every opcode with benchmarks not in just one cluster, and move
238 // *all* the benchmarks of said Opcode into one new unstable cluster per Opcode.
239 void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes
) {
240 // Given an instruction Opcode, in which clusters do benchmarks of this
241 // instruction lie? Normally, they all should be in the same cluster.
242 std::vector
<llvm::SmallSet
<ClusterId
, 1>> OpcodeToClusterIDs
;
243 OpcodeToClusterIDs
.resize(NumOpcodes
);
244 // The list of opcodes that have more than one cluster.
245 llvm::SetVector
<size_t> UnstableOpcodes
;
246 // Populate OpcodeToClusterIDs and UnstableOpcodes data structures.
247 assert(ClusterIdForPoint_
.size() == Points_
.size() && "size mismatch");
248 for (const auto &Point
: zip(Points_
, ClusterIdForPoint_
)) {
249 const ClusterId
&ClusterIdOfPoint
= std::get
<1>(Point
);
250 if (!ClusterIdOfPoint
.isValid())
251 continue; // Only process fully valid clusters.
252 const unsigned Opcode
= std::get
<0>(Point
).keyInstruction().getOpcode();
253 assert(Opcode
< NumOpcodes
&& "NumOpcodes is incorrect (too small)");
254 llvm::SmallSet
<ClusterId
, 1> &ClusterIDsOfOpcode
=
255 OpcodeToClusterIDs
[Opcode
];
256 ClusterIDsOfOpcode
.insert(ClusterIdOfPoint
);
257 // Is there more than one ClusterID for this opcode?.
258 if (ClusterIDsOfOpcode
.size() < 2)
259 continue; // If not, then at this moment this Opcode is stable.
260 // Else let's record this unstable opcode for future use.
261 UnstableOpcodes
.insert(Opcode
);
263 assert(OpcodeToClusterIDs
.size() == NumOpcodes
&& "sanity check");
265 // We know with how many [new] clusters we will end up with.
266 const auto NewTotalClusterCount
= Clusters_
.size() + UnstableOpcodes
.size();
267 Clusters_
.reserve(NewTotalClusterCount
);
268 for (const size_t UnstableOpcode
: UnstableOpcodes
.getArrayRef()) {
269 const llvm::SmallSet
<ClusterId
, 1> &ClusterIDs
=
270 OpcodeToClusterIDs
[UnstableOpcode
];
271 assert(ClusterIDs
.size() > 1 &&
272 "Should only have Opcodes with more than one cluster.");
274 // Create a new unstable cluster, one per Opcode.
275 Clusters_
.emplace_back(ClusterId::makeValidUnstable(Clusters_
.size()));
276 Cluster
&UnstableCluster
= Clusters_
.back();
277 // We will find *at least* one point in each of these clusters.
278 UnstableCluster
.PointIndices
.reserve(ClusterIDs
.size());
280 // Go through every cluster which we recorded as containing benchmarks
281 // of this UnstableOpcode. NOTE: we only recorded valid clusters.
282 for (const ClusterId
&CID
: ClusterIDs
) {
283 assert(CID
.isValid() &&
284 "We only recorded valid clusters, not noise/error clusters.");
285 Cluster
&OldCluster
= Clusters_
[CID
.getId()]; // Valid clusters storage.
286 // Within each cluster, go through each point, and either move it to the
287 // new unstable cluster, or 'keep' it.
288 // In this case, we'll reshuffle OldCluster.PointIndices vector
289 // so that all the points that are *not* for UnstableOpcode are first,
290 // and the rest of the points is for the UnstableOpcode.
291 const auto it
= std::stable_partition(
292 OldCluster
.PointIndices
.begin(), OldCluster
.PointIndices
.end(),
293 [this, UnstableOpcode
](size_t P
) {
294 return Points_
[P
].keyInstruction().getOpcode() != UnstableOpcode
;
296 assert(std::distance(it
, OldCluster
.PointIndices
.end()) > 0 &&
297 "Should have found at least one bad point");
298 // Mark to-be-moved points as belonging to the new cluster.
299 std::for_each(it
, OldCluster
.PointIndices
.end(),
300 [this, &UnstableCluster
](size_t P
) {
301 ClusterIdForPoint_
[P
] = UnstableCluster
.Id
;
303 // Actually append to-be-moved points to the new cluster.
304 UnstableCluster
.PointIndices
.insert(UnstableCluster
.PointIndices
.end(),
305 it
, OldCluster
.PointIndices
.end());
306 // And finally, remove "to-be-moved" points form the old cluster.
307 OldCluster
.PointIndices
.erase(it
, OldCluster
.PointIndices
.end());
308 // Now, the old cluster may end up being empty, but let's just keep it
309 // in whatever state it ended up. Purging empty clusters isn't worth it.
311 assert(UnstableCluster
.PointIndices
.size() > 1 &&
312 "New unstable cluster should end up with more than one point.");
313 assert(UnstableCluster
.PointIndices
.size() >= ClusterIDs
.size() &&
314 "New unstable cluster should end up with no less points than there "
317 assert(Clusters_
.size() == NewTotalClusterCount
&& "sanity check");
320 llvm::Expected
<InstructionBenchmarkClustering
>
321 InstructionBenchmarkClustering::create(
322 const std::vector
<InstructionBenchmark
> &Points
, const ModeE Mode
,
323 const size_t DbscanMinPts
, const double AnalysisClusteringEpsilon
,
324 llvm::Optional
<unsigned> NumOpcodes
) {
325 InstructionBenchmarkClustering
Clustering(
326 Points
, AnalysisClusteringEpsilon
* AnalysisClusteringEpsilon
);
327 if (auto Error
= Clustering
.validateAndSetup()) {
328 return std::move(Error
);
330 if (Clustering
.ErrorCluster_
.PointIndices
.size() == Points
.size()) {
331 return Clustering
; // Nothing to cluster.
334 if (Mode
== ModeE::Dbscan
) {
335 Clustering
.clusterizeDbScan(DbscanMinPts
);
337 if (NumOpcodes
.hasValue())
338 Clustering
.stabilize(NumOpcodes
.getValue());
339 } else /*if(Mode == ModeE::Naive)*/ {
340 if (!NumOpcodes
.hasValue())
341 llvm::report_fatal_error(
342 "'naive' clustering mode requires opcode count to be specified");
343 Clustering
.clusterizeNaive(NumOpcodes
.getValue());
349 void SchedClassClusterCentroid::addPoint(ArrayRef
<BenchmarkMeasure
> Point
) {
350 if (Representative
.empty())
351 Representative
.resize(Point
.size());
352 assert(Representative
.size() == Point
.size() &&
353 "All points should have identical dimensions.");
355 for (const auto &I
: llvm::zip(Representative
, Point
))
356 std::get
<0>(I
).push(std::get
<1>(I
));
359 std::vector
<BenchmarkMeasure
> SchedClassClusterCentroid::getAsPoint() const {
360 std::vector
<BenchmarkMeasure
> ClusterCenterPoint(Representative
.size());
361 for (const auto &I
: llvm::zip(ClusterCenterPoint
, Representative
))
362 std::get
<0>(I
).PerInstructionValue
= std::get
<1>(I
).avg();
363 return ClusterCenterPoint
;
366 bool SchedClassClusterCentroid::validate(
367 InstructionBenchmark::ModeE Mode
) const {
368 size_t NumMeasurements
= Representative
.size();
370 case InstructionBenchmark::Latency
:
371 if (NumMeasurements
!= 1) {
373 << "invalid number of measurements in latency mode: expected 1, got "
374 << NumMeasurements
<< "\n";
378 case InstructionBenchmark::Uops
:
379 // Can have many measurements.
381 case InstructionBenchmark::InverseThroughput
:
382 if (NumMeasurements
!= 1) {
383 llvm::errs() << "invalid number of measurements in inverse throughput "
384 "mode: expected 1, got "
385 << NumMeasurements
<< "\n";
390 llvm_unreachable("unimplemented measurement matching mode");
394 return true; // All good.
397 } // namespace exegesis