1 //===-- Clustering.cpp ------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "Clustering.h"
10 #include "llvm/ADT/SetVector.h"
11 #include "llvm/ADT/SmallSet.h"
12 #include "llvm/ADT/SmallVector.h"
20 // The clustering problem has the following characteristics:
21 // (A) - Low dimension (dimensions are typically proc resource units,
23 // (B) - Number of points : ~thousands (points are measurements of an MCInst)
24 // (C) - Number of clusters: ~tens.
25 // (D) - The number of clusters is not known /a priory/.
26 // (E) - The amount of noise is relatively small.
27 // The problem is rather small. In terms of algorithms, (D) disqualifies
28 // k-means and makes algorithms such as DBSCAN[1] or OPTICS[2] more applicable.
30 // We've used DBSCAN here because it's simple to implement. This is a pretty
31 // straightforward and inefficient implementation of the pseudocode in [2].
33 // [1] https://en.wikipedia.org/wiki/DBSCAN
34 // [2] https://en.wikipedia.org/wiki/OPTICS_algorithm
36 // Finds the points at distance less than sqrt(EpsilonSquared) of Q (not
38 void InstructionBenchmarkClustering::rangeQuery(
39 const size_t Q
, std::vector
<size_t> &Neighbors
) const {
41 Neighbors
.reserve(Points_
.size() - 1); // The Q itself isn't a neighbor.
42 const auto &QMeasurements
= Points_
[Q
].Measurements
;
43 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
46 const auto &PMeasurements
= Points_
[P
].Measurements
;
47 if (PMeasurements
.empty()) // Error point.
49 if (isNeighbour(PMeasurements
, QMeasurements
,
50 AnalysisClusteringEpsilonSquared_
)) {
51 Neighbors
.push_back(P
);
56 // Given a set of points, checks that all the points are neighbours
57 // up to AnalysisClusteringEpsilon. This is O(2*N).
58 bool InstructionBenchmarkClustering::areAllNeighbours(
59 ArrayRef
<size_t> Pts
) const {
60 // First, get the centroid of this group of points. This is O(N).
61 SchedClassClusterCentroid G
;
62 for_each(Pts
, [this, &G
](size_t P
) {
63 assert(P
< Points_
.size());
64 ArrayRef
<BenchmarkMeasure
> Measurements
= Points_
[P
].Measurements
;
65 if (Measurements
.empty()) // Error point.
67 G
.addPoint(Measurements
);
69 const std::vector
<BenchmarkMeasure
> Centroid
= G
.getAsPoint();
71 // Since we will be comparing with the centroid, we need to halve the epsilon.
72 double AnalysisClusteringEpsilonHalvedSquared
=
73 AnalysisClusteringEpsilonSquared_
/ 4.0;
75 // And now check that every point is a neighbour of the centroid. Also O(N).
77 Pts
, [this, &Centroid
, AnalysisClusteringEpsilonHalvedSquared
](size_t P
) {
78 assert(P
< Points_
.size());
79 const auto &PMeasurements
= Points_
[P
].Measurements
;
80 if (PMeasurements
.empty()) // Error point.
81 return true; // Pretend that error point is a neighbour.
82 return isNeighbour(PMeasurements
, Centroid
,
83 AnalysisClusteringEpsilonHalvedSquared
);
87 InstructionBenchmarkClustering::InstructionBenchmarkClustering(
88 const std::vector
<InstructionBenchmark
> &Points
,
89 const double AnalysisClusteringEpsilonSquared
)
91 AnalysisClusteringEpsilonSquared_(AnalysisClusteringEpsilonSquared
),
92 NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {}
94 Error
InstructionBenchmarkClustering::validateAndSetup() {
95 ClusterIdForPoint_
.resize(Points_
.size());
96 // Mark erroneous measurements out.
97 // All points must have the same number of dimensions, in the same order.
98 const std::vector
<BenchmarkMeasure
> *LastMeasurement
= nullptr;
99 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
100 const auto &Point
= Points_
[P
];
101 if (!Point
.Error
.empty()) {
102 ClusterIdForPoint_
[P
] = ClusterId::error();
103 ErrorCluster_
.PointIndices
.push_back(P
);
106 const auto *CurMeasurement
= &Point
.Measurements
;
107 if (LastMeasurement
) {
108 if (LastMeasurement
->size() != CurMeasurement
->size()) {
109 return make_error
<StringError
>("inconsistent measurement dimensions",
110 inconvertibleErrorCode());
112 for (size_t I
= 0, E
= LastMeasurement
->size(); I
< E
; ++I
) {
113 if (LastMeasurement
->at(I
).Key
!= CurMeasurement
->at(I
).Key
) {
114 return make_error
<StringError
>(
115 "inconsistent measurement dimensions keys",
116 inconvertibleErrorCode());
120 LastMeasurement
= CurMeasurement
;
122 if (LastMeasurement
) {
123 NumDimensions_
= LastMeasurement
->size();
125 return Error::success();
128 void InstructionBenchmarkClustering::clusterizeDbScan(const size_t MinPts
) {
129 std::vector
<size_t> Neighbors
; // Persistent buffer to avoid allocs.
130 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
131 if (!ClusterIdForPoint_
[P
].isUndef())
132 continue; // Previously processed in inner loop.
133 rangeQuery(P
, Neighbors
);
134 if (Neighbors
.size() + 1 < MinPts
) { // Density check.
135 // The region around P is not dense enough to create a new cluster, mark
137 ClusterIdForPoint_
[P
] = ClusterId::noise();
141 // Create a new cluster, add P.
142 Clusters_
.emplace_back(ClusterId::makeValid(Clusters_
.size()));
143 Cluster
&CurrentCluster
= Clusters_
.back();
144 ClusterIdForPoint_
[P
] = CurrentCluster
.Id
; /* Label initial point */
145 CurrentCluster
.PointIndices
.push_back(P
);
147 // Process P's neighbors.
148 SetVector
<size_t, std::deque
<size_t>> ToProcess
;
149 ToProcess
.insert(Neighbors
.begin(), Neighbors
.end());
150 while (!ToProcess
.empty()) {
151 // Retrieve a point from the set.
152 const size_t Q
= *ToProcess
.begin();
153 ToProcess
.erase(ToProcess
.begin());
155 if (ClusterIdForPoint_
[Q
].isNoise()) {
156 // Change noise point to border point.
157 ClusterIdForPoint_
[Q
] = CurrentCluster
.Id
;
158 CurrentCluster
.PointIndices
.push_back(Q
);
161 if (!ClusterIdForPoint_
[Q
].isUndef()) {
162 continue; // Previously processed.
164 // Add Q to the current custer.
165 ClusterIdForPoint_
[Q
] = CurrentCluster
.Id
;
166 CurrentCluster
.PointIndices
.push_back(Q
);
167 // And extend to the neighbors of Q if the region is dense enough.
168 rangeQuery(Q
, Neighbors
);
169 if (Neighbors
.size() + 1 >= MinPts
) {
170 ToProcess
.insert(Neighbors
.begin(), Neighbors
.end());
174 // assert(Neighbors.capacity() == (Points_.size() - 1));
175 // ^ True, but it is not quaranteed to be true in all the cases.
177 // Add noisy points to noise cluster.
178 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
179 if (ClusterIdForPoint_
[P
].isNoise()) {
180 NoiseCluster_
.PointIndices
.push_back(P
);
185 void InstructionBenchmarkClustering::clusterizeNaive(unsigned NumOpcodes
) {
186 // Given an instruction Opcode, which are the benchmarks of this instruction?
187 std::vector
<SmallVector
<size_t, 1>> OpcodeToPoints
;
188 OpcodeToPoints
.resize(NumOpcodes
);
189 size_t NumOpcodesSeen
= 0;
190 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
191 const InstructionBenchmark
&Point
= Points_
[P
];
192 const unsigned Opcode
= Point
.keyInstruction().getOpcode();
193 assert(Opcode
< NumOpcodes
&& "NumOpcodes is incorrect (too small)");
194 SmallVectorImpl
<size_t> &PointsOfOpcode
= OpcodeToPoints
[Opcode
];
195 if (PointsOfOpcode
.empty()) // If we previously have not seen any points of
196 ++NumOpcodesSeen
; // this opcode, then naturally this is the new opcode.
197 PointsOfOpcode
.emplace_back(P
);
199 assert(OpcodeToPoints
.size() == NumOpcodes
&& "sanity check");
200 assert(NumOpcodesSeen
<= NumOpcodes
&&
201 "can't see more opcodes than there are total opcodes");
202 assert(NumOpcodesSeen
<= Points_
.size() &&
203 "can't see more opcodes than there are total points");
205 Clusters_
.reserve(NumOpcodesSeen
); // One cluster per opcode.
206 for (ArrayRef
<size_t> PointsOfOpcode
:
207 make_filter_range(OpcodeToPoints
, [](ArrayRef
<size_t> PointsOfOpcode
) {
208 return !PointsOfOpcode
.empty(); // Ignore opcodes with no points.
210 // Create a new cluster.
211 Clusters_
.emplace_back(ClusterId::makeValid(
212 Clusters_
.size(), /*IsUnstable=*/!areAllNeighbours(PointsOfOpcode
)));
213 Cluster
&CurrentCluster
= Clusters_
.back();
214 // Mark points as belonging to the new cluster.
215 for_each(PointsOfOpcode
, [this, &CurrentCluster
](size_t P
) {
216 ClusterIdForPoint_
[P
] = CurrentCluster
.Id
;
218 // And add all the points of this opcode to the new cluster.
219 CurrentCluster
.PointIndices
.reserve(PointsOfOpcode
.size());
220 CurrentCluster
.PointIndices
.assign(PointsOfOpcode
.begin(),
221 PointsOfOpcode
.end());
222 assert(CurrentCluster
.PointIndices
.size() == PointsOfOpcode
.size());
224 assert(Clusters_
.size() == NumOpcodesSeen
);
227 // Given an instruction Opcode, we can make benchmarks (measurements) of the
228 // instruction characteristics/performance. Then, to facilitate further analysis
229 // we group the benchmarks with *similar* characteristics into clusters.
230 // Now, this is all not entirely deterministic. Some instructions have variable
231 // characteristics, depending on their arguments. And thus, if we do several
232 // benchmarks of the same instruction Opcode, we may end up with *different*
233 // performance characteristics measurements. And when we then do clustering,
234 // these several benchmarks of the same instruction Opcode may end up being
235 // clustered into *different* clusters. This is not great for further analysis.
236 // We shall find every opcode with benchmarks not in just one cluster, and move
237 // *all* the benchmarks of said Opcode into one new unstable cluster per Opcode.
238 void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes
) {
239 // Given an instruction Opcode and Config, in which clusters do benchmarks of
240 // this instruction lie? Normally, they all should be in the same cluster.
241 struct OpcodeAndConfig
{
242 explicit OpcodeAndConfig(const InstructionBenchmark
&IB
)
243 : Opcode(IB
.keyInstruction().getOpcode()), Config(&IB
.Key
.Config
) {}
245 const std::string
*Config
;
247 auto Tie() const -> auto { return std::tie(Opcode
, *Config
); }
249 bool operator<(const OpcodeAndConfig
&O
) const { return Tie() < O
.Tie(); }
250 bool operator!=(const OpcodeAndConfig
&O
) const { return Tie() != O
.Tie(); }
252 std::map
<OpcodeAndConfig
, SmallSet
<ClusterId
, 1>> OpcodeConfigToClusterIDs
;
253 // Populate OpcodeConfigToClusterIDs and UnstableOpcodes data structures.
254 assert(ClusterIdForPoint_
.size() == Points_
.size() && "size mismatch");
255 for (const auto &Point
: zip(Points_
, ClusterIdForPoint_
)) {
256 const ClusterId
&ClusterIdOfPoint
= std::get
<1>(Point
);
257 if (!ClusterIdOfPoint
.isValid())
258 continue; // Only process fully valid clusters.
259 const OpcodeAndConfig
Key(std::get
<0>(Point
));
260 SmallSet
<ClusterId
, 1> &ClusterIDsOfOpcode
= OpcodeConfigToClusterIDs
[Key
];
261 ClusterIDsOfOpcode
.insert(ClusterIdOfPoint
);
264 for (const auto &OpcodeConfigToClusterID
: OpcodeConfigToClusterIDs
) {
265 const SmallSet
<ClusterId
, 1> &ClusterIDs
= OpcodeConfigToClusterID
.second
;
266 const OpcodeAndConfig
&Key
= OpcodeConfigToClusterID
.first
;
267 // We only care about unstable instructions.
268 if (ClusterIDs
.size() < 2)
271 // Create a new unstable cluster, one per Opcode.
272 Clusters_
.emplace_back(ClusterId::makeValidUnstable(Clusters_
.size()));
273 Cluster
&UnstableCluster
= Clusters_
.back();
274 // We will find *at least* one point in each of these clusters.
275 UnstableCluster
.PointIndices
.reserve(ClusterIDs
.size());
277 // Go through every cluster which we recorded as containing benchmarks
278 // of this UnstableOpcode. NOTE: we only recorded valid clusters.
279 for (const ClusterId
&CID
: ClusterIDs
) {
280 assert(CID
.isValid() &&
281 "We only recorded valid clusters, not noise/error clusters.");
282 Cluster
&OldCluster
= Clusters_
[CID
.getId()]; // Valid clusters storage.
283 // Within each cluster, go through each point, and either move it to the
284 // new unstable cluster, or 'keep' it.
285 // In this case, we'll reshuffle OldCluster.PointIndices vector
286 // so that all the points that are *not* for UnstableOpcode are first,
287 // and the rest of the points is for the UnstableOpcode.
288 const auto it
= std::stable_partition(
289 OldCluster
.PointIndices
.begin(), OldCluster
.PointIndices
.end(),
290 [this, &Key
](size_t P
) {
291 return OpcodeAndConfig(Points_
[P
]) != Key
;
293 assert(std::distance(it
, OldCluster
.PointIndices
.end()) > 0 &&
294 "Should have found at least one bad point");
295 // Mark to-be-moved points as belonging to the new cluster.
296 std::for_each(it
, OldCluster
.PointIndices
.end(),
297 [this, &UnstableCluster
](size_t P
) {
298 ClusterIdForPoint_
[P
] = UnstableCluster
.Id
;
300 // Actually append to-be-moved points to the new cluster.
301 UnstableCluster
.PointIndices
.insert(UnstableCluster
.PointIndices
.end(),
302 it
, OldCluster
.PointIndices
.end());
303 // And finally, remove "to-be-moved" points form the old cluster.
304 OldCluster
.PointIndices
.erase(it
, OldCluster
.PointIndices
.end());
305 // Now, the old cluster may end up being empty, but let's just keep it
306 // in whatever state it ended up. Purging empty clusters isn't worth it.
308 assert(UnstableCluster
.PointIndices
.size() > 1 &&
309 "New unstable cluster should end up with more than one point.");
310 assert(UnstableCluster
.PointIndices
.size() >= ClusterIDs
.size() &&
311 "New unstable cluster should end up with no less points than there "
316 Expected
<InstructionBenchmarkClustering
> InstructionBenchmarkClustering::create(
317 const std::vector
<InstructionBenchmark
> &Points
, const ModeE Mode
,
318 const size_t DbscanMinPts
, const double AnalysisClusteringEpsilon
,
319 Optional
<unsigned> NumOpcodes
) {
320 InstructionBenchmarkClustering
Clustering(
321 Points
, AnalysisClusteringEpsilon
* AnalysisClusteringEpsilon
);
322 if (auto Error
= Clustering
.validateAndSetup()) {
323 return std::move(Error
);
325 if (Clustering
.ErrorCluster_
.PointIndices
.size() == Points
.size()) {
326 return Clustering
; // Nothing to cluster.
329 if (Mode
== ModeE::Dbscan
) {
330 Clustering
.clusterizeDbScan(DbscanMinPts
);
332 if (NumOpcodes
.hasValue())
333 Clustering
.stabilize(NumOpcodes
.getValue());
334 } else /*if(Mode == ModeE::Naive)*/ {
335 if (!NumOpcodes
.hasValue())
337 "'naive' clustering mode requires opcode count to be specified");
338 Clustering
.clusterizeNaive(NumOpcodes
.getValue());
344 void SchedClassClusterCentroid::addPoint(ArrayRef
<BenchmarkMeasure
> Point
) {
345 if (Representative
.empty())
346 Representative
.resize(Point
.size());
347 assert(Representative
.size() == Point
.size() &&
348 "All points should have identical dimensions.");
350 for (const auto &I
: zip(Representative
, Point
))
351 std::get
<0>(I
).push(std::get
<1>(I
));
354 std::vector
<BenchmarkMeasure
> SchedClassClusterCentroid::getAsPoint() const {
355 std::vector
<BenchmarkMeasure
> ClusterCenterPoint(Representative
.size());
356 for (const auto &I
: zip(ClusterCenterPoint
, Representative
))
357 std::get
<0>(I
).PerInstructionValue
= std::get
<1>(I
).avg();
358 return ClusterCenterPoint
;
361 bool SchedClassClusterCentroid::validate(
362 InstructionBenchmark::ModeE Mode
) const {
363 size_t NumMeasurements
= Representative
.size();
365 case InstructionBenchmark::Latency
:
366 if (NumMeasurements
!= 1) {
368 << "invalid number of measurements in latency mode: expected 1, got "
369 << NumMeasurements
<< "\n";
373 case InstructionBenchmark::Uops
:
374 // Can have many measurements.
376 case InstructionBenchmark::InverseThroughput
:
377 if (NumMeasurements
!= 1) {
378 errs() << "invalid number of measurements in inverse throughput "
379 "mode: expected 1, got "
380 << NumMeasurements
<< "\n";
385 llvm_unreachable("unimplemented measurement matching mode");
389 return true; // All good.
392 } // namespace exegesis