1 //===-- Clustering.cpp ------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "Clustering.h"
11 #include "SchedClassResolution.h"
12 #include "llvm/ADT/MapVector.h"
13 #include "llvm/ADT/SetVector.h"
14 #include "llvm/ADT/SmallSet.h"
15 #include "llvm/ADT/SmallVector.h"
24 // The clustering problem has the following characteristics:
25 // (A) - Low dimension (dimensions are typically proc resource units,
27 // (B) - Number of points : ~thousands (points are measurements of an MCInst)
28 // (C) - Number of clusters: ~tens.
29 // (D) - The number of clusters is not known /a priory/.
30 // (E) - The amount of noise is relatively small.
31 // The problem is rather small. In terms of algorithms, (D) disqualifies
32 // k-means and makes algorithms such as DBSCAN[1] or OPTICS[2] more applicable.
34 // We've used DBSCAN here because it's simple to implement. This is a pretty
35 // straightforward and inefficient implementation of the pseudocode in [2].
37 // [1] https://en.wikipedia.org/wiki/DBSCAN
38 // [2] https://en.wikipedia.org/wiki/OPTICS_algorithm
40 // Finds the points at distance less than sqrt(EpsilonSquared) of Q (not
42 void InstructionBenchmarkClustering::rangeQuery(
43 const size_t Q
, std::vector
<size_t> &Neighbors
) const {
45 Neighbors
.reserve(Points_
.size() - 1); // The Q itself isn't a neighbor.
46 const auto &QMeasurements
= Points_
[Q
].Measurements
;
47 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
50 const auto &PMeasurements
= Points_
[P
].Measurements
;
51 if (PMeasurements
.empty()) // Error point.
53 if (isNeighbour(PMeasurements
, QMeasurements
,
54 AnalysisClusteringEpsilonSquared_
)) {
55 Neighbors
.push_back(P
);
60 // Given a set of points, checks that all the points are neighbours
61 // up to AnalysisClusteringEpsilon. This is O(2*N).
62 bool InstructionBenchmarkClustering::areAllNeighbours(
63 ArrayRef
<size_t> Pts
) const {
64 // First, get the centroid of this group of points. This is O(N).
65 SchedClassClusterCentroid G
;
66 for_each(Pts
, [this, &G
](size_t P
) {
67 assert(P
< Points_
.size());
68 ArrayRef
<BenchmarkMeasure
> Measurements
= Points_
[P
].Measurements
;
69 if (Measurements
.empty()) // Error point.
71 G
.addPoint(Measurements
);
73 const std::vector
<BenchmarkMeasure
> Centroid
= G
.getAsPoint();
75 // Since we will be comparing with the centroid, we need to halve the epsilon.
76 double AnalysisClusteringEpsilonHalvedSquared
=
77 AnalysisClusteringEpsilonSquared_
/ 4.0;
79 // And now check that every point is a neighbour of the centroid. Also O(N).
81 Pts
, [this, &Centroid
, AnalysisClusteringEpsilonHalvedSquared
](size_t P
) {
82 assert(P
< Points_
.size());
83 const auto &PMeasurements
= Points_
[P
].Measurements
;
84 if (PMeasurements
.empty()) // Error point.
85 return true; // Pretend that error point is a neighbour.
86 return isNeighbour(PMeasurements
, Centroid
,
87 AnalysisClusteringEpsilonHalvedSquared
);
91 InstructionBenchmarkClustering::InstructionBenchmarkClustering(
92 const std::vector
<InstructionBenchmark
> &Points
,
93 const double AnalysisClusteringEpsilonSquared
)
95 AnalysisClusteringEpsilonSquared_(AnalysisClusteringEpsilonSquared
),
96 NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {}
98 Error
InstructionBenchmarkClustering::validateAndSetup() {
99 ClusterIdForPoint_
.resize(Points_
.size());
100 // Mark erroneous measurements out.
101 // All points must have the same number of dimensions, in the same order.
102 const std::vector
<BenchmarkMeasure
> *LastMeasurement
= nullptr;
103 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
104 const auto &Point
= Points_
[P
];
105 if (!Point
.Error
.empty()) {
106 ClusterIdForPoint_
[P
] = ClusterId::error();
107 ErrorCluster_
.PointIndices
.push_back(P
);
110 const auto *CurMeasurement
= &Point
.Measurements
;
111 if (LastMeasurement
) {
112 if (LastMeasurement
->size() != CurMeasurement
->size()) {
113 return make_error
<ClusteringError
>(
114 "inconsistent measurement dimensions");
116 for (size_t I
= 0, E
= LastMeasurement
->size(); I
< E
; ++I
) {
117 if (LastMeasurement
->at(I
).Key
!= CurMeasurement
->at(I
).Key
) {
118 return make_error
<ClusteringError
>(
119 "inconsistent measurement dimensions keys");
123 LastMeasurement
= CurMeasurement
;
125 if (LastMeasurement
) {
126 NumDimensions_
= LastMeasurement
->size();
128 return Error::success();
131 void InstructionBenchmarkClustering::clusterizeDbScan(const size_t MinPts
) {
132 std::vector
<size_t> Neighbors
; // Persistent buffer to avoid allocs.
133 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
134 if (!ClusterIdForPoint_
[P
].isUndef())
135 continue; // Previously processed in inner loop.
136 rangeQuery(P
, Neighbors
);
137 if (Neighbors
.size() + 1 < MinPts
) { // Density check.
138 // The region around P is not dense enough to create a new cluster, mark
140 ClusterIdForPoint_
[P
] = ClusterId::noise();
144 // Create a new cluster, add P.
145 Clusters_
.emplace_back(ClusterId::makeValid(Clusters_
.size()));
146 Cluster
&CurrentCluster
= Clusters_
.back();
147 ClusterIdForPoint_
[P
] = CurrentCluster
.Id
; /* Label initial point */
148 CurrentCluster
.PointIndices
.push_back(P
);
150 // Process P's neighbors.
151 SetVector
<size_t, std::deque
<size_t>> ToProcess
;
152 ToProcess
.insert(Neighbors
.begin(), Neighbors
.end());
153 while (!ToProcess
.empty()) {
154 // Retrieve a point from the set.
155 const size_t Q
= *ToProcess
.begin();
156 ToProcess
.erase(ToProcess
.begin());
158 if (ClusterIdForPoint_
[Q
].isNoise()) {
159 // Change noise point to border point.
160 ClusterIdForPoint_
[Q
] = CurrentCluster
.Id
;
161 CurrentCluster
.PointIndices
.push_back(Q
);
164 if (!ClusterIdForPoint_
[Q
].isUndef()) {
165 continue; // Previously processed.
167 // Add Q to the current custer.
168 ClusterIdForPoint_
[Q
] = CurrentCluster
.Id
;
169 CurrentCluster
.PointIndices
.push_back(Q
);
170 // And extend to the neighbors of Q if the region is dense enough.
171 rangeQuery(Q
, Neighbors
);
172 if (Neighbors
.size() + 1 >= MinPts
) {
173 ToProcess
.insert(Neighbors
.begin(), Neighbors
.end());
177 // assert(Neighbors.capacity() == (Points_.size() - 1));
178 // ^ True, but it is not quaranteed to be true in all the cases.
180 // Add noisy points to noise cluster.
181 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
182 if (ClusterIdForPoint_
[P
].isNoise()) {
183 NoiseCluster_
.PointIndices
.push_back(P
);
188 void InstructionBenchmarkClustering::clusterizeNaive(
189 const MCSubtargetInfo
&SubtargetInfo
, const MCInstrInfo
&InstrInfo
) {
190 // Given an instruction Opcode, which sched class id's are represented,
191 // and which are the benchmarks for each sched class?
192 std::vector
<SmallMapVector
<unsigned, SmallVector
<size_t, 1>, 1>>
193 OpcodeToSchedClassesToPoints
;
194 const unsigned NumOpcodes
= InstrInfo
.getNumOpcodes();
195 OpcodeToSchedClassesToPoints
.resize(NumOpcodes
);
196 size_t NumClusters
= 0;
197 for (size_t P
= 0, NumPoints
= Points_
.size(); P
< NumPoints
; ++P
) {
198 const InstructionBenchmark
&Point
= Points_
[P
];
199 const MCInst
&MCI
= Point
.keyInstruction();
200 unsigned SchedClassId
;
201 std::tie(SchedClassId
, std::ignore
) =
202 ResolvedSchedClass::resolveSchedClassId(SubtargetInfo
, InstrInfo
, MCI
);
203 const unsigned Opcode
= MCI
.getOpcode();
204 assert(Opcode
< NumOpcodes
&& "NumOpcodes is incorrect (too small)");
205 auto &Points
= OpcodeToSchedClassesToPoints
[Opcode
][SchedClassId
];
206 if (Points
.empty()) // If we previously have not seen any points of
207 ++NumClusters
; // this opcode's sched class, then new cluster begins.
208 Points
.emplace_back(P
);
210 assert(NumClusters
<= NumOpcodes
&&
211 "can't see more opcodes than there are total opcodes");
212 assert(NumClusters
<= Points_
.size() &&
213 "can't see more opcodes than there are total points");
215 Clusters_
.reserve(NumClusters
); // We already know how many clusters there is.
216 for (const auto &SchedClassesOfOpcode
: OpcodeToSchedClassesToPoints
) {
217 if (SchedClassesOfOpcode
.empty())
219 for (ArrayRef
<size_t> PointsOfSchedClass
:
220 make_second_range(SchedClassesOfOpcode
)) {
221 if (PointsOfSchedClass
.empty())
223 // Create a new cluster.
224 Clusters_
.emplace_back(ClusterId::makeValid(
226 /*IsUnstable=*/!areAllNeighbours(PointsOfSchedClass
)));
227 Cluster
&CurrentCluster
= Clusters_
.back();
228 // Mark points as belonging to the new cluster.
229 for_each(PointsOfSchedClass
, [this, &CurrentCluster
](size_t P
) {
230 ClusterIdForPoint_
[P
] = CurrentCluster
.Id
;
232 // And add all the points of this opcode's sched class to the new cluster.
233 CurrentCluster
.PointIndices
.reserve(PointsOfSchedClass
.size());
234 CurrentCluster
.PointIndices
.assign(PointsOfSchedClass
.begin(),
235 PointsOfSchedClass
.end());
236 assert(CurrentCluster
.PointIndices
.size() == PointsOfSchedClass
.size());
239 assert(Clusters_
.size() == NumClusters
);
242 // Given an instruction Opcode, we can make benchmarks (measurements) of the
243 // instruction characteristics/performance. Then, to facilitate further analysis
244 // we group the benchmarks with *similar* characteristics into clusters.
245 // Now, this is all not entirely deterministic. Some instructions have variable
246 // characteristics, depending on their arguments. And thus, if we do several
247 // benchmarks of the same instruction Opcode, we may end up with *different*
248 // performance characteristics measurements. And when we then do clustering,
249 // these several benchmarks of the same instruction Opcode may end up being
250 // clustered into *different* clusters. This is not great for further analysis.
251 // We shall find every opcode with benchmarks not in just one cluster, and move
252 // *all* the benchmarks of said Opcode into one new unstable cluster per Opcode.
253 void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes
) {
254 // Given an instruction Opcode and Config, in which clusters do benchmarks of
255 // this instruction lie? Normally, they all should be in the same cluster.
256 struct OpcodeAndConfig
{
257 explicit OpcodeAndConfig(const InstructionBenchmark
&IB
)
258 : Opcode(IB
.keyInstruction().getOpcode()), Config(&IB
.Key
.Config
) {}
260 const std::string
*Config
;
262 auto Tie() const -> auto { return std::tie(Opcode
, *Config
); }
264 bool operator<(const OpcodeAndConfig
&O
) const { return Tie() < O
.Tie(); }
265 bool operator!=(const OpcodeAndConfig
&O
) const { return Tie() != O
.Tie(); }
267 std::map
<OpcodeAndConfig
, SmallSet
<ClusterId
, 1>> OpcodeConfigToClusterIDs
;
268 // Populate OpcodeConfigToClusterIDs and UnstableOpcodes data structures.
269 assert(ClusterIdForPoint_
.size() == Points_
.size() && "size mismatch");
270 for (auto Point
: zip(Points_
, ClusterIdForPoint_
)) {
271 const ClusterId
&ClusterIdOfPoint
= std::get
<1>(Point
);
272 if (!ClusterIdOfPoint
.isValid())
273 continue; // Only process fully valid clusters.
274 const OpcodeAndConfig
Key(std::get
<0>(Point
));
275 SmallSet
<ClusterId
, 1> &ClusterIDsOfOpcode
= OpcodeConfigToClusterIDs
[Key
];
276 ClusterIDsOfOpcode
.insert(ClusterIdOfPoint
);
279 for (const auto &OpcodeConfigToClusterID
: OpcodeConfigToClusterIDs
) {
280 const SmallSet
<ClusterId
, 1> &ClusterIDs
= OpcodeConfigToClusterID
.second
;
281 const OpcodeAndConfig
&Key
= OpcodeConfigToClusterID
.first
;
282 // We only care about unstable instructions.
283 if (ClusterIDs
.size() < 2)
286 // Create a new unstable cluster, one per Opcode.
287 Clusters_
.emplace_back(ClusterId::makeValidUnstable(Clusters_
.size()));
288 Cluster
&UnstableCluster
= Clusters_
.back();
289 // We will find *at least* one point in each of these clusters.
290 UnstableCluster
.PointIndices
.reserve(ClusterIDs
.size());
292 // Go through every cluster which we recorded as containing benchmarks
293 // of this UnstableOpcode. NOTE: we only recorded valid clusters.
294 for (const ClusterId
&CID
: ClusterIDs
) {
295 assert(CID
.isValid() &&
296 "We only recorded valid clusters, not noise/error clusters.");
297 Cluster
&OldCluster
= Clusters_
[CID
.getId()]; // Valid clusters storage.
298 // Within each cluster, go through each point, and either move it to the
299 // new unstable cluster, or 'keep' it.
300 // In this case, we'll reshuffle OldCluster.PointIndices vector
301 // so that all the points that are *not* for UnstableOpcode are first,
302 // and the rest of the points is for the UnstableOpcode.
303 const auto it
= std::stable_partition(
304 OldCluster
.PointIndices
.begin(), OldCluster
.PointIndices
.end(),
305 [this, &Key
](size_t P
) {
306 return OpcodeAndConfig(Points_
[P
]) != Key
;
308 assert(std::distance(it
, OldCluster
.PointIndices
.end()) > 0 &&
309 "Should have found at least one bad point");
310 // Mark to-be-moved points as belonging to the new cluster.
311 std::for_each(it
, OldCluster
.PointIndices
.end(),
312 [this, &UnstableCluster
](size_t P
) {
313 ClusterIdForPoint_
[P
] = UnstableCluster
.Id
;
315 // Actually append to-be-moved points to the new cluster.
316 UnstableCluster
.PointIndices
.insert(UnstableCluster
.PointIndices
.end(),
317 it
, OldCluster
.PointIndices
.end());
318 // And finally, remove "to-be-moved" points form the old cluster.
319 OldCluster
.PointIndices
.erase(it
, OldCluster
.PointIndices
.end());
320 // Now, the old cluster may end up being empty, but let's just keep it
321 // in whatever state it ended up. Purging empty clusters isn't worth it.
323 assert(UnstableCluster
.PointIndices
.size() > 1 &&
324 "New unstable cluster should end up with more than one point.");
325 assert(UnstableCluster
.PointIndices
.size() >= ClusterIDs
.size() &&
326 "New unstable cluster should end up with no less points than there "
331 Expected
<InstructionBenchmarkClustering
> InstructionBenchmarkClustering::create(
332 const std::vector
<InstructionBenchmark
> &Points
, const ModeE Mode
,
333 const size_t DbscanMinPts
, const double AnalysisClusteringEpsilon
,
334 const MCSubtargetInfo
*SubtargetInfo
, const MCInstrInfo
*InstrInfo
) {
335 InstructionBenchmarkClustering
Clustering(
336 Points
, AnalysisClusteringEpsilon
* AnalysisClusteringEpsilon
);
337 if (auto Error
= Clustering
.validateAndSetup()) {
338 return std::move(Error
);
340 if (Clustering
.ErrorCluster_
.PointIndices
.size() == Points
.size()) {
341 return Clustering
; // Nothing to cluster.
344 if (Mode
== ModeE::Dbscan
) {
345 Clustering
.clusterizeDbScan(DbscanMinPts
);
348 Clustering
.stabilize(InstrInfo
->getNumOpcodes());
349 } else /*if(Mode == ModeE::Naive)*/ {
350 if (!SubtargetInfo
|| !InstrInfo
)
351 return make_error
<Failure
>("'naive' clustering mode requires "
352 "SubtargetInfo and InstrInfo to be present");
353 Clustering
.clusterizeNaive(*SubtargetInfo
, *InstrInfo
);
359 void SchedClassClusterCentroid::addPoint(ArrayRef
<BenchmarkMeasure
> Point
) {
360 if (Representative
.empty())
361 Representative
.resize(Point
.size());
362 assert(Representative
.size() == Point
.size() &&
363 "All points should have identical dimensions.");
365 for (auto I
: zip(Representative
, Point
))
366 std::get
<0>(I
).push(std::get
<1>(I
));
369 std::vector
<BenchmarkMeasure
> SchedClassClusterCentroid::getAsPoint() const {
370 std::vector
<BenchmarkMeasure
> ClusterCenterPoint(Representative
.size());
371 for (auto I
: zip(ClusterCenterPoint
, Representative
))
372 std::get
<0>(I
).PerInstructionValue
= std::get
<1>(I
).avg();
373 return ClusterCenterPoint
;
376 bool SchedClassClusterCentroid::validate(
377 InstructionBenchmark::ModeE Mode
) const {
378 size_t NumMeasurements
= Representative
.size();
380 case InstructionBenchmark::Latency
:
381 if (NumMeasurements
!= 1) {
383 << "invalid number of measurements in latency mode: expected 1, got "
384 << NumMeasurements
<< "\n";
388 case InstructionBenchmark::Uops
:
389 // Can have many measurements.
391 case InstructionBenchmark::InverseThroughput
:
392 if (NumMeasurements
!= 1) {
393 errs() << "invalid number of measurements in inverse throughput "
394 "mode: expected 1, got "
395 << NumMeasurements
<< "\n";
400 llvm_unreachable("unimplemented measurement matching mode");
404 return true; // All good.
407 } // namespace exegesis