[yaml2obj/obj2yaml] - Add support for .stack_sizes sections.
[llvm-complete.git] / tools / llvm-exegesis / lib / Clustering.cpp
blob398bbf776af743606f759e530aa8a69820db6da6
1 //===-- Clustering.cpp ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "Clustering.h"
10 #include "llvm/ADT/SetVector.h"
11 #include "llvm/ADT/SmallSet.h"
12 #include "llvm/ADT/SmallVector.h"
13 #include <algorithm>
14 #include <string>
15 #include <vector>
17 namespace llvm {
18 namespace exegesis {
20 // The clustering problem has the following characteristics:
21 // (A) - Low dimension (dimensions are typically proc resource units,
22 // typically < 10).
23 // (B) - Number of points : ~thousands (points are measurements of an MCInst)
24 // (C) - Number of clusters: ~tens.
25 // (D) - The number of clusters is not known /a priory/.
26 // (E) - The amount of noise is relatively small.
27 // The problem is rather small. In terms of algorithms, (D) disqualifies
28 // k-means and makes algorithms such as DBSCAN[1] or OPTICS[2] more applicable.
30 // We've used DBSCAN here because it's simple to implement. This is a pretty
31 // straightforward and inefficient implementation of the pseudocode in [2].
33 // [1] https://en.wikipedia.org/wiki/DBSCAN
34 // [2] https://en.wikipedia.org/wiki/OPTICS_algorithm
36 // Finds the points at distance less than sqrt(EpsilonSquared) of Q (not
37 // including Q).
38 void InstructionBenchmarkClustering::rangeQuery(
39 const size_t Q, std::vector<size_t> &Neighbors) const {
40 Neighbors.clear();
41 Neighbors.reserve(Points_.size() - 1); // The Q itself isn't a neighbor.
42 const auto &QMeasurements = Points_[Q].Measurements;
43 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
44 if (P == Q)
45 continue;
46 const auto &PMeasurements = Points_[P].Measurements;
47 if (PMeasurements.empty()) // Error point.
48 continue;
49 if (isNeighbour(PMeasurements, QMeasurements,
50 AnalysisClusteringEpsilonSquared_)) {
51 Neighbors.push_back(P);
56 // Given a set of points, checks that all the points are neighbours
57 // up to AnalysisClusteringEpsilon. This is O(2*N).
58 bool InstructionBenchmarkClustering::areAllNeighbours(
59 ArrayRef<size_t> Pts) const {
60 // First, get the centroid of this group of points. This is O(N).
61 SchedClassClusterCentroid G;
62 llvm::for_each(Pts, [this, &G](size_t P) {
63 assert(P < Points_.size());
64 ArrayRef<BenchmarkMeasure> Measurements = Points_[P].Measurements;
65 if (Measurements.empty()) // Error point.
66 return;
67 G.addPoint(Measurements);
68 });
69 const std::vector<BenchmarkMeasure> Centroid = G.getAsPoint();
71 // Since we will be comparing with the centroid, we need to halve the epsilon.
72 double AnalysisClusteringEpsilonHalvedSquared =
73 AnalysisClusteringEpsilonSquared_ / 4.0;
75 // And now check that every point is a neighbour of the centroid. Also O(N).
76 return llvm::all_of(
77 Pts, [this, &Centroid, AnalysisClusteringEpsilonHalvedSquared](size_t P) {
78 assert(P < Points_.size());
79 const auto &PMeasurements = Points_[P].Measurements;
80 if (PMeasurements.empty()) // Error point.
81 return true; // Pretend that error point is a neighbour.
82 return isNeighbour(PMeasurements, Centroid,
83 AnalysisClusteringEpsilonHalvedSquared);
84 });
87 InstructionBenchmarkClustering::InstructionBenchmarkClustering(
88 const std::vector<InstructionBenchmark> &Points,
89 const double AnalysisClusteringEpsilonSquared)
90 : Points_(Points),
91 AnalysisClusteringEpsilonSquared_(AnalysisClusteringEpsilonSquared),
92 NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {}
94 llvm::Error InstructionBenchmarkClustering::validateAndSetup() {
95 ClusterIdForPoint_.resize(Points_.size());
96 // Mark erroneous measurements out.
97 // All points must have the same number of dimensions, in the same order.
98 const std::vector<BenchmarkMeasure> *LastMeasurement = nullptr;
99 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
100 const auto &Point = Points_[P];
101 if (!Point.Error.empty()) {
102 ClusterIdForPoint_[P] = ClusterId::error();
103 ErrorCluster_.PointIndices.push_back(P);
104 continue;
106 const auto *CurMeasurement = &Point.Measurements;
107 if (LastMeasurement) {
108 if (LastMeasurement->size() != CurMeasurement->size()) {
109 return llvm::make_error<llvm::StringError>(
110 "inconsistent measurement dimensions",
111 llvm::inconvertibleErrorCode());
113 for (size_t I = 0, E = LastMeasurement->size(); I < E; ++I) {
114 if (LastMeasurement->at(I).Key != CurMeasurement->at(I).Key) {
115 return llvm::make_error<llvm::StringError>(
116 "inconsistent measurement dimensions keys",
117 llvm::inconvertibleErrorCode());
121 LastMeasurement = CurMeasurement;
123 if (LastMeasurement) {
124 NumDimensions_ = LastMeasurement->size();
126 return llvm::Error::success();
129 void InstructionBenchmarkClustering::clusterizeDbScan(const size_t MinPts) {
130 std::vector<size_t> Neighbors; // Persistent buffer to avoid allocs.
131 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
132 if (!ClusterIdForPoint_[P].isUndef())
133 continue; // Previously processed in inner loop.
134 rangeQuery(P, Neighbors);
135 if (Neighbors.size() + 1 < MinPts) { // Density check.
136 // The region around P is not dense enough to create a new cluster, mark
137 // as noise for now.
138 ClusterIdForPoint_[P] = ClusterId::noise();
139 continue;
142 // Create a new cluster, add P.
143 Clusters_.emplace_back(ClusterId::makeValid(Clusters_.size()));
144 Cluster &CurrentCluster = Clusters_.back();
145 ClusterIdForPoint_[P] = CurrentCluster.Id; /* Label initial point */
146 CurrentCluster.PointIndices.push_back(P);
148 // Process P's neighbors.
149 llvm::SetVector<size_t, std::deque<size_t>> ToProcess;
150 ToProcess.insert(Neighbors.begin(), Neighbors.end());
151 while (!ToProcess.empty()) {
152 // Retrieve a point from the set.
153 const size_t Q = *ToProcess.begin();
154 ToProcess.erase(ToProcess.begin());
156 if (ClusterIdForPoint_[Q].isNoise()) {
157 // Change noise point to border point.
158 ClusterIdForPoint_[Q] = CurrentCluster.Id;
159 CurrentCluster.PointIndices.push_back(Q);
160 continue;
162 if (!ClusterIdForPoint_[Q].isUndef()) {
163 continue; // Previously processed.
165 // Add Q to the current custer.
166 ClusterIdForPoint_[Q] = CurrentCluster.Id;
167 CurrentCluster.PointIndices.push_back(Q);
168 // And extend to the neighbors of Q if the region is dense enough.
169 rangeQuery(Q, Neighbors);
170 if (Neighbors.size() + 1 >= MinPts) {
171 ToProcess.insert(Neighbors.begin(), Neighbors.end());
175 // assert(Neighbors.capacity() == (Points_.size() - 1));
176 // ^ True, but it is not quaranteed to be true in all the cases.
178 // Add noisy points to noise cluster.
179 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
180 if (ClusterIdForPoint_[P].isNoise()) {
181 NoiseCluster_.PointIndices.push_back(P);
186 void InstructionBenchmarkClustering::clusterizeNaive(unsigned NumOpcodes) {
187 // Given an instruction Opcode, which are the benchmarks of this instruction?
188 std::vector<llvm::SmallVector<size_t, 1>> OpcodeToPoints;
189 OpcodeToPoints.resize(NumOpcodes);
190 size_t NumOpcodesSeen = 0;
191 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
192 const InstructionBenchmark &Point = Points_[P];
193 const unsigned Opcode = Point.keyInstruction().getOpcode();
194 assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)");
195 llvm::SmallVectorImpl<size_t> &PointsOfOpcode = OpcodeToPoints[Opcode];
196 if (PointsOfOpcode.empty()) // If we previously have not seen any points of
197 ++NumOpcodesSeen; // this opcode, then naturally this is the new opcode.
198 PointsOfOpcode.emplace_back(P);
200 assert(OpcodeToPoints.size() == NumOpcodes && "sanity check");
201 assert(NumOpcodesSeen <= NumOpcodes &&
202 "can't see more opcodes than there are total opcodes");
203 assert(NumOpcodesSeen <= Points_.size() &&
204 "can't see more opcodes than there are total points");
206 Clusters_.reserve(NumOpcodesSeen); // One cluster per opcode.
207 for (ArrayRef<size_t> PointsOfOpcode : llvm::make_filter_range(
208 OpcodeToPoints, [](ArrayRef<size_t> PointsOfOpcode) {
209 return !PointsOfOpcode.empty(); // Ignore opcodes with no points.
210 })) {
211 // Create a new cluster.
212 Clusters_.emplace_back(ClusterId::makeValid(
213 Clusters_.size(), /*IsUnstable=*/!areAllNeighbours(PointsOfOpcode)));
214 Cluster &CurrentCluster = Clusters_.back();
215 // Mark points as belonging to the new cluster.
216 llvm::for_each(PointsOfOpcode, [this, &CurrentCluster](size_t P) {
217 ClusterIdForPoint_[P] = CurrentCluster.Id;
219 // And add all the points of this opcode to the new cluster.
220 CurrentCluster.PointIndices.reserve(PointsOfOpcode.size());
221 CurrentCluster.PointIndices.assign(PointsOfOpcode.begin(),
222 PointsOfOpcode.end());
223 assert(CurrentCluster.PointIndices.size() == PointsOfOpcode.size());
225 assert(Clusters_.size() == NumOpcodesSeen);
228 // Given an instruction Opcode, we can make benchmarks (measurements) of the
229 // instruction characteristics/performance. Then, to facilitate further analysis
230 // we group the benchmarks with *similar* characteristics into clusters.
231 // Now, this is all not entirely deterministic. Some instructions have variable
232 // characteristics, depending on their arguments. And thus, if we do several
233 // benchmarks of the same instruction Opcode, we may end up with *different*
234 // performance characteristics measurements. And when we then do clustering,
235 // these several benchmarks of the same instruction Opcode may end up being
236 // clustered into *different* clusters. This is not great for further analysis.
237 // We shall find every opcode with benchmarks not in just one cluster, and move
238 // *all* the benchmarks of said Opcode into one new unstable cluster per Opcode.
239 void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) {
240 // Given an instruction Opcode, in which clusters do benchmarks of this
241 // instruction lie? Normally, they all should be in the same cluster.
242 std::vector<llvm::SmallSet<ClusterId, 1>> OpcodeToClusterIDs;
243 OpcodeToClusterIDs.resize(NumOpcodes);
244 // The list of opcodes that have more than one cluster.
245 llvm::SetVector<size_t> UnstableOpcodes;
246 // Populate OpcodeToClusterIDs and UnstableOpcodes data structures.
247 assert(ClusterIdForPoint_.size() == Points_.size() && "size mismatch");
248 for (const auto &Point : zip(Points_, ClusterIdForPoint_)) {
249 const ClusterId &ClusterIdOfPoint = std::get<1>(Point);
250 if (!ClusterIdOfPoint.isValid())
251 continue; // Only process fully valid clusters.
252 const unsigned Opcode = std::get<0>(Point).keyInstruction().getOpcode();
253 assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)");
254 llvm::SmallSet<ClusterId, 1> &ClusterIDsOfOpcode =
255 OpcodeToClusterIDs[Opcode];
256 ClusterIDsOfOpcode.insert(ClusterIdOfPoint);
257 // Is there more than one ClusterID for this opcode?.
258 if (ClusterIDsOfOpcode.size() < 2)
259 continue; // If not, then at this moment this Opcode is stable.
260 // Else let's record this unstable opcode for future use.
261 UnstableOpcodes.insert(Opcode);
263 assert(OpcodeToClusterIDs.size() == NumOpcodes && "sanity check");
265 // We know with how many [new] clusters we will end up with.
266 const auto NewTotalClusterCount = Clusters_.size() + UnstableOpcodes.size();
267 Clusters_.reserve(NewTotalClusterCount);
268 for (const size_t UnstableOpcode : UnstableOpcodes.getArrayRef()) {
269 const llvm::SmallSet<ClusterId, 1> &ClusterIDs =
270 OpcodeToClusterIDs[UnstableOpcode];
271 assert(ClusterIDs.size() > 1 &&
272 "Should only have Opcodes with more than one cluster.");
274 // Create a new unstable cluster, one per Opcode.
275 Clusters_.emplace_back(ClusterId::makeValidUnstable(Clusters_.size()));
276 Cluster &UnstableCluster = Clusters_.back();
277 // We will find *at least* one point in each of these clusters.
278 UnstableCluster.PointIndices.reserve(ClusterIDs.size());
280 // Go through every cluster which we recorded as containing benchmarks
281 // of this UnstableOpcode. NOTE: we only recorded valid clusters.
282 for (const ClusterId &CID : ClusterIDs) {
283 assert(CID.isValid() &&
284 "We only recorded valid clusters, not noise/error clusters.");
285 Cluster &OldCluster = Clusters_[CID.getId()]; // Valid clusters storage.
286 // Within each cluster, go through each point, and either move it to the
287 // new unstable cluster, or 'keep' it.
288 // In this case, we'll reshuffle OldCluster.PointIndices vector
289 // so that all the points that are *not* for UnstableOpcode are first,
290 // and the rest of the points is for the UnstableOpcode.
291 const auto it = std::stable_partition(
292 OldCluster.PointIndices.begin(), OldCluster.PointIndices.end(),
293 [this, UnstableOpcode](size_t P) {
294 return Points_[P].keyInstruction().getOpcode() != UnstableOpcode;
296 assert(std::distance(it, OldCluster.PointIndices.end()) > 0 &&
297 "Should have found at least one bad point");
298 // Mark to-be-moved points as belonging to the new cluster.
299 std::for_each(it, OldCluster.PointIndices.end(),
300 [this, &UnstableCluster](size_t P) {
301 ClusterIdForPoint_[P] = UnstableCluster.Id;
303 // Actually append to-be-moved points to the new cluster.
304 UnstableCluster.PointIndices.insert(UnstableCluster.PointIndices.end(),
305 it, OldCluster.PointIndices.end());
306 // And finally, remove "to-be-moved" points form the old cluster.
307 OldCluster.PointIndices.erase(it, OldCluster.PointIndices.end());
308 // Now, the old cluster may end up being empty, but let's just keep it
309 // in whatever state it ended up. Purging empty clusters isn't worth it.
311 assert(UnstableCluster.PointIndices.size() > 1 &&
312 "New unstable cluster should end up with more than one point.");
313 assert(UnstableCluster.PointIndices.size() >= ClusterIDs.size() &&
314 "New unstable cluster should end up with no less points than there "
315 "was clusters");
317 assert(Clusters_.size() == NewTotalClusterCount && "sanity check");
320 llvm::Expected<InstructionBenchmarkClustering>
321 InstructionBenchmarkClustering::create(
322 const std::vector<InstructionBenchmark> &Points, const ModeE Mode,
323 const size_t DbscanMinPts, const double AnalysisClusteringEpsilon,
324 llvm::Optional<unsigned> NumOpcodes) {
325 InstructionBenchmarkClustering Clustering(
326 Points, AnalysisClusteringEpsilon * AnalysisClusteringEpsilon);
327 if (auto Error = Clustering.validateAndSetup()) {
328 return std::move(Error);
330 if (Clustering.ErrorCluster_.PointIndices.size() == Points.size()) {
331 return Clustering; // Nothing to cluster.
334 if (Mode == ModeE::Dbscan) {
335 Clustering.clusterizeDbScan(DbscanMinPts);
337 if (NumOpcodes.hasValue())
338 Clustering.stabilize(NumOpcodes.getValue());
339 } else /*if(Mode == ModeE::Naive)*/ {
340 if (!NumOpcodes.hasValue())
341 llvm::report_fatal_error(
342 "'naive' clustering mode requires opcode count to be specified");
343 Clustering.clusterizeNaive(NumOpcodes.getValue());
346 return Clustering;
349 void SchedClassClusterCentroid::addPoint(ArrayRef<BenchmarkMeasure> Point) {
350 if (Representative.empty())
351 Representative.resize(Point.size());
352 assert(Representative.size() == Point.size() &&
353 "All points should have identical dimensions.");
355 for (const auto &I : llvm::zip(Representative, Point))
356 std::get<0>(I).push(std::get<1>(I));
359 std::vector<BenchmarkMeasure> SchedClassClusterCentroid::getAsPoint() const {
360 std::vector<BenchmarkMeasure> ClusterCenterPoint(Representative.size());
361 for (const auto &I : llvm::zip(ClusterCenterPoint, Representative))
362 std::get<0>(I).PerInstructionValue = std::get<1>(I).avg();
363 return ClusterCenterPoint;
366 bool SchedClassClusterCentroid::validate(
367 InstructionBenchmark::ModeE Mode) const {
368 size_t NumMeasurements = Representative.size();
369 switch (Mode) {
370 case InstructionBenchmark::Latency:
371 if (NumMeasurements != 1) {
372 llvm::errs()
373 << "invalid number of measurements in latency mode: expected 1, got "
374 << NumMeasurements << "\n";
375 return false;
377 break;
378 case InstructionBenchmark::Uops:
379 // Can have many measurements.
380 break;
381 case InstructionBenchmark::InverseThroughput:
382 if (NumMeasurements != 1) {
383 llvm::errs() << "invalid number of measurements in inverse throughput "
384 "mode: expected 1, got "
385 << NumMeasurements << "\n";
386 return false;
388 break;
389 default:
390 llvm_unreachable("unimplemented measurement matching mode");
391 return false;
394 return true; // All good.
397 } // namespace exegesis
398 } // namespace llvm