[MIParser] Set RegClassOrRegBank during instruction parsing
[llvm-complete.git] / tools / llvm-exegesis / lib / Clustering.cpp
blob7fb500e8399dbcda7d2ca3e249820f3a64ef00a5
1 //===-- Clustering.cpp ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "Clustering.h"
10 #include "llvm/ADT/SetVector.h"
11 #include "llvm/ADT/SmallSet.h"
12 #include "llvm/ADT/SmallVector.h"
13 #include <algorithm>
14 #include <string>
15 #include <vector>
17 namespace llvm {
18 namespace exegesis {
20 // The clustering problem has the following characteristics:
21 // (A) - Low dimension (dimensions are typically proc resource units,
22 // typically < 10).
23 // (B) - Number of points : ~thousands (points are measurements of an MCInst)
24 // (C) - Number of clusters: ~tens.
25 // (D) - The number of clusters is not known /a priory/.
26 // (E) - The amount of noise is relatively small.
27 // The problem is rather small. In terms of algorithms, (D) disqualifies
28 // k-means and makes algorithms such as DBSCAN[1] or OPTICS[2] more applicable.
30 // We've used DBSCAN here because it's simple to implement. This is a pretty
31 // straightforward and inefficient implementation of the pseudocode in [2].
33 // [1] https://en.wikipedia.org/wiki/DBSCAN
34 // [2] https://en.wikipedia.org/wiki/OPTICS_algorithm
36 // Finds the points at distance less than sqrt(EpsilonSquared) of Q (not
37 // including Q).
38 void InstructionBenchmarkClustering::rangeQuery(
39 const size_t Q, std::vector<size_t> &Neighbors) const {
40 Neighbors.clear();
41 Neighbors.reserve(Points_.size() - 1); // The Q itself isn't a neighbor.
42 const auto &QMeasurements = Points_[Q].Measurements;
43 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
44 if (P == Q)
45 continue;
46 const auto &PMeasurements = Points_[P].Measurements;
47 if (PMeasurements.empty()) // Error point.
48 continue;
49 if (isNeighbour(PMeasurements, QMeasurements,
50 AnalysisClusteringEpsilonSquared_)) {
51 Neighbors.push_back(P);
56 // Given a set of points, checks that all the points are neighbours
57 // up to AnalysisClusteringEpsilon. This is O(2*N).
58 bool InstructionBenchmarkClustering::areAllNeighbours(
59 ArrayRef<size_t> Pts) const {
60 // First, get the centroid of this group of points. This is O(N).
61 SchedClassClusterCentroid G;
62 for_each(Pts, [this, &G](size_t P) {
63 assert(P < Points_.size());
64 ArrayRef<BenchmarkMeasure> Measurements = Points_[P].Measurements;
65 if (Measurements.empty()) // Error point.
66 return;
67 G.addPoint(Measurements);
68 });
69 const std::vector<BenchmarkMeasure> Centroid = G.getAsPoint();
71 // Since we will be comparing with the centroid, we need to halve the epsilon.
72 double AnalysisClusteringEpsilonHalvedSquared =
73 AnalysisClusteringEpsilonSquared_ / 4.0;
75 // And now check that every point is a neighbour of the centroid. Also O(N).
76 return all_of(
77 Pts, [this, &Centroid, AnalysisClusteringEpsilonHalvedSquared](size_t P) {
78 assert(P < Points_.size());
79 const auto &PMeasurements = Points_[P].Measurements;
80 if (PMeasurements.empty()) // Error point.
81 return true; // Pretend that error point is a neighbour.
82 return isNeighbour(PMeasurements, Centroid,
83 AnalysisClusteringEpsilonHalvedSquared);
84 });
87 InstructionBenchmarkClustering::InstructionBenchmarkClustering(
88 const std::vector<InstructionBenchmark> &Points,
89 const double AnalysisClusteringEpsilonSquared)
90 : Points_(Points),
91 AnalysisClusteringEpsilonSquared_(AnalysisClusteringEpsilonSquared),
92 NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {}
94 Error InstructionBenchmarkClustering::validateAndSetup() {
95 ClusterIdForPoint_.resize(Points_.size());
96 // Mark erroneous measurements out.
97 // All points must have the same number of dimensions, in the same order.
98 const std::vector<BenchmarkMeasure> *LastMeasurement = nullptr;
99 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
100 const auto &Point = Points_[P];
101 if (!Point.Error.empty()) {
102 ClusterIdForPoint_[P] = ClusterId::error();
103 ErrorCluster_.PointIndices.push_back(P);
104 continue;
106 const auto *CurMeasurement = &Point.Measurements;
107 if (LastMeasurement) {
108 if (LastMeasurement->size() != CurMeasurement->size()) {
109 return make_error<StringError>("inconsistent measurement dimensions",
110 inconvertibleErrorCode());
112 for (size_t I = 0, E = LastMeasurement->size(); I < E; ++I) {
113 if (LastMeasurement->at(I).Key != CurMeasurement->at(I).Key) {
114 return make_error<StringError>(
115 "inconsistent measurement dimensions keys",
116 inconvertibleErrorCode());
120 LastMeasurement = CurMeasurement;
122 if (LastMeasurement) {
123 NumDimensions_ = LastMeasurement->size();
125 return Error::success();
128 void InstructionBenchmarkClustering::clusterizeDbScan(const size_t MinPts) {
129 std::vector<size_t> Neighbors; // Persistent buffer to avoid allocs.
130 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
131 if (!ClusterIdForPoint_[P].isUndef())
132 continue; // Previously processed in inner loop.
133 rangeQuery(P, Neighbors);
134 if (Neighbors.size() + 1 < MinPts) { // Density check.
135 // The region around P is not dense enough to create a new cluster, mark
136 // as noise for now.
137 ClusterIdForPoint_[P] = ClusterId::noise();
138 continue;
141 // Create a new cluster, add P.
142 Clusters_.emplace_back(ClusterId::makeValid(Clusters_.size()));
143 Cluster &CurrentCluster = Clusters_.back();
144 ClusterIdForPoint_[P] = CurrentCluster.Id; /* Label initial point */
145 CurrentCluster.PointIndices.push_back(P);
147 // Process P's neighbors.
148 SetVector<size_t, std::deque<size_t>> ToProcess;
149 ToProcess.insert(Neighbors.begin(), Neighbors.end());
150 while (!ToProcess.empty()) {
151 // Retrieve a point from the set.
152 const size_t Q = *ToProcess.begin();
153 ToProcess.erase(ToProcess.begin());
155 if (ClusterIdForPoint_[Q].isNoise()) {
156 // Change noise point to border point.
157 ClusterIdForPoint_[Q] = CurrentCluster.Id;
158 CurrentCluster.PointIndices.push_back(Q);
159 continue;
161 if (!ClusterIdForPoint_[Q].isUndef()) {
162 continue; // Previously processed.
164 // Add Q to the current custer.
165 ClusterIdForPoint_[Q] = CurrentCluster.Id;
166 CurrentCluster.PointIndices.push_back(Q);
167 // And extend to the neighbors of Q if the region is dense enough.
168 rangeQuery(Q, Neighbors);
169 if (Neighbors.size() + 1 >= MinPts) {
170 ToProcess.insert(Neighbors.begin(), Neighbors.end());
174 // assert(Neighbors.capacity() == (Points_.size() - 1));
175 // ^ True, but it is not quaranteed to be true in all the cases.
177 // Add noisy points to noise cluster.
178 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
179 if (ClusterIdForPoint_[P].isNoise()) {
180 NoiseCluster_.PointIndices.push_back(P);
185 void InstructionBenchmarkClustering::clusterizeNaive(unsigned NumOpcodes) {
186 // Given an instruction Opcode, which are the benchmarks of this instruction?
187 std::vector<SmallVector<size_t, 1>> OpcodeToPoints;
188 OpcodeToPoints.resize(NumOpcodes);
189 size_t NumOpcodesSeen = 0;
190 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
191 const InstructionBenchmark &Point = Points_[P];
192 const unsigned Opcode = Point.keyInstruction().getOpcode();
193 assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)");
194 SmallVectorImpl<size_t> &PointsOfOpcode = OpcodeToPoints[Opcode];
195 if (PointsOfOpcode.empty()) // If we previously have not seen any points of
196 ++NumOpcodesSeen; // this opcode, then naturally this is the new opcode.
197 PointsOfOpcode.emplace_back(P);
199 assert(OpcodeToPoints.size() == NumOpcodes && "sanity check");
200 assert(NumOpcodesSeen <= NumOpcodes &&
201 "can't see more opcodes than there are total opcodes");
202 assert(NumOpcodesSeen <= Points_.size() &&
203 "can't see more opcodes than there are total points");
205 Clusters_.reserve(NumOpcodesSeen); // One cluster per opcode.
206 for (ArrayRef<size_t> PointsOfOpcode :
207 make_filter_range(OpcodeToPoints, [](ArrayRef<size_t> PointsOfOpcode) {
208 return !PointsOfOpcode.empty(); // Ignore opcodes with no points.
209 })) {
210 // Create a new cluster.
211 Clusters_.emplace_back(ClusterId::makeValid(
212 Clusters_.size(), /*IsUnstable=*/!areAllNeighbours(PointsOfOpcode)));
213 Cluster &CurrentCluster = Clusters_.back();
214 // Mark points as belonging to the new cluster.
215 for_each(PointsOfOpcode, [this, &CurrentCluster](size_t P) {
216 ClusterIdForPoint_[P] = CurrentCluster.Id;
218 // And add all the points of this opcode to the new cluster.
219 CurrentCluster.PointIndices.reserve(PointsOfOpcode.size());
220 CurrentCluster.PointIndices.assign(PointsOfOpcode.begin(),
221 PointsOfOpcode.end());
222 assert(CurrentCluster.PointIndices.size() == PointsOfOpcode.size());
224 assert(Clusters_.size() == NumOpcodesSeen);
227 // Given an instruction Opcode, we can make benchmarks (measurements) of the
228 // instruction characteristics/performance. Then, to facilitate further analysis
229 // we group the benchmarks with *similar* characteristics into clusters.
230 // Now, this is all not entirely deterministic. Some instructions have variable
231 // characteristics, depending on their arguments. And thus, if we do several
232 // benchmarks of the same instruction Opcode, we may end up with *different*
233 // performance characteristics measurements. And when we then do clustering,
234 // these several benchmarks of the same instruction Opcode may end up being
235 // clustered into *different* clusters. This is not great for further analysis.
236 // We shall find every opcode with benchmarks not in just one cluster, and move
237 // *all* the benchmarks of said Opcode into one new unstable cluster per Opcode.
238 void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) {
239 // Given an instruction Opcode and Config, in which clusters do benchmarks of
240 // this instruction lie? Normally, they all should be in the same cluster.
241 struct OpcodeAndConfig {
242 explicit OpcodeAndConfig(const InstructionBenchmark &IB)
243 : Opcode(IB.keyInstruction().getOpcode()), Config(&IB.Key.Config) {}
244 unsigned Opcode;
245 const std::string *Config;
247 auto Tie() const -> auto { return std::tie(Opcode, *Config); }
249 bool operator<(const OpcodeAndConfig &O) const { return Tie() < O.Tie(); }
250 bool operator!=(const OpcodeAndConfig &O) const { return Tie() != O.Tie(); }
252 std::map<OpcodeAndConfig, SmallSet<ClusterId, 1>> OpcodeConfigToClusterIDs;
253 // Populate OpcodeConfigToClusterIDs and UnstableOpcodes data structures.
254 assert(ClusterIdForPoint_.size() == Points_.size() && "size mismatch");
255 for (const auto &Point : zip(Points_, ClusterIdForPoint_)) {
256 const ClusterId &ClusterIdOfPoint = std::get<1>(Point);
257 if (!ClusterIdOfPoint.isValid())
258 continue; // Only process fully valid clusters.
259 const OpcodeAndConfig Key(std::get<0>(Point));
260 SmallSet<ClusterId, 1> &ClusterIDsOfOpcode = OpcodeConfigToClusterIDs[Key];
261 ClusterIDsOfOpcode.insert(ClusterIdOfPoint);
264 for (const auto &OpcodeConfigToClusterID : OpcodeConfigToClusterIDs) {
265 const SmallSet<ClusterId, 1> &ClusterIDs = OpcodeConfigToClusterID.second;
266 const OpcodeAndConfig &Key = OpcodeConfigToClusterID.first;
267 // We only care about unstable instructions.
268 if (ClusterIDs.size() < 2)
269 continue;
271 // Create a new unstable cluster, one per Opcode.
272 Clusters_.emplace_back(ClusterId::makeValidUnstable(Clusters_.size()));
273 Cluster &UnstableCluster = Clusters_.back();
274 // We will find *at least* one point in each of these clusters.
275 UnstableCluster.PointIndices.reserve(ClusterIDs.size());
277 // Go through every cluster which we recorded as containing benchmarks
278 // of this UnstableOpcode. NOTE: we only recorded valid clusters.
279 for (const ClusterId &CID : ClusterIDs) {
280 assert(CID.isValid() &&
281 "We only recorded valid clusters, not noise/error clusters.");
282 Cluster &OldCluster = Clusters_[CID.getId()]; // Valid clusters storage.
283 // Within each cluster, go through each point, and either move it to the
284 // new unstable cluster, or 'keep' it.
285 // In this case, we'll reshuffle OldCluster.PointIndices vector
286 // so that all the points that are *not* for UnstableOpcode are first,
287 // and the rest of the points is for the UnstableOpcode.
288 const auto it = std::stable_partition(
289 OldCluster.PointIndices.begin(), OldCluster.PointIndices.end(),
290 [this, &Key](size_t P) {
291 return OpcodeAndConfig(Points_[P]) != Key;
293 assert(std::distance(it, OldCluster.PointIndices.end()) > 0 &&
294 "Should have found at least one bad point");
295 // Mark to-be-moved points as belonging to the new cluster.
296 std::for_each(it, OldCluster.PointIndices.end(),
297 [this, &UnstableCluster](size_t P) {
298 ClusterIdForPoint_[P] = UnstableCluster.Id;
300 // Actually append to-be-moved points to the new cluster.
301 UnstableCluster.PointIndices.insert(UnstableCluster.PointIndices.end(),
302 it, OldCluster.PointIndices.end());
303 // And finally, remove "to-be-moved" points form the old cluster.
304 OldCluster.PointIndices.erase(it, OldCluster.PointIndices.end());
305 // Now, the old cluster may end up being empty, but let's just keep it
306 // in whatever state it ended up. Purging empty clusters isn't worth it.
308 assert(UnstableCluster.PointIndices.size() > 1 &&
309 "New unstable cluster should end up with more than one point.");
310 assert(UnstableCluster.PointIndices.size() >= ClusterIDs.size() &&
311 "New unstable cluster should end up with no less points than there "
312 "was clusters");
316 Expected<InstructionBenchmarkClustering> InstructionBenchmarkClustering::create(
317 const std::vector<InstructionBenchmark> &Points, const ModeE Mode,
318 const size_t DbscanMinPts, const double AnalysisClusteringEpsilon,
319 Optional<unsigned> NumOpcodes) {
320 InstructionBenchmarkClustering Clustering(
321 Points, AnalysisClusteringEpsilon * AnalysisClusteringEpsilon);
322 if (auto Error = Clustering.validateAndSetup()) {
323 return std::move(Error);
325 if (Clustering.ErrorCluster_.PointIndices.size() == Points.size()) {
326 return Clustering; // Nothing to cluster.
329 if (Mode == ModeE::Dbscan) {
330 Clustering.clusterizeDbScan(DbscanMinPts);
332 if (NumOpcodes.hasValue())
333 Clustering.stabilize(NumOpcodes.getValue());
334 } else /*if(Mode == ModeE::Naive)*/ {
335 if (!NumOpcodes.hasValue())
336 report_fatal_error(
337 "'naive' clustering mode requires opcode count to be specified");
338 Clustering.clusterizeNaive(NumOpcodes.getValue());
341 return Clustering;
344 void SchedClassClusterCentroid::addPoint(ArrayRef<BenchmarkMeasure> Point) {
345 if (Representative.empty())
346 Representative.resize(Point.size());
347 assert(Representative.size() == Point.size() &&
348 "All points should have identical dimensions.");
350 for (const auto &I : zip(Representative, Point))
351 std::get<0>(I).push(std::get<1>(I));
354 std::vector<BenchmarkMeasure> SchedClassClusterCentroid::getAsPoint() const {
355 std::vector<BenchmarkMeasure> ClusterCenterPoint(Representative.size());
356 for (const auto &I : zip(ClusterCenterPoint, Representative))
357 std::get<0>(I).PerInstructionValue = std::get<1>(I).avg();
358 return ClusterCenterPoint;
361 bool SchedClassClusterCentroid::validate(
362 InstructionBenchmark::ModeE Mode) const {
363 size_t NumMeasurements = Representative.size();
364 switch (Mode) {
365 case InstructionBenchmark::Latency:
366 if (NumMeasurements != 1) {
367 errs()
368 << "invalid number of measurements in latency mode: expected 1, got "
369 << NumMeasurements << "\n";
370 return false;
372 break;
373 case InstructionBenchmark::Uops:
374 // Can have many measurements.
375 break;
376 case InstructionBenchmark::InverseThroughput:
377 if (NumMeasurements != 1) {
378 errs() << "invalid number of measurements in inverse throughput "
379 "mode: expected 1, got "
380 << NumMeasurements << "\n";
381 return false;
383 break;
384 default:
385 llvm_unreachable("unimplemented measurement matching mode");
386 return false;
389 return true; // All good.
392 } // namespace exegesis
393 } // namespace llvm