1 //===-- Clustering.h --------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Utilities to compute benchmark result clusters.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
15 #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
17 #include "BenchmarkResult.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/Support/Error.h"
26 class InstructionBenchmarkClustering
{
28 enum ModeE
{ Dbscan
, Naive
};
30 // Clusters `Points` using DBSCAN with the given parameters. See the cc file
31 // for more explanations on the algorithm.
32 static Expected
<InstructionBenchmarkClustering
>
33 create(const std::vector
<InstructionBenchmark
> &Points
, ModeE Mode
,
34 size_t DbscanMinPts
, double AnalysisClusteringEpsilon
,
35 const MCSubtargetInfo
*SubtargetInfo
= nullptr,
36 const MCInstrInfo
*InstrInfo
= nullptr);
40 static ClusterId
noise() { return ClusterId(kNoise
); }
41 static ClusterId
error() { return ClusterId(kError
); }
42 static ClusterId
makeValid(size_t Id
, bool IsUnstable
= false) {
43 return ClusterId(Id
, IsUnstable
);
45 static ClusterId
makeValidUnstable(size_t Id
) {
46 return makeValid(Id
, /*IsUnstable=*/true);
49 ClusterId() : Id_(kUndef
), IsUnstable_(false) {}
51 // Compare id's, ignoring the 'unstability' bit.
52 bool operator==(const ClusterId
&O
) const { return Id_
== O
.Id_
; }
53 bool operator<(const ClusterId
&O
) const { return Id_
< O
.Id_
; }
55 bool isValid() const { return Id_
<= kMaxValid
; }
56 bool isUnstable() const { return IsUnstable_
; }
57 bool isNoise() const { return Id_
== kNoise
; }
58 bool isError() const { return Id_
== kError
; }
59 bool isUndef() const { return Id_
== kUndef
; }
61 // Precondition: isValid().
62 size_t getId() const {
68 ClusterId(size_t Id
, bool IsUnstable
= false)
69 : Id_(Id
), IsUnstable_(IsUnstable
) {}
71 static constexpr const size_t kMaxValid
=
72 (std::numeric_limits
<size_t>::max() >> 1) - 4;
73 static constexpr const size_t kNoise
= kMaxValid
+ 1;
74 static constexpr const size_t kError
= kMaxValid
+ 2;
75 static constexpr const size_t kUndef
= kMaxValid
+ 3;
77 size_t Id_
: (std::numeric_limits
<size_t>::digits
- 1);
78 size_t IsUnstable_
: 1;
80 static_assert(sizeof(ClusterId
) == sizeof(size_t), "should be a bit field.");
84 explicit Cluster(const ClusterId
&Id
) : Id(Id
) {}
87 // Indices of benchmarks within the cluster.
88 std::vector
<int> PointIndices
;
91 ClusterId
getClusterIdForPoint(size_t P
) const {
92 return ClusterIdForPoint_
[P
];
95 const std::vector
<InstructionBenchmark
> &getPoints() const { return Points_
; }
97 const Cluster
&getCluster(ClusterId Id
) const {
98 assert(!Id
.isUndef() && "unlabeled cluster");
100 return NoiseCluster_
;
103 return ErrorCluster_
;
105 return Clusters_
[Id
.getId()];
108 const std::vector
<Cluster
> &getValidClusters() const { return Clusters_
; }
110 // Returns true if the given point is within a distance Epsilon of each other.
111 bool isNeighbour(const std::vector
<BenchmarkMeasure
> &P
,
112 const std::vector
<BenchmarkMeasure
> &Q
,
113 const double EpsilonSquared_
) const {
114 double DistanceSquared
= 0.0;
115 for (size_t I
= 0, E
= P
.size(); I
< E
; ++I
) {
116 const auto Diff
= P
[I
].PerInstructionValue
- Q
[I
].PerInstructionValue
;
117 DistanceSquared
+= Diff
* Diff
;
119 return DistanceSquared
<= EpsilonSquared_
;
123 InstructionBenchmarkClustering(
124 const std::vector
<InstructionBenchmark
> &Points
,
125 double AnalysisClusteringEpsilonSquared
);
127 Error
validateAndSetup();
129 void clusterizeDbScan(size_t MinPts
);
130 void clusterizeNaive(const MCSubtargetInfo
&SubtargetInfo
,
131 const MCInstrInfo
&InstrInfo
);
133 // Stabilization is only needed if dbscan was used to clusterize.
134 void stabilize(unsigned NumOpcodes
);
136 void rangeQuery(size_t Q
, std::vector
<size_t> &Scratchpad
) const;
138 bool areAllNeighbours(ArrayRef
<size_t> Pts
) const;
140 const std::vector
<InstructionBenchmark
> &Points_
;
141 const double AnalysisClusteringEpsilonSquared_
;
143 int NumDimensions_
= 0;
144 // ClusterForPoint_[P] is the cluster id for Points[P].
145 std::vector
<ClusterId
> ClusterIdForPoint_
;
146 std::vector
<Cluster
> Clusters_
;
147 Cluster NoiseCluster_
;
148 Cluster ErrorCluster_
;
151 class SchedClassClusterCentroid
{
153 const std::vector
<PerInstructionStats
> &getStats() const {
154 return Representative
;
157 std::vector
<BenchmarkMeasure
> getAsPoint() const;
159 void addPoint(ArrayRef
<BenchmarkMeasure
> Point
);
161 bool validate(InstructionBenchmark::ModeE Mode
) const;
164 // Measurement stats for the points in the SchedClassCluster.
165 std::vector
<PerInstructionStats
> Representative
;
168 } // namespace exegesis
171 #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H