1 // Copyright 2021 Google Inc. All rights reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #ifndef BENCHMARK_PERF_COUNTERS_H
16 #define BENCHMARK_PERF_COUNTERS_H
24 #include "benchmark/benchmark.h"
29 #ifndef BENCHMARK_OS_WINDOWS
35 // C4251: <symbol> needs to have dll-interface to be used by clients of class
36 #pragma warning(disable : 4251)
42 // Typically, we can only read a small number of counters. There is also a
43 // padding preceding counter values, when reading multiple counters with one
44 // syscall (which is desirable). PerfCounterValues abstracts these details.
45 // The implementation ensures the storage is inlined, and allows 0-based
46 // indexing into the counter values.
47 // The object is used in conjunction with a PerfCounters object, by passing it
48 // to Snapshot(). The Read() method relocates individual reads, discarding
49 // the initial padding from each group leader in the values buffer such that
50 // all user accesses through the [] operator are correct.
51 class BENCHMARK_EXPORT PerfCounterValues
{
53 explicit PerfCounterValues(size_t nr_counters
) : nr_counters_(nr_counters
) {
54 BM_CHECK_LE(nr_counters_
, kMaxCounters
);
57 // We are reading correctly now so the values don't need to skip padding
58 uint64_t operator[](size_t pos
) const { return values_
[pos
]; }
// Upper bound on the number of counters a PerfCounterValues can hold; the
// constructor checks nr_counters_ against it.
// Increased the maximum to 32 only since the buffer
// is std::array<> backed
static constexpr size_t kMaxCounters = 32;
// Grants PerfCounters access to the non-public members of this class.
friend class PerfCounters;
66 // Get the byte buffer in which perf counters can be captured.
67 // This is used by PerfCounters::Read
68 std::pair
<char*, size_t> get_data_buffer() {
69 return {reinterpret_cast<char*>(values_
.data()),
70 sizeof(uint64_t) * (kPadding
+ nr_counters_
)};
// Performs the actual counter read for the group leaders in `leaders`
// (PerfCounters::Snapshot() passes its leader ids). Snapshot() treats the
// read as successful when the returned count equals the number of counter
// ids, so the result is presumably the number of counters read -- confirm
// against the implementation, which is not visible here.
// The reading process is complex, and since the goal of this class is to
// abstract away its intricacies, this is the better place for it.
size_t Read(const std::vector<int>& leaders);
// Number of padding slots reserved in values_ in addition to the counter
// values. Moved to 2 due to the reading algorithm (the first padding plus
// the padding of the current read).
static constexpr size_t kPadding = 2;
// Inline, fixed-size backing storage: kPadding slots plus room for up to
// kMaxCounters counter values.
std::array<uint64_t, kPadding + kMaxCounters> values_;
// Number of counters this object was constructed for; the constructor checks
// that it does not exceed kMaxCounters.
const size_t nr_counters_;
// Collect PMU counters. The object, once constructed, is ready to be used by
// calling Snapshot(). PMU counter collection is enabled from the time
// Create() is called, to obtain the object, until the object's destructor is
// called.
88 class BENCHMARK_EXPORT PerfCounters final
{
// True iff this platform supports performance counters.
// Only declared here (no initializer); the value comes from an out-of-class
// definition.
static const bool kSupported;
93 // Returns an empty object
94 static PerfCounters
NoCounters() { return PerfCounters(); }
// Special member functions: PerfCounters is movable but not copyable.
// The destructor calls CloseCounters().
~PerfCounters() { CloseCounters(); }
// Default construction leaves all member vectors empty (no counters).
PerfCounters() = default;
PerfCounters(PerfCounters&&) = default;
PerfCounters(const PerfCounters&) = delete;
// Move assignment is only declared here (as noexcept); it is defined out of
// line.
PerfCounters& operator=(PerfCounters&&) noexcept;
PerfCounters& operator=(const PerfCounters&) = delete;
// Platform-specific implementations may choose to do some library
// initialization here.
// NOTE(review): the meaning of the bool result is not visible in this
// header; presumably true on success -- confirm against the implementation.
static bool Initialize();
// Checks whether the counter called `name` is supported, for apps that want
// to check a counter before passing its name on (presumably to Create() --
// confirm).
static bool IsCounterSupported(const std::string& name);
// Returns a PerfCounters object ready to read the counters whose names are
// given in `counter_names`. The values are user-mode only. The counter name
// format is implementation and OS specific.
// On failure, the worst case is that an empty object is returned; its state
// is still valid.
static PerfCounters Create(const std::vector<std::string>& counter_names);
118 // Take a snapshot of the current value of the counters into the provided
119 // valid PerfCounterValues storage. The values are populated such that:
120 // names()[i]'s value is (*values)[i]
121 BENCHMARK_ALWAYS_INLINE
bool Snapshot(PerfCounterValues
* values
) const {
122 #ifndef BENCHMARK_OS_WINDOWS
123 assert(values
!= nullptr);
124 return values
->Read(leader_ids_
) == counter_ids_
.size();
131 const std::vector
<std::string
>& names() const { return counter_names_
; }
132 size_t num_counters() const { return counter_names_
.size(); }
135 PerfCounters(const std::vector
<std::string
>& counter_names
,
136 std::vector
<int>&& counter_ids
, std::vector
<int>&& leader_ids
)
137 : counter_ids_(std::move(counter_ids
)),
138 leader_ids_(std::move(leader_ids
)),
139 counter_names_(counter_names
) {}
// Invoked from the destructor. Presumably closes/releases the underlying
// counters -- the implementation is not visible here, confirm there.
void CloseCounters() const;
// Ids of the individual counters; Snapshot() compares the result of
// PerfCounterValues::Read() against counter_ids_.size().
std::vector<int> counter_ids_;
// Ids of the group leaders; Snapshot() hands them to
// PerfCounterValues::Read().
std::vector<int> leader_ids_;
// Counter names as passed at construction; exposed through names() and
// counted by num_counters().
std::vector<std::string> counter_names_;
148 // Typical usage of the above primitives.
149 class BENCHMARK_EXPORT PerfCountersMeasurement final
{
// Sets up a measurement over the counters named in `counter_names`.
// Only declared here; the constructor is defined out of line.
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
153 size_t num_counters() const { return counters_
.num_counters(); }
155 std::vector
<std::string
> names() const { return counters_
.names(); }
157 BENCHMARK_ALWAYS_INLINE
bool Start() {
158 if (num_counters() == 0) return true;
159 // Tell the compiler to not move instructions above/below where we take
162 valid_read_
&= counters_
.Snapshot(&start_values_
);
168 BENCHMARK_ALWAYS_INLINE
bool Stop(
169 std::vector
<std::pair
<std::string
, double>>& measurements
) {
170 if (num_counters() == 0) return true;
171 // Tell the compiler to not move instructions above/below where we take
174 valid_read_
&= counters_
.Snapshot(&end_values_
);
177 for (size_t i
= 0; i
< counters_
.names().size(); ++i
) {
178 double measurement
= static_cast<double>(end_values_
[i
]) -
179 static_cast<double>(start_values_
[i
]);
180 measurements
.push_back({counters_
.names()[i
], measurement
});
// The counters being measured.
PerfCounters counters_;
// Starts out true and is AND-ed with the result of every Snapshot() taken by
// Start() and Stop(), so it stays false once any snapshot has failed.
bool valid_read_ = true;
// Counter values captured by Start().
PerfCounterValues start_values_;
// Counter values captured by Stop(); Stop() reports end minus start for each
// counter.
PerfCounterValues end_values_;
193 } // namespace internal
194 } // namespace benchmark
196 #if defined(_MSC_VER)
200 #endif // BENCHMARK_PERF_COUNTERS_H