1 // Copyright 2021 Google Inc. All rights reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "perf_counters.h"
21 #if defined HAVE_LIBPFM
22 #include "perfmon/pfmlib.h"
23 #include "perfmon/pfmlib_perf_event.h"
29 constexpr size_t PerfCounterValues::kMaxCounters
;
31 #if defined HAVE_LIBPFM
// Reads the counter values of every group leader descriptor in `leaders` into
// the values_ buffer and returns a count expressed in uint64_t units.
//
// Each descriptor is read with ::read() into the buffer position `ptr`, with
// `size` bytes offered. A read is accepted only if it delivers at least one
// uint64_t; that leading word is then dropped by shifting the payload down
// over it. The return value is (bufsize - size) / sizeof(uint64_t).
// NOTE(review): the statements that advance `ptr` / shrink `size`, the
// declaration of `err` and the failure return are not visible in this
// excerpt — confirm against the full file.
33 size_t PerfCounterValues::Read(const std::vector
<int>& leaders
) {
34 // Create a pointer for multiple reads
35 const size_t bufsize
= values_
.size() * sizeof(values_
[0]);
36 char* ptr
= reinterpret_cast<char*>(values_
.data());
37 size_t size
= bufsize
;
// One ::read() per group leader descriptor.
38 for (int lead
: leaders
) {
39 auto read_bytes
= ::read(lead
, ptr
, size
);
// A usable result holds at least the leading uint64_t word.
40 if (read_bytes
>= ssize_t(sizeof(uint64_t))) {
41 // Actual data bytes are all bytes minus initial padding
// (presumably the leading `nr` word of a PERF_FORMAT_GROUP read, see
// perf_event_open(2) — TODO confirm).
42 std::size_t data_bytes
= read_bytes
- sizeof(uint64_t);
43 // This should be very cheap since it's in hot cache
// memmove rather than memcpy: source and destination ranges overlap.
44 std::memmove(ptr
, ptr
+ sizeof(uint64_t), data_bytes
);
45 // Increment our counters
// Error path: report the descriptor, the errno value and its text.
50 GetErrorLogInstance() << "Error reading lead " << lead
<< " errno:" << err
51 << " " << ::strerror(err
) << "\n";
// Bytes consumed from the buffer, converted to a number of uint64_t values.
55 return (bufsize
- size
) / sizeof(uint64_t);
58 const bool PerfCounters::kSupported
= true;
60 // Initializes libpfm only on the first call. Returns whether that single
61 // initialization was successful.
//
// The outcome of pfm_initialize() is cached in a function-local static, so
// later calls do not call into libpfm again. Since C++11 the initialization
// of a function-local static is also thread-safe.
// NOTE(review): the lambda invocation and the return statement are not
// visible in this excerpt.
62 bool PerfCounters::Initialize() {
63 // Function-scope static gets initialized only once on first call.
64 static const bool success
= []() {
// True iff libpfm reports PFM_SUCCESS.
65 return pfm_initialize() == PFM_SUCCESS
;
// Returns true when libpfm can encode the event called `name`, i.e. when
// pfm_get_os_event_encoding() returns PFM_SUCCESS for a user-mode (PFM_PLM3)
// request using PFM_OS_PERF_EVENT_EXT. No perf_event_open() call appears in
// the visible code of this function.
// NOTE(review): the final argument of the encoding call and some lines
// between the statements below are not visible in this excerpt.
70 bool PerfCounters::IsCounterSupported(const std::string
& name
) {
// Both libpfm structs are zero-filled before being handed to the library.
72 perf_event_attr_t attr
;
73 std::memset(&attr
, 0, sizeof(attr
));
74 pfm_perf_encode_arg_t arg
;
75 std::memset(&arg
, 0, sizeof(arg
));
77 const int mode
= PFM_PLM3
; // user mode only
// Ask libpfm to translate the textual event name.
78 int ret
= pfm_get_os_event_encoding(name
.c_str(), mode
, PFM_OS_PERF_EVENT_EXT
,
// Any status other than PFM_SUCCESS is reported as "not supported".
80 return (ret
== PFM_SUCCESS
);
// Builds a PerfCounters object for the requested counter names.
//
// For every entry of `counter_names` the name is encoded through libpfm
// (pfm_get_os_event_encoding with PFM_OS_PERF_EVENT, user mode only) and a
// descriptor is requested from perf_event_open(). Counters are organised in
// groups: a counter opened while group_id < 0 is the group leader. Accepted
// descriptors go to `counter_ids`, their names to `valid_names`, and leader
// descriptors additionally to `leader_ids`. At most
// PerfCounterValues::kMaxCounters counters are accepted; empty names, names
// unknown to libpfm and counters without a descriptor are logged.
// Finally every group leader is enabled with PERF_EVENT_IOC_ENABLE.
//
// Returns a PerfCounters constructed from the accepted names, all counter
// descriptors and the leader descriptors.
// NOTE(review): the declarations of `group_id` and `id`, the bodies of
// several branches (break/continue/return) and other lines are not visible
// in this excerpt — confirm details against the full file.
83 PerfCounters
PerfCounters::Create(
84 const std::vector
<std::string
>& counter_names
) {
85 if (!counter_names
.empty()) {
89 // Valid counters will populate these arrays but we start empty
90 std::vector
<std::string
> valid_names
;
91 std::vector
<int> counter_ids
;
92 std::vector
<int> leader_ids
;
94 // Reserve capacity for the maximum possible (leader_ids is not reserved)
95 valid_names
.reserve(counter_names
.size());
96 counter_ids
.reserve(counter_names
.size());
98 const int kCounterMode
= PFM_PLM3
; // user mode only
100 // Group leads will be assigned on demand. The idea is that once we cannot
101 // create a counter descriptor, the reason is that this group has maxed out
102 // so we set the group_id again to -1 and retry - giving the algorithm a
103 // chance to create a new group leader to hold the next set of counters.
106 // Loop through all performance counters
107 for (size_t i
= 0; i
< counter_names
.size(); ++i
) {
108 // we are about to push into the valid names vector
109 // check if we did not reach the maximum
110 if (valid_names
.size() == PerfCounterValues::kMaxCounters
) {
111 // Log a message if we maxed out and stop adding
112 GetErrorLogInstance()
113 << counter_names
.size() << " counters were requested. The maximum is "
114 << PerfCounterValues::kMaxCounters
<< " and " << valid_names
.size()
115 << " were already added. All remaining counters will be ignored\n";
116 // stop the loop and return what we have already
120 // Check if this name is empty
121 const auto& name
= counter_names
[i
];
123 GetErrorLogInstance()
124 << "A performance counter name was the empty string\n";
128 // Here first means first in group, ie the group leader
129 const bool is_first
= (group_id
< 0);
131 // This struct will be populated by libpfm from the counter string
132 // and then fed into the syscall perf_event_open
133 struct perf_event_attr attr
{};
134 attr
.size
= sizeof(attr
);
136 // This is the input struct to libpfm.
137 pfm_perf_encode_arg_t arg
{};
// Translate the textual counter name; failure means libpfm does not know it.
139 const int pfm_get
= pfm_get_os_event_encoding(name
.c_str(), kCounterMode
,
140 PFM_OS_PERF_EVENT
, &arg
);
141 if (pfm_get
!= PFM_SUCCESS
) {
142 GetErrorLogInstance()
143 << "Unknown performance counter name: " << name
<< "\n";
147 // We then proceed to populate the remaining fields in our attribute struct
148 // Note: the man page for perf_event_open suggests inherit = true and
149 // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
// Only a group leader is created disabled; leaders are switched on by the
// PERF_EVENT_IOC_ENABLE loop further down.
151 attr
.disabled
= is_first
;
// Likewise only the leader requests pinning.
153 attr
.pinned
= is_first
;
// Count user-space activity only: kernel and hypervisor are excluded.
154 attr
.exclude_kernel
= true;
155 attr
.exclude_user
= false;
156 attr
.exclude_hv
= true;
158 // Read all counters in a group in one read.
159 attr
.read_format
= PERF_FORMAT_GROUP
;
163 static constexpr size_t kNrOfSyscallRetries
= 5;
164 // Retry syscall as it was interrupted often (b/64774091).
165 for (size_t num_retries
= 0; num_retries
< kNrOfSyscallRetries
;
// group_id is passed as the group descriptor argument; a negative value makes
// this counter a leader (see is_first above). Per perf_event_open(2),
// pid = 0 and cpu = -1 select the calling thread on any CPU — assumes this
// wrapper follows the syscall's argument order.
167 id
= perf_event_open(&attr
, 0, -1, group_id
, 0);
// Success, or a failure other than EINTR, ends the retrying (presumably via
// a break that is not visible here).
168 if (id
>= 0 || errno
!= EINTR
) {
173 // If the file descriptor is negative we might have reached a limit
174 // in the current group. Set the group_id to -1 and retry
176 // Create a new group
179 // At this point we have already retried to set a new group id and
180 // failed. We then give up.
186 // We failed to get a new file descriptor. We might have reached a hard
187 // hardware limit that cannot be resolved even with group multiplexing
189 GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
190 "for performance counter "
191 << name
<< ". Ignoring\n";
193 // We give up on this counter but try to keep going
194 // as the others would be fine
198 // This is a leader, store and assign it to the current file descriptor
199 leader_ids
.push_back(id
);
202 // This is a valid counter, add it to our descriptor's list
203 counter_ids
.push_back(id
);
204 valid_names
.push_back(name
);
207 // Loop through all group leaders activating them
208 // There is another option of starting ALL counters in a process but
209 // that would be far reaching an intrusion. If the user is using PMCs
210 // by themselves then this would have a side effect on them. It is
211 // friendlier to loop through all groups individually.
212 for (int lead
: leader_ids
) {
213 if (ioctl(lead
, PERF_EVENT_IOC_ENABLE
) != 0) {
214 // This should never happen but if it does, we give up on the
215 // entire batch as recovery would be a mess.
// NOTE(review): "Claring" in the message below is a typo for "Clearing"; it
// is emitted at runtime, so it is left untouched here.
216 GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
217 "Claring out all counters.\n";
219 // Close all performance counters
220 for (int id
: counter_ids
) {
224 // Return an empty object so our internal state is still good and
225 // the process can continue normally without impact
// Hand the collected vectors over to the new object without copying them.
230 return PerfCounters(std::move(valid_names
), std::move(counter_ids
),
231 std::move(leader_ids
));
// Shuts down the counters held by this object. Every group leader in
// leader_ids_ is disabled with PERF_EVENT_IOC_DISABLE (the ioctl result is
// not checked), then each descriptor in counter_ids_ is visited.
// NOTE(review): the body of the empty() branch and of the descriptor loop
// (presumably an early return and a close of `fd`) are not visible in this
// excerpt.
234 void PerfCounters::CloseCounters() const {
// Special case for an object that owns no counters.
235 if (counter_ids_
.empty()) {
// Stop counting group by group.
238 for (int lead
: leader_ids_
) {
239 ioctl(lead
, PERF_EVENT_IOC_DISABLE
);
// Walk over every counter descriptor, leaders included.
241 for (int fd
: counter_ids_
) {
245 #else // defined HAVE_LIBPFM
246 size_t PerfCounterValues::Read(const std::vector
<int>&) { return 0; }
248 const bool PerfCounters::kSupported
= false;
250 bool PerfCounters::Initialize() { return false; }
252 bool PerfCounters::IsCounterSupported(const std::string
&) { return false; }
// Fallback for builds without libpfm. When at least one counter name was
// requested, a "not supported" message is written to the error log.
// NOTE(review): the return statement and closing lines are not visible in
// this excerpt.
254 PerfCounters
PerfCounters::Create(
255 const std::vector
<std::string
>& counter_names
) {
// Stay silent when the caller did not ask for any counters.
256 if (!counter_names
.empty()) {
257 GetErrorLogInstance() << "Performance counters not supported.\n";
262 void PerfCounters::CloseCounters() const {}
263 #endif // defined HAVE_LIBPFM
// Constructs a measurement helper for the given counter names.
// start_values_ and end_values_ are both constructed with
// counter_names.size(); counters_ is then assigned the result of
// PerfCounters::Create(counter_names).
265 PerfCountersMeasurement::PerfCountersMeasurement(
266 const std::vector
<std::string
>& counter_names
)
267 : start_values_(counter_names
.size()), end_values_(counter_names
.size()) {
// Assigned in the body rather than in the initializer list.
268 counters_
= PerfCounters::Create(counter_names
);
// Move assignment. Guarded against self-assignment; takes over the other
// object's counter_ids_, leader_ids_ and counter_names_ by moving them.
// Declared noexcept.
// NOTE(review): lines between the self-check and the moves (presumably
// releasing the counters currently held) and the return statement are not
// visible in this excerpt.
271 PerfCounters
& PerfCounters::operator=(PerfCounters
&& other
) noexcept
{
// Moving from itself must leave the object untouched.
272 if (this != &other
) {
275 counter_ids_
= std::move(other
.counter_ids_
);
276 leader_ids_
= std::move(other
.leader_ids_
);
277 counter_names_
= std::move(other
.counter_names_
);
281 } // namespace internal
282 } // namespace benchmark