1 //===-- X86Counter.cpp ------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "X86Counter.h"
11 #if defined(__linux__) && defined(HAVE_LIBPFM) && \
12 defined(LIBPFM_HAS_FIELD_CYCLES)
14 // FIXME: Use appropriate wrappers for poll.h and mman.h
15 // to support Windows and remove this linux-only guard.
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"

#include <perfmon/perf_event.h>
#include <perfmon/pfmlib.h>
#include <perfmon/pfmlib_perf_event.h>

#include <atomic>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <vector>

#include <poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
// Number of entries in the LBR.
static constexpr int kLbrEntries = 16;
// Number of pages used for the perf data buffer.
static constexpr size_t kBufferPages = 8;
// Byte size of the data portion of the perf ring buffer.
static const size_t kDataBufferSize = kBufferPages * getpagesize();
// First page is reserved for perf_event_mmap_page. Data buffer starts on
// the next page, so we allocate one more page.
static const size_t kMappedBufferSize = kDataBufferSize + getpagesize();
// Waits for the LBR perf events.
// Blocks (up to 10 seconds) until FileDescriptor becomes readable.
// Returns the raw poll(2) result: >0 ready, 0 timeout, -1 error.
static int pollLbrPerfEvent(const int FileDescriptor) {
  struct pollfd PollFd;
  PollFd.fd = FileDescriptor;
  PollFd.events = POLLIN;
  PollFd.revents = 0; // Not inspected, but keep the struct fully initialized.
  return poll(&PollFd, 1 /* num of fds */, 10000 /* timeout in ms */);
}
57 // Copies the data-buffer into Buf, given the pointer to MMapped.
58 static void copyDataBuffer(void *MMappedBuffer
, char *Buf
, uint64_t Tail
,
60 // First page is reserved for perf_event_mmap_page. Data buffer starts on
62 char *Start
= reinterpret_cast<char *>(MMappedBuffer
) + getpagesize();
63 // The LBR buffer is a cyclic buffer, we copy data to another buffer.
64 uint64_t Offset
= Tail
% kDataBufferSize
;
65 size_t CopySize
= kDataBufferSize
- Offset
;
66 memcpy(Buf
, Start
+ Offset
, CopySize
);
67 if (CopySize
>= DataSize
)
70 memcpy(Buf
+ CopySize
, Start
, Offset
);
74 // Parses the given data-buffer for stats and fill the CycleArray.
75 // If data has been extracted successfully, also modifies the code to jump
76 // out the benchmark loop.
77 static llvm::Error
parseDataBuffer(const char *DataBuf
, size_t DataSize
,
78 const void *From
, const void *To
,
79 llvm::SmallVector
<int64_t, 4> *CycleArray
) {
80 const char *DataPtr
= DataBuf
;
81 while (DataPtr
< DataBuf
+ DataSize
) {
82 struct perf_event_header Header
;
83 memcpy(&Header
, DataPtr
, sizeof(struct perf_event_header
));
84 if (Header
.type
!= PERF_RECORD_SAMPLE
) {
85 // Ignores non-sample records.
86 DataPtr
+= Header
.size
;
89 DataPtr
+= sizeof(Header
);
91 llvm::support::endian::read64(DataPtr
, llvm::endianness::native
);
92 DataPtr
+= sizeof(Count
);
94 struct perf_branch_entry Entry
;
95 memcpy(&Entry
, DataPtr
, sizeof(struct perf_branch_entry
));
97 // Read the perf_branch_entry array.
98 for (uint64_t i
= 0; i
< Count
; ++i
) {
99 const uint64_t BlockStart
= From
== nullptr
100 ? std::numeric_limits
<uint64_t>::min()
101 : reinterpret_cast<uint64_t>(From
);
102 const uint64_t BlockEnd
= To
== nullptr
103 ? std::numeric_limits
<uint64_t>::max()
104 : reinterpret_cast<uint64_t>(To
);
106 if (BlockStart
<= Entry
.from
&& BlockEnd
>= Entry
.to
)
107 CycleArray
->push_back(Entry
.cycles
);
110 // We've reached the last entry.
111 return llvm::Error::success();
113 // Advance to next entry
114 DataPtr
+= sizeof(Entry
);
115 memcpy(&Entry
, DataPtr
, sizeof(struct perf_branch_entry
));
118 return llvm::make_error
<llvm::StringError
>("Unable to parse databuffer.",
119 llvm::errc::io_error
);
122 X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod
) {
123 assert(SamplingPeriod
> 0 && "SamplingPeriod must be positive");
124 EventString
= "BR_INST_RETIRED.NEAR_TAKEN";
125 Attr
= new perf_event_attr();
126 Attr
->size
= sizeof(*Attr
);
127 Attr
->type
= PERF_TYPE_RAW
;
128 // FIXME This is SKL's encoding. Not sure if it'll change.
129 Attr
->config
= 0x20c4; // BR_INST_RETIRED.NEAR_TAKEN
130 Attr
->sample_type
= PERF_SAMPLE_BRANCH_STACK
;
131 // Don't need to specify "USER" because we've already excluded HV and Kernel.
132 Attr
->branch_sample_type
= PERF_SAMPLE_BRANCH_ANY
;
133 Attr
->sample_period
= SamplingPeriod
;
134 Attr
->wakeup_events
= 1; // We need this even when using ioctl REFRESH.
136 Attr
->exclude_kernel
= 1;
137 Attr
->exclude_hv
= 1;
138 Attr
->read_format
= PERF_FORMAT_GROUP
;
140 FullQualifiedEventString
= EventString
;
143 X86LbrCounter::X86LbrCounter(pfm::PerfEvent
&&NewEvent
)
144 : Counter(std::move(NewEvent
)) {
145 MMappedBuffer
= mmap(nullptr, kMappedBufferSize
, PROT_READ
| PROT_WRITE
,
146 MAP_SHARED
, FileDescriptor
, 0);
147 if (MMappedBuffer
== MAP_FAILED
)
148 llvm::errs() << "Failed to mmap buffer.";
151 X86LbrCounter::~X86LbrCounter() {
152 if (0 != munmap(MMappedBuffer
, kMappedBufferSize
))
153 llvm::errs() << "Failed to munmap buffer.";
156 void X86LbrCounter::start() {
157 ioctl(FileDescriptor
, PERF_EVENT_IOC_REFRESH
, 1024 /* kMaxPollsPerFd */);
160 llvm::Error
X86LbrCounter::checkLbrSupport() {
161 // Do a sample read and check if the results contain non-zero values.
163 X86LbrCounter
counter(X86LbrPerfEvent(123));
166 // Prevent the compiler from unrolling the loop and get rid of all the
167 // branches. We need at least 16 iterations.
171 volatile int *P
= &V
;
173 std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5);
176 I
< kLbrEntries
|| std::chrono::high_resolution_clock::now() < TimeLimit
;
184 auto ResultOrError
= counter
.doReadCounter(nullptr, nullptr);
186 if (!ResultOrError
.get().empty())
187 // If there is at least one non-zero entry, then LBR is supported.
188 for (const int64_t &Value
: ResultOrError
.get())
190 return Error::success();
192 return llvm::make_error
<llvm::StringError
>(
193 "LBR format with cycles is not suppported on the host.",
194 llvm::errc::not_supported
);
197 llvm::Expected
<llvm::SmallVector
<int64_t, 4>>
198 X86LbrCounter::readOrError(StringRef FunctionBytes
) const {
199 // Disable the event before reading
200 ioctl(FileDescriptor
, PERF_EVENT_IOC_DISABLE
, 0);
202 // Find the boundary of the function so that we could filter the LBRs
203 // to keep only the relevant records.
204 if (FunctionBytes
.empty())
205 return llvm::make_error
<llvm::StringError
>("Empty function bytes",
206 llvm::errc::invalid_argument
);
207 const void *From
= reinterpret_cast<const void *>(FunctionBytes
.data());
208 const void *To
= reinterpret_cast<const void *>(FunctionBytes
.data() +
209 FunctionBytes
.size());
210 return doReadCounter(From
, To
);
213 llvm::Expected
<llvm::SmallVector
<int64_t, 4>>
214 X86LbrCounter::doReadCounter(const void *From
, const void *To
) const {
215 // The max number of time-outs/retries before we give up.
216 static constexpr int kMaxTimeouts
= 160;
218 // Parses the LBR buffer and fills CycleArray with the sequence of cycle
219 // counts from the buffer.
220 llvm::SmallVector
<int64_t, 4> CycleArray
;
221 auto DataBuf
= std::make_unique
<char[]>(kDataBufferSize
);
225 while (PollResult
<= 0) {
226 PollResult
= pollLbrPerfEvent(FileDescriptor
);
229 if (PollResult
== -1)
230 return llvm::make_error
<llvm::StringError
>("Cannot poll LBR perf event.",
231 llvm::errc::io_error
);
232 if (NumTimeouts
++ >= kMaxTimeouts
)
233 return llvm::make_error
<llvm::StringError
>(
234 "LBR polling still timed out after max number of attempts.",
235 llvm::errc::device_or_resource_busy
);
238 struct perf_event_mmap_page Page
;
239 memcpy(&Page
, MMappedBuffer
, sizeof(struct perf_event_mmap_page
));
241 const uint64_t DataTail
= Page
.data_tail
;
242 const uint64_t DataHead
= Page
.data_head
;
243 // We're supposed to use a barrier after reading data_head.
244 std::atomic_thread_fence(std::memory_order_acq_rel
);
245 const size_t DataSize
= DataHead
- DataTail
;
246 if (DataSize
> kDataBufferSize
)
247 return llvm::make_error
<llvm::StringError
>(
248 "DataSize larger than buffer size.", llvm::errc::invalid_argument
);
250 copyDataBuffer(MMappedBuffer
, DataBuf
.get(), DataTail
, DataSize
);
252 parseDataBuffer(DataBuf
.get(), DataSize
, From
, To
, &CycleArray
);
255 return std::move(error
);
258 } // namespace exegesis
261 #endif // defined(__linux__) && defined(HAVE_LIBPFM) &&
262 // defined(LIBPFM_HAS_FIELD_CYCLES)