//===-- X86Counter.cpp ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "X86Counter.h"
#if defined(__linux__) && defined(HAVE_LIBPFM) &&                             \
    defined(LIBPFM_HAS_FIELD_CYCLES)

// FIXME: Use appropriate wrappers for poll.h and mman.h
// to support Windows and remove this linux-only guard.
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"

#include <perfmon/perf_event.h>
#include <perfmon/pfmlib.h>
#include <perfmon/pfmlib_perf_event.h>

#include <atomic>
#include <cassert>
#include <chrono>
#include <cstring>
#include <limits>
#include <memory>

#include <poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

namespace llvm {
namespace exegesis {
// Number of entries in the LBR.
static constexpr int kLbrEntries = 16;
static constexpr size_t kBufferPages = 8;
static const size_t kDataBufferSize = kBufferPages * getpagesize();

// First page is reserved for perf_event_mmap_page. Data buffer starts on
// the next page, so we allocate one more page.
static const size_t kMappedBufferSize = (kBufferPages + 1) * getpagesize();
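// Note: perf_event_open(2) expects the mapping to cover 1 + 2^n pages: one
// perf_event_mmap_page metadata page followed by a power-of-two number of
// data pages, which is why kBufferPages is 8.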
// Waits for the LBR perf events.
static int pollLbrPerfEvent(const int FileDescriptor) {
  struct pollfd PollFd;
  PollFd.fd = FileDescriptor;
  PollFd.events = POLLIN;
  return poll(&PollFd, 1 /* num of fds */, 10000 /* timeout in ms */);
}
// Copies the data-buffer into Buf, given the pointer to MMapped.
static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail,
                           size_t DataSize) {
  // First page is reserved for perf_event_mmap_page. Data buffer starts on
  // the next page.
  char *Start = reinterpret_cast<char *>(MMappedBuffer) + getpagesize();
  // The LBR buffer is a cyclic buffer, we copy data to another buffer.
  uint64_t Offset = Tail % kDataBufferSize;
  size_t CopySize = kDataBufferSize - Offset;
  memcpy(Buf, Start + Offset, CopySize);
  if (CopySize >= DataSize)
    return;

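  // The sample stream wrapped past the end of the circular data area; copy
  // the remaining bytes from the beginning of the data pages.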
  memcpy(Buf + CopySize, Start, Offset);
}

// Parses the given data-buffer for stats and fill the CycleArray.
// If data has been extracted successfully, also modifies the code to jump
// out the benchmark loop.
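// With PERF_SAMPLE_BRANCH_STACK, each PERF_RECORD_SAMPLE is laid out as a
// perf_event_header, a u64 count of branch entries, and then that many
// struct perf_branch_entry records (from, to, cycles, ...).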
static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize,
                                   const void *From, const void *To,
                                   llvm::SmallVector<int64_t, 4> *CycleArray) {
  const char *DataPtr = DataBuf;
  while (DataPtr < DataBuf + DataSize) {
    struct perf_event_header Header;
    memcpy(&Header, DataPtr, sizeof(struct perf_event_header));
    if (Header.type != PERF_RECORD_SAMPLE) {
      // Ignores non-sample records.
      DataPtr += Header.size;
      continue;
    }
    DataPtr += sizeof(Header);
    uint64_t Count = llvm::support::endian::read64(DataPtr, support::native);
    DataPtr += sizeof(Count);

    struct perf_branch_entry Entry;
    memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
    // Read the perf_branch_entry array.
    for (uint64_t i = 0; i < Count; ++i) {
      const uint64_t BlockStart = From == nullptr
                                      ? std::numeric_limits<uint64_t>::min()
                                      : reinterpret_cast<uint64_t>(From);
      const uint64_t BlockEnd = To == nullptr
                                    ? std::numeric_limits<uint64_t>::max()
                                    : reinterpret_cast<uint64_t>(To);

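      // Keep only branches whose source and target both fall inside the
      // benchmarked code region (or everything, when no bounds were given).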
      if (BlockStart <= Entry.from && BlockEnd >= Entry.to)
        CycleArray->push_back(Entry.cycles);

      if (i == Count - 1)
        // We've reached the last entry.
        return llvm::Error::success();

      // Advance to next entry
      DataPtr += sizeof(Entry);
      memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
    }
  }
  return llvm::make_error<llvm::StringError>("Unable to parse databuffer.",
                                             llvm::errc::io_error);
}

X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod) {
  assert(SamplingPeriod > 0 && "SamplingPeriod must be positive");
  EventString = "BR_INST_RETIRED.NEAR_TAKEN";
  Attr = new perf_event_attr();
  Attr->size = sizeof(*Attr);
  Attr->type = PERF_TYPE_RAW;
  // FIXME This is SKL's encoding. Not sure if it'll change.
  Attr->config = 0x20c4; // BR_INST_RETIRED.NEAR_TAKEN
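  // Raw PMU encoding: event select 0xC4 in bits 7:0, umask 0x20 in bits 15:8.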
  Attr->sample_type = PERF_SAMPLE_BRANCH_STACK;
  // Don't need to specify "USER" because we've already excluded HV and Kernel.
  Attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
  Attr->sample_period = SamplingPeriod;
  Attr->wakeup_events = 1; // We need this even when using ioctl REFRESH.

  Attr->exclude_kernel = 1;
  Attr->exclude_hv = 1;
  Attr->read_format = PERF_FORMAT_GROUP;

  FullQualifiedEventString = EventString;
}

X86LbrCounter::X86LbrCounter(pfm::PerfEvent &&NewEvent)
    : Counter(std::move(NewEvent)) {
  MMappedBuffer = mmap(nullptr, kMappedBufferSize, PROT_READ | PROT_WRITE,
                       MAP_SHARED, FileDescriptor, 0);
  if (MMappedBuffer == MAP_FAILED)
    llvm::errs() << "Failed to mmap buffer.";
}

X86LbrCounter::~X86LbrCounter() {
  if (0 != munmap(MMappedBuffer, kMappedBufferSize))
    llvm::errs() << "Failed to munmap buffer.";
}

void X86LbrCounter::start() {
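  // PERF_EVENT_IOC_REFRESH enables the event and keeps it enabled until the
  // requested number of overflow wakeups has been delivered.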
  ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
}

llvm::Error X86LbrCounter::checkLbrSupport() {
  // Do a sample read and check if the results contain non-zero values.

  X86LbrCounter counter(X86LbrPerfEvent(123));
  counter.start();

  // Prevent the compiler from unrolling the loop and get rid of all the
  // branches. We need at least 16 iterations.
  int Sum = 0;
  int V = 1;

  volatile int *P = &V;
  const auto TimeLimit =
      std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5);
  for (int I = 0;
       I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit;
       ++I) {
    Sum += *P;
  }

  counter.stop();
  (void)Sum;

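  // Read everything back without address filtering; if any entry reports a
  // non-zero cycle count, the host's LBR format supports the cycles field.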
  auto ResultOrError = counter.doReadCounter(nullptr, nullptr);
  if (ResultOrError)
    if (!ResultOrError.get().empty())
      // If there is at least one non-zero entry, then LBR is supported.
      for (const int64_t &Value : ResultOrError.get())
        if (Value != 0)
          return Error::success();

  return llvm::make_error<llvm::StringError>(
      "LBR format with cycles is not supported on the host.",
      llvm::errc::not_supported);
}

llvm::Expected<llvm::SmallVector<int64_t, 4>>
X86LbrCounter::readOrError(StringRef FunctionBytes) const {
  // Disable the event before reading.
  ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);

  // Find the boundary of the function so that we could filter the LBRs
  // to keep only the relevant records.
  if (FunctionBytes.empty())
    return llvm::make_error<llvm::StringError>("Empty function bytes",
                                               llvm::errc::invalid_argument);
  const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
  const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
                                                  FunctionBytes.size());
  return doReadCounter(From, To);
}

llvm::Expected<llvm::SmallVector<int64_t, 4>>
X86LbrCounter::doReadCounter(const void *From, const void *To) const {
  // The max number of time-outs/retries before we give up.
  static constexpr int kMaxTimeouts = 160;

  // Parses the LBR buffer and fills CycleArray with the sequence of cycle
  // counts from the buffer.
  llvm::SmallVector<int64_t, 4> CycleArray;
  auto DataBuf = std::make_unique<char[]>(kDataBufferSize);
  int NumTimeouts = 0;
  int PollResult = 0;

  while (PollResult <= 0) {
    PollResult = pollLbrPerfEvent(FileDescriptor);
    if (PollResult > 0)
      break;

    if (PollResult == -1)
      return llvm::make_error<llvm::StringError>("Cannot poll LBR perf event.",
                                                 llvm::errc::io_error);

    if (NumTimeouts++ >= kMaxTimeouts)
      return llvm::make_error<llvm::StringError>(
          "LBR polling still timed out after max number of attempts.",
          llvm::errc::device_or_resource_busy);
  }

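  // Snapshot the ring-buffer control page: data_head is the kernel's write
  // cursor into the data area, data_tail is the position user space has
  // consumed up to.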
  struct perf_event_mmap_page Page;
  memcpy(&Page, MMappedBuffer, sizeof(struct perf_event_mmap_page));

  const uint64_t DataTail = Page.data_tail;
  const uint64_t DataHead = Page.data_head;
  // We're supposed to use a barrier after reading data_head.
  std::atomic_thread_fence(std::memory_order_acq_rel);
  const size_t DataSize = DataHead - DataTail;
  if (DataSize > kDataBufferSize)
    return llvm::make_error<llvm::StringError>(
        "DataSize larger than buffer size.", llvm::errc::invalid_argument);

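  // Copy the sample bytes out of the (possibly wrapped) circular buffer and
  // extract the per-branch cycle counts, filtered to [From, To] when bounds
  // were given.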
  copyDataBuffer(MMappedBuffer, DataBuf.get(), DataTail, DataSize);
  llvm::Error error =
      parseDataBuffer(DataBuf.get(), DataSize, From, To, &CycleArray);
  if (!error)
    return CycleArray;
  return std::move(error);
}

} // namespace exegesis
} // namespace llvm

#endif // defined(__linux__) && defined(HAVE_LIBPFM) &&
       //        defined(LIBPFM_HAS_FIELD_CYCLES)