//===-- X86Counter.cpp ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "X86Counter.h"

#if defined(__linux__) && defined(HAVE_LIBPFM) &&                             \
    defined(LIBPFM_HAS_FIELD_CYCLES)

// FIXME: Use appropriate wrappers for poll.h and mman.h
// to support Windows and remove this linux-only guard.

#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"

#include <perfmon/perf_event.h>
#include <perfmon/pfmlib.h>
#include <perfmon/pfmlib_perf_event.h>

#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <vector>

#include <poll.h>
#include <sys/mman.h>
#include <unistd.h>

namespace llvm {
namespace exegesis {

// Number of entries in the LBR.
static constexpr int kLbrEntries = 16;
static constexpr size_t kBufferPages = 8;
static const size_t kDataBufferSize = kBufferPages * getpagesize();

// First page is reserved for perf_event_mmap_page. Data buffer starts on
// the next page, so we allocate one more page.
static const size_t kMappedBufferSize = (kBufferPages + 1) * getpagesize();

// Waits for the LBR perf events.
static int pollLbrPerfEvent(const int FileDescriptor) {
  struct pollfd PollFd;
  PollFd.fd = FileDescriptor;
  PollFd.events = POLLIN;
  PollFd.revents = 0;
  return poll(&PollFd, 1 /* num of fds */, 10000 /* timeout in ms */);
}

// Copies the data buffer into Buf, given a pointer to the mmapped buffer.
static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail,
                           size_t DataSize) {
  // First page is reserved for perf_event_mmap_page. Data buffer starts on
  // the next page.
  char *Start = reinterpret_cast<char *>(MMappedBuffer) + getpagesize();
  // The LBR buffer is a cyclic buffer; we copy the data out to a flat buffer.
  uint64_t Offset = Tail % kDataBufferSize;
  size_t CopySize = kDataBufferSize - Offset;
  memcpy(Buf, Start + Offset, CopySize);
  if (CopySize >= DataSize)
    return;
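
  // The valid data wraps past the end of the cyclic buffer; also copy the
  // wrapped-around portion from the start of the data area.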
  memcpy(Buf + CopySize, Start, Offset);
  return;
}

// Parses the given data buffer for stats and fills the CycleArray.
// If data has been extracted successfully, also modifies the code to jump
// out of the benchmark loop.
static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize,
                                   const void *From, const void *To,
                                   llvm::SmallVector<int64_t, 4> *CycleArray) {
  const char *DataPtr = DataBuf;
  while (DataPtr < DataBuf + DataSize) {
    struct perf_event_header Header;
    memcpy(&Header, DataPtr, sizeof(struct perf_event_header));
    if (Header.type != PERF_RECORD_SAMPLE) {
      // Ignores non-sample records.
      DataPtr += Header.size;
      continue;
    }
    DataPtr += sizeof(Header);
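
    // With sample_type set to PERF_SAMPLE_BRANCH_STACK only, the sample body
    // is the number of branch entries (a u64) followed by that many
    // perf_branch_entry records.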
    uint64_t Count =
        llvm::support::endian::read64(DataPtr, llvm::endianness::native);
    DataPtr += sizeof(Count);

    struct perf_branch_entry Entry;
    memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));

    // Read the perf_branch_entry array.
    for (uint64_t i = 0; i < Count; ++i) {
      const uint64_t BlockStart = From == nullptr
                                      ? std::numeric_limits<uint64_t>::min()
                                      : reinterpret_cast<uint64_t>(From);
      const uint64_t BlockEnd = To == nullptr
                                    ? std::numeric_limits<uint64_t>::max()
                                    : reinterpret_cast<uint64_t>(To);

      if (BlockStart <= Entry.from && BlockEnd >= Entry.to)
        CycleArray->push_back(Entry.cycles);

      if (i == Count - 1)
        // We've reached the last entry.
        return llvm::Error::success();

      // Advance to the next entry.
      DataPtr += sizeof(Entry);
      memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
    }
  }
  return llvm::make_error<llvm::StringError>("Unable to parse data buffer.",
                                             llvm::errc::io_error);
}

X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod) {
  assert(SamplingPeriod > 0 && "SamplingPeriod must be positive");
  EventString = "BR_INST_RETIRED.NEAR_TAKEN";
  Attr = new perf_event_attr();
  Attr->size = sizeof(*Attr);
  Attr->type = PERF_TYPE_RAW;
  // FIXME This is SKL's encoding. Not sure if it'll change.
  Attr->config = 0x20c4; // BR_INST_RETIRED.NEAR_TAKEN
  Attr->sample_type = PERF_SAMPLE_BRANCH_STACK;
  // Don't need to specify "USER" because we've already excluded HV and Kernel.
  Attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
  Attr->sample_period = SamplingPeriod;
  Attr->wakeup_events = 1; // We need this even when using ioctl REFRESH.
  Attr->disabled = 1;
  Attr->exclude_kernel = 1;
  Attr->exclude_hv = 1;
  Attr->read_format = PERF_FORMAT_GROUP;

  FullQualifiedEventString = EventString;
}

X86LbrCounter::X86LbrCounter(pfm::PerfEvent &&NewEvent)
    : Counter(std::move(NewEvent)) {
  MMappedBuffer = mmap(nullptr, kMappedBufferSize, PROT_READ | PROT_WRITE,
                       MAP_SHARED, FileDescriptor, 0);
  if (MMappedBuffer == MAP_FAILED)
    llvm::errs() << "Failed to mmap buffer.";
}

X86LbrCounter::~X86LbrCounter() {
  if (0 != munmap(MMappedBuffer, kMappedBufferSize))
    llvm::errs() << "Failed to munmap buffer.";
}

void X86LbrCounter::start() {
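  // PERF_EVENT_IOC_REFRESH enables the event and keeps it enabled until the
  // given number of overflow notifications have been delivered.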
  ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
}

llvm::Error X86LbrCounter::checkLbrSupport() {
  // Do a sample read and check if the results contain non-zero values.

  X86LbrCounter counter(X86LbrPerfEvent(123));
  counter.start();

  // Prevent the compiler from unrolling the loop and getting rid of all the
  // branches. We need at least 16 iterations.
  int Sum = 0;
  int V = 1;

  volatile int *P = &V;
  auto TimeLimit =
      std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5);
  for (int I = 0;
       I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit;
       ++I) {
    Sum += *P;
  }

  counter.stop();
  (void)Sum;

  auto ResultOrError = counter.doReadCounter(nullptr, nullptr);
  if (ResultOrError)
    if (!ResultOrError.get().empty())
      // If there is at least one non-zero entry, then LBR is supported.
      for (const int64_t &Value : ResultOrError.get())
        if (Value != 0)
          return Error::success();

  return llvm::make_error<llvm::StringError>(
      "LBR format with cycles is not supported on the host.",
      llvm::errc::not_supported);
}

llvm::Expected<llvm::SmallVector<int64_t, 4>>
X86LbrCounter::readOrError(StringRef FunctionBytes) const {
  // Disable the event before reading.
  ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);

  // Find the boundaries of the function so that we can filter the LBRs
  // and keep only the relevant records.
  if (FunctionBytes.empty())
    return llvm::make_error<llvm::StringError>("Empty function bytes",
                                               llvm::errc::invalid_argument);
  const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
  const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
                                                  FunctionBytes.size());
  return doReadCounter(From, To);
}

llvm::Expected<llvm::SmallVector<int64_t, 4>>
X86LbrCounter::doReadCounter(const void *From, const void *To) const {
  // The max number of time-outs/retries before we give up.
  static constexpr int kMaxTimeouts = 160;

  // Parses the LBR buffer and fills CycleArray with the sequence of cycle
  // counts from the buffer.
  llvm::SmallVector<int64_t, 4> CycleArray;
  auto DataBuf = std::make_unique<char[]>(kDataBufferSize);
  int NumTimeouts = 0;
  int PollResult = 0;

  while (PollResult <= 0) {
    PollResult = pollLbrPerfEvent(FileDescriptor);
    if (PollResult > 0)
      break;
    if (PollResult == -1)
      return llvm::make_error<llvm::StringError>("Cannot poll LBR perf event.",
                                                 llvm::errc::io_error);
    if (NumTimeouts++ >= kMaxTimeouts)
      return llvm::make_error<llvm::StringError>(
          "LBR polling still timed out after max number of attempts.",
          llvm::errc::device_or_resource_busy);
  }

  struct perf_event_mmap_page Page;
  memcpy(&Page, MMappedBuffer, sizeof(struct perf_event_mmap_page));

  const uint64_t DataTail = Page.data_tail;
  const uint64_t DataHead = Page.data_head;
  // We're supposed to use a barrier after reading data_head.
  std::atomic_thread_fence(std::memory_order_acq_rel);
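  // data_head - data_tail is the number of bytes the kernel has published
  // and that we have not consumed yet.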
  const size_t DataSize = DataHead - DataTail;
  if (DataSize > kDataBufferSize)
    return llvm::make_error<llvm::StringError>(
        "DataSize larger than buffer size.", llvm::errc::invalid_argument);

  copyDataBuffer(MMappedBuffer, DataBuf.get(), DataTail, DataSize);
  llvm::Error error =
      parseDataBuffer(DataBuf.get(), DataSize, From, To, &CycleArray);
  if (!error)
    return CycleArray;
  return std::move(error);
}

} // namespace exegesis
} // namespace llvm

#endif // defined(__linux__) && defined(HAVE_LIBPFM) &&
       // defined(LIBPFM_HAS_FIELD_CYCLES)