Allow SymbolUserOpInterface operators to be used in RemoveDeadValues Pass (#117405)
[llvm-project.git] / llvm / tools / llvm-exegesis / lib / BenchmarkRunner.cpp
blob9116b5ced02748559cef8d476d19d0a76803063f
1 //===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "BenchmarkRunner.h"
10 #include "Assembler.h"
11 #include "Error.h"
12 #include "MCInstrDescView.h"
13 #include "MmapUtils.h"
14 #include "PerfHelper.h"
15 #include "SubprocessMemory.h"
16 #include "Target.h"
17 #include "llvm/ADT/ScopeExit.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
22 #include "llvm/Support/CrashRecoveryContext.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Program.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SystemZ/zOSSupport.h"
29 #include <cmath>
30 #include <memory>
31 #include <string>
33 #ifdef __linux__
34 #ifdef HAVE_LIBPFM
35 #include <perfmon/perf_event.h>
36 #endif
37 #include <sys/mman.h>
38 #include <sys/ptrace.h>
39 #include <sys/resource.h>
40 #include <sys/socket.h>
41 #include <sys/syscall.h>
42 #include <sys/wait.h>
43 #include <unistd.h>
45 #if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
46 #include <sys/rseq.h>
47 #if defined(RSEQ_SIG) && defined(SYS_rseq)
48 #define GLIBC_INITS_RSEQ
49 #endif
50 #endif
51 #endif // __linux__
53 namespace llvm {
54 namespace exegesis {
56 BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
57 BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
58 ExecutionModeE ExecutionMode,
59 ArrayRef<ValidationEvent> ValCounters)
60 : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
61 ExecutionMode(ExecutionMode), ValidationCounters(ValCounters),
62 Scratch(std::make_unique<ScratchSpace>()) {}
64 BenchmarkRunner::~BenchmarkRunner() = default;
66 void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
67 const SmallVectorImpl<int64_t> &NewValues,
68 SmallVectorImpl<int64_t> *Result) {
69 const size_t NumValues = std::max(NewValues.size(), Result->size());
70 if (NumValues > Result->size())
71 Result->resize(NumValues, 0);
72 for (size_t I = 0, End = NewValues.size(); I < End; ++I)
73 (*Result)[I] += NewValues[I];
76 Expected<SmallVector<int64_t, 4>>
77 BenchmarkRunner::FunctionExecutor::runAndSample(
78 const char *Counters, ArrayRef<const char *> ValidationCounters,
79 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
80 // We sum counts when there are several counters for a single ProcRes
81 // (e.g. P23 on SandyBridge).
82 SmallVector<int64_t, 4> CounterValues;
83 SmallVector<StringRef, 2> CounterNames;
84 StringRef(Counters).split(CounterNames, '+');
85 for (auto &CounterName : CounterNames) {
86 CounterName = CounterName.trim();
87 Expected<SmallVector<int64_t, 4>> ValueOrError = runWithCounter(
88 CounterName, ValidationCounters, ValidationCounterValues);
89 if (!ValueOrError)
90 return ValueOrError.takeError();
91 accumulateCounterValues(ValueOrError.get(), &CounterValues);
93 return CounterValues;
96 namespace {
97 class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
98 public:
99 static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
100 create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
101 BenchmarkRunner::ScratchSpace *Scratch,
102 std::optional<int> BenchmarkProcessCPU) {
103 Expected<ExecutableFunction> EF =
104 ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
106 if (!EF)
107 return EF.takeError();
109 return std::unique_ptr<InProcessFunctionExecutorImpl>(
110 new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch));
113 private:
114 InProcessFunctionExecutorImpl(const LLVMState &State,
115 ExecutableFunction Function,
116 BenchmarkRunner::ScratchSpace *Scratch)
117 : State(State), Function(std::move(Function)), Scratch(Scratch) {}
119 static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues,
120 SmallVector<int64_t, 4> *Result) {
121 const size_t NumValues = std::max(NewValues.size(), Result->size());
122 if (NumValues > Result->size())
123 Result->resize(NumValues, 0);
124 for (size_t I = 0, End = NewValues.size(); I < End; ++I)
125 (*Result)[I] += NewValues[I];
128 Expected<SmallVector<int64_t, 4>> runWithCounter(
129 StringRef CounterName, ArrayRef<const char *> ValidationCounters,
130 SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
131 const ExegesisTarget &ET = State.getExegesisTarget();
132 char *const ScratchPtr = Scratch->ptr();
133 auto CounterOrError =
134 ET.createCounter(CounterName, State, ValidationCounters);
136 if (!CounterOrError)
137 return CounterOrError.takeError();
139 pfm::CounterGroup *Counter = CounterOrError.get().get();
140 Scratch->clear();
142 auto PS = ET.withSavedState();
143 CrashRecoveryContext CRC;
144 CrashRecoveryContext::Enable();
145 const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() {
146 Counter->start();
147 this->Function(ScratchPtr);
148 Counter->stop();
150 CrashRecoveryContext::Disable();
151 PS.reset();
152 if (Crashed) {
153 #ifdef LLVM_ON_UNIX
154 // See "Exit Status for Commands":
155 // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
156 constexpr const int kSigOffset = 128;
157 return make_error<SnippetSignal>(CRC.RetCode - kSigOffset);
158 #else
159 // The exit code of the process on windows is not meaningful as a
160 // signal, so simply pass in -1 as the signal into the error.
161 return make_error<SnippetSignal>(-1);
162 #endif // LLVM_ON_UNIX
166 auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
167 if (!ValidationValuesOrErr)
168 return ValidationValuesOrErr.takeError();
170 ArrayRef RealValidationValues = *ValidationValuesOrErr;
171 for (size_t I = 0; I < RealValidationValues.size(); ++I)
172 ValidationCounterValues[I] = RealValidationValues[I];
174 return Counter->readOrError(Function.getFunctionBytes());
177 const LLVMState &State;
178 const ExecutableFunction Function;
179 BenchmarkRunner::ScratchSpace *const Scratch;
182 #ifdef __linux__
183 // The following class implements a function executor that executes the
184 // benchmark code within a subprocess rather than within the main llvm-exegesis
185 // process. This allows for much more control over the execution context of the
186 // snippet, particularly with regard to memory. This class performs all the
187 // necessary functions to create the subprocess, execute the snippet in the
188 // subprocess, and report results/handle errors.
189 class SubProcessFunctionExecutorImpl
190 : public BenchmarkRunner::FunctionExecutor {
191 public:
192 static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
193 create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
194 const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) {
195 Expected<ExecutableFunction> EF =
196 ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
197 if (!EF)
198 return EF.takeError();
200 return std::unique_ptr<SubProcessFunctionExecutorImpl>(
201 new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key,
202 BenchmarkProcessCPU));
205 private:
206 SubProcessFunctionExecutorImpl(const LLVMState &State,
207 ExecutableFunction Function,
208 const BenchmarkKey &Key,
209 std::optional<int> BenchmarkCPU)
210 : State(State), Function(std::move(Function)), Key(Key),
211 BenchmarkProcessCPU(BenchmarkCPU) {}
213 enum ChildProcessExitCodeE {
214 CounterFDReadFailed = 1,
215 RSeqDisableFailed,
216 FunctionDataMappingFailed,
217 AuxiliaryMemorySetupFailed,
218 SetCPUAffinityFailed
221 StringRef childProcessExitCodeToString(int ExitCode) const {
222 switch (ExitCode) {
223 case ChildProcessExitCodeE::CounterFDReadFailed:
224 return "Counter file descriptor read failed";
225 case ChildProcessExitCodeE::RSeqDisableFailed:
226 return "Disabling restartable sequences failed";
227 case ChildProcessExitCodeE::FunctionDataMappingFailed:
228 return "Failed to map memory for assembled snippet";
229 case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
230 return "Failed to setup auxiliary memory";
231 case ChildProcessExitCodeE::SetCPUAffinityFailed:
232 return "Failed to set CPU affinity of the benchmarking process";
233 default:
234 return "Child process returned with unknown exit code";
238 Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
239 struct msghdr Message = {};
240 char Buffer[CMSG_SPACE(sizeof(FD))];
241 memset(Buffer, 0, sizeof(Buffer));
242 Message.msg_control = Buffer;
243 Message.msg_controllen = sizeof(Buffer);
245 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
246 ControlMessage->cmsg_level = SOL_SOCKET;
247 ControlMessage->cmsg_type = SCM_RIGHTS;
248 ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
250 memcpy(CMSG_DATA(ControlMessage), &FD, sizeof(FD));
252 Message.msg_controllen = CMSG_SPACE(sizeof(FD));
254 ssize_t BytesWritten = sendmsg(SocketFD, &Message, 0);
256 if (BytesWritten < 0)
257 return make_error<Failure>("Failed to write FD to socket: " +
258 Twine(strerror(errno)));
260 return Error::success();
263 Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
264 struct msghdr Message = {};
266 char ControlBuffer[256];
267 Message.msg_control = ControlBuffer;
268 Message.msg_controllen = sizeof(ControlBuffer);
270 ssize_t BytesRead = recvmsg(SocketFD, &Message, 0);
272 if (BytesRead < 0)
273 return make_error<Failure>("Failed to read FD from socket: " +
274 Twine(strerror(errno)));
276 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
278 int FD;
280 if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
281 return make_error<Failure>("Failed to get correct number of bytes for "
282 "file descriptor from socket.");
284 memcpy(&FD, CMSG_DATA(ControlMessage), sizeof(FD));
286 return FD;
289 Error
290 runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName,
291 SmallVectorImpl<int64_t> &CounterValues,
292 ArrayRef<const char *> ValidationCounters,
293 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
294 auto WriteFDClose = make_scope_exit([WriteFD]() { close(WriteFD); });
295 const ExegesisTarget &ET = State.getExegesisTarget();
296 auto CounterOrError =
297 ET.createCounter(CounterName, State, ValidationCounters, ChildPID);
299 if (!CounterOrError)
300 return CounterOrError.takeError();
302 pfm::CounterGroup *Counter = CounterOrError.get().get();
304 // Make sure to attach to the process (and wait for the sigstop to be
305 // delivered and for the process to continue) before we write to the counter
306 // file descriptor. Attaching to the process before writing to the socket
307 // ensures that the subprocess at most has blocked on the read call. If we
308 // attach afterwards, the subprocess might exit before we get to the attach
309 // call due to effects like scheduler contention, introducing transient
310 // failures.
311 if (ptrace(PTRACE_ATTACH, ChildPID, NULL, NULL) != 0)
312 return make_error<Failure>("Failed to attach to the child process: " +
313 Twine(strerror(errno)));
315 if (waitpid(ChildPID, NULL, 0) == -1) {
316 return make_error<Failure>(
317 "Failed to wait for child process to stop after attaching: " +
318 Twine(strerror(errno)));
321 if (ptrace(PTRACE_CONT, ChildPID, NULL, NULL) != 0)
322 return make_error<Failure>(
323 "Failed to continue execution of the child process: " +
324 Twine(strerror(errno)));
326 int CounterFileDescriptor = Counter->getFileDescriptor();
327 Error SendError =
328 sendFileDescriptorThroughSocket(WriteFD, CounterFileDescriptor);
330 if (SendError)
331 return SendError;
333 int ChildStatus;
334 if (waitpid(ChildPID, &ChildStatus, 0) == -1) {
335 return make_error<Failure>(
336 "Waiting for the child process to complete failed: " +
337 Twine(strerror(errno)));
340 if (WIFEXITED(ChildStatus)) {
341 int ChildExitCode = WEXITSTATUS(ChildStatus);
342 if (ChildExitCode == 0) {
343 // The child exited succesfully, read counter values and return
344 // success.
345 auto CounterValueOrErr = Counter->readOrError();
346 if (!CounterValueOrErr)
347 return CounterValueOrErr.takeError();
348 CounterValues = std::move(*CounterValueOrErr);
350 auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
351 if (!ValidationValuesOrErr)
352 return ValidationValuesOrErr.takeError();
354 ArrayRef RealValidationValues = *ValidationValuesOrErr;
355 for (size_t I = 0; I < RealValidationValues.size(); ++I)
356 ValidationCounterValues[I] = RealValidationValues[I];
358 return Error::success();
360 // The child exited, but not successfully.
361 return make_error<Failure>(
362 "Child benchmarking process exited with non-zero exit code: " +
363 childProcessExitCodeToString(ChildExitCode));
366 // An error was encountered running the snippet, process it
367 siginfo_t ChildSignalInfo;
368 if (ptrace(PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -1) {
369 return make_error<Failure>("Getting signal info from the child failed: " +
370 Twine(strerror(errno)));
373 // Send SIGKILL rather than SIGTERM as the child process has no SIGTERM
374 // handlers to run, and calling SIGTERM would mean that ptrace will force
375 // it to block in the signal-delivery-stop for the SIGSEGV/other signals,
376 // and upon exit.
377 if (kill(ChildPID, SIGKILL) == -1)
378 return make_error<Failure>("Failed to kill child benchmarking proces: " +
379 Twine(strerror(errno)));
381 // Wait for the process to exit so that there are no zombie processes left
382 // around.
383 if (waitpid(ChildPID, NULL, 0) == -1)
384 return make_error<Failure>("Failed to wait for process to die: " +
385 Twine(strerror(errno)));
387 if (ChildSignalInfo.si_signo == SIGSEGV)
388 return make_error<SnippetSegmentationFault>(
389 reinterpret_cast<uintptr_t>(ChildSignalInfo.si_addr));
391 return make_error<SnippetSignal>(ChildSignalInfo.si_signo);
394 static void setCPUAffinityIfRequested(int CPUToUse) {
395 // Special case this function for x86_64 for now as certain more esoteric
396 // platforms have different definitions for some of the libc functions that
397 // cause buildtime failures. Additionally, the subprocess executor mode (the
398 // sole mode where this is supported) currently only supports x86_64.
400 // Also check that we have the SYS_getcpu macro defined, meaning the syscall
401 // actually exists within the build environment. We manually use the syscall
402 // rather than the libc wrapper given the wrapper for getcpu is only available
403 // in glibc 2.29 and later.
404 #if defined(__x86_64__) && defined(SYS_getcpu)
405 // Set the CPU affinity for the child process, so that we ensure that if
406 // the user specified a CPU the process should run on, the benchmarking
407 // process is running on that CPU.
408 cpu_set_t CPUMask;
409 CPU_ZERO(&CPUMask);
410 CPU_SET(CPUToUse, &CPUMask);
411 // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they
412 // are available.
413 int SetAffinityReturn = sched_setaffinity(0, sizeof(CPUMask), &CPUMask);
414 if (SetAffinityReturn == -1) {
415 exit(ChildProcessExitCodeE::SetCPUAffinityFailed);
418 // Check (if assertions are enabled) that we are actually running on the
419 // CPU that was specified by the user.
420 [[maybe_unused]] unsigned int CurrentCPU;
421 assert(syscall(SYS_getcpu, &CurrentCPU, nullptr) == 0 &&
422 "Expected getcpu call to succeed.");
423 assert(static_cast<int>(CurrentCPU) == CPUToUse &&
424 "Expected current CPU to equal the CPU requested by the user");
425 #endif // defined(__x86_64__) && defined(SYS_getcpu)
426 exit(ChildProcessExitCodeE::SetCPUAffinityFailed);
429 Error createSubProcessAndRunBenchmark(
430 StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
431 ArrayRef<const char *> ValidationCounters,
432 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
433 int PipeFiles[2];
434 int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, 0, PipeFiles);
435 if (PipeSuccessOrErr != 0) {
436 return make_error<Failure>(
437 "Failed to create a pipe for interprocess communication between "
438 "llvm-exegesis and the benchmarking subprocess: " +
439 Twine(strerror(errno)));
442 SubprocessMemory SPMemory;
443 Error MemoryInitError = SPMemory.initializeSubprocessMemory(getpid());
444 if (MemoryInitError)
445 return MemoryInitError;
447 Error AddMemDefError =
448 SPMemory.addMemoryDefinition(Key.MemoryValues, getpid());
449 if (AddMemDefError)
450 return AddMemDefError;
452 long ParentTID = SubprocessMemory::getCurrentTID();
453 pid_t ParentOrChildPID = fork();
455 if (ParentOrChildPID == -1) {
456 return make_error<Failure>("Failed to create child process: " +
457 Twine(strerror(errno)));
460 if (ParentOrChildPID == 0) {
461 if (BenchmarkProcessCPU.has_value()) {
462 setCPUAffinityIfRequested(*BenchmarkProcessCPU);
465 // We are in the child process, close the write end of the pipe.
466 close(PipeFiles[1]);
467 // Unregister handlers, signal handling is now handled through ptrace in
468 // the host process.
469 sys::unregisterHandlers();
470 runChildSubprocess(PipeFiles[0], Key, ParentTID);
471 // The child process terminates in the above function, so we should never
472 // get to this point.
473 llvm_unreachable("Child process didn't exit when expected.");
476 // Close the read end of the pipe as we only need to write to the subprocess
477 // from the parent process.
478 close(PipeFiles[0]);
479 return runParentProcess(ParentOrChildPID, PipeFiles[1], CounterName,
480 CounterValues, ValidationCounters,
481 ValidationCounterValues);
484 void disableCoreDumps() const {
485 struct rlimit rlim;
487 rlim.rlim_cur = 0;
488 setrlimit(RLIMIT_CORE, &rlim);
491 [[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key,
492 long ParentTID) const {
493 // Disable core dumps in the child process as otherwise everytime we
494 // encounter an execution failure like a segmentation fault, we will create
495 // a core dump. We report the information directly rather than require the
496 // user inspect a core dump.
497 disableCoreDumps();
499 // The following occurs within the benchmarking subprocess.
500 pid_t ParentPID = getppid();
502 Expected<int> CounterFileDescriptorOrError =
503 getFileDescriptorFromSocket(Pipe);
505 if (!CounterFileDescriptorOrError)
506 exit(ChildProcessExitCodeE::CounterFDReadFailed);
508 int CounterFileDescriptor = *CounterFileDescriptorOrError;
510 // Glibc versions greater than 2.35 automatically call rseq during
511 // initialization. Unmapping the region that glibc sets up for this causes
512 // segfaults in the program. Unregister the rseq region so that we can safely
513 // unmap it later
514 #ifdef GLIBC_INITS_RSEQ
515 unsigned int RseqStructSize = __rseq_size;
517 // Glibc v2.40 (the change is also expected to be backported to v2.35)
518 // changes the definition of __rseq_size to be the usable area of the struct
519 // rather than the actual size of the struct. v2.35 uses only 20 bytes of
520 // the 32 byte struct. For now, it should be safe to assume that if the
521 // usable size is less than 32, the actual size of the struct will be 32
522 // bytes given alignment requirements.
523 if (__rseq_size < 32)
524 RseqStructSize = 32;
526 long RseqDisableOutput = syscall(
527 SYS_rseq,
528 reinterpret_cast<uintptr_t>(__builtin_thread_pointer()) + __rseq_offset,
529 RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
530 if (RseqDisableOutput != 0)
531 exit(ChildProcessExitCodeE::RSeqDisableFailed);
532 #endif // GLIBC_INITS_RSEQ
534 // The frontend that generates the memory annotation structures should
535 // validate that the address to map the snippet in at is a multiple of
536 // the page size. Assert that this is true here.
537 assert(Key.SnippetAddress % getpagesize() == 0 &&
538 "The snippet address needs to be aligned to a page boundary.");
540 size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
541 void *MapAddress = NULL;
542 int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS;
544 if (Key.SnippetAddress != 0) {
545 MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
546 MapFlags |= MAP_FIXED_NOREPLACE;
549 char *FunctionDataCopy =
550 (char *)mmap(MapAddress, FunctionDataCopySize, PROT_READ | PROT_WRITE,
551 MapFlags, 0, 0);
552 if (reinterpret_cast<intptr_t>(FunctionDataCopy) == -1)
553 exit(ChildProcessExitCodeE::FunctionDataMappingFailed);
555 memcpy(FunctionDataCopy, this->Function.FunctionBytes.data(),
556 this->Function.FunctionBytes.size());
557 mprotect(FunctionDataCopy, FunctionDataCopySize, PROT_READ | PROT_EXEC);
559 Expected<int> AuxMemFDOrError =
560 SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
561 Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
562 if (!AuxMemFDOrError)
563 exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
565 ((void (*)(size_t, int))(uintptr_t)FunctionDataCopy)(FunctionDataCopySize,
566 *AuxMemFDOrError);
568 exit(0);
571 Expected<SmallVector<int64_t, 4>> runWithCounter(
572 StringRef CounterName, ArrayRef<const char *> ValidationCounters,
573 SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
574 SmallVector<int64_t, 4> Value(1, 0);
575 Error PossibleBenchmarkError = createSubProcessAndRunBenchmark(
576 CounterName, Value, ValidationCounters, ValidationCounterValues);
578 if (PossibleBenchmarkError)
579 return std::move(PossibleBenchmarkError);
581 return Value;
584 const LLVMState &State;
585 const ExecutableFunction Function;
586 const BenchmarkKey &Key;
587 const std::optional<int> BenchmarkProcessCPU;
589 #endif // __linux__
590 } // namespace
592 Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
593 const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
594 unsigned MinInstructions, unsigned LoopBodySize,
595 bool GenerateMemoryInstructions) const {
596 const std::vector<MCInst> &Instructions = BC.Key.Instructions;
597 SmallString<0> Buffer;
598 raw_svector_ostream OS(Buffer);
599 if (Error E = assembleToStream(
600 State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns,
601 Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
602 GenerateMemoryInstructions),
603 OS, BC.Key, GenerateMemoryInstructions)) {
604 return std::move(E);
606 return Buffer;
609 Expected<BenchmarkRunner::RunnableConfiguration>
610 BenchmarkRunner::getRunnableConfiguration(
611 const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
612 const SnippetRepetitor &Repetitor) const {
613 RunnableConfiguration RC;
615 Benchmark &BenchmarkResult = RC.BenchmarkResult;
616 BenchmarkResult.Mode = Mode;
617 BenchmarkResult.CpuName =
618 std::string(State.getTargetMachine().getTargetCPU());
619 BenchmarkResult.LLVMTriple =
620 State.getTargetMachine().getTargetTriple().normalize();
621 BenchmarkResult.MinInstructions = MinInstructions;
622 BenchmarkResult.Info = BC.Info;
624 const std::vector<MCInst> &Instructions = BC.Key.Instructions;
626 bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
628 BenchmarkResult.Key = BC.Key;
630 // Assemble at least kMinInstructionsForSnippet instructions by repeating
631 // the snippet for debug/analysis. This is so that the user clearly
632 // understands that the inside instructions are repeated.
633 if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
634 const int MinInstructionsForSnippet = 4 * Instructions.size();
635 const int LoopBodySizeForSnippet = 2 * Instructions.size();
636 auto Snippet =
637 assembleSnippet(BC, Repetitor, MinInstructionsForSnippet,
638 LoopBodySizeForSnippet, GenerateMemoryInstructions);
639 if (Error E = Snippet.takeError())
640 return std::move(E);
642 if (auto Err = getBenchmarkFunctionBytes(*Snippet,
643 BenchmarkResult.AssembledSnippet))
644 return std::move(Err);
647 // Assemble enough repetitions of the snippet so we have at least
648 // MinInstructions instructions.
649 if (BenchmarkPhaseSelector >
650 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
651 auto Snippet =
652 assembleSnippet(BC, Repetitor, BenchmarkResult.MinInstructions,
653 LoopBodySize, GenerateMemoryInstructions);
654 if (Error E = Snippet.takeError())
655 return std::move(E);
656 RC.ObjectFile = getObjectFromBuffer(*Snippet);
659 return std::move(RC);
662 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
663 BenchmarkRunner::createFunctionExecutor(
664 object::OwningBinary<object::ObjectFile> ObjectFile,
665 const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) const {
666 switch (ExecutionMode) {
667 case ExecutionModeE::InProcess: {
668 if (BenchmarkProcessCPU.has_value())
669 return make_error<Failure>("The inprocess execution mode does not "
670 "support benchmark core pinning.");
672 auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
673 State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU);
674 if (!InProcessExecutorOrErr)
675 return InProcessExecutorOrErr.takeError();
677 return std::move(*InProcessExecutorOrErr);
679 case ExecutionModeE::SubProcess: {
680 #ifdef __linux__
681 auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
682 State, std::move(ObjectFile), Key, BenchmarkProcessCPU);
683 if (!SubProcessExecutorOrErr)
684 return SubProcessExecutorOrErr.takeError();
686 return std::move(*SubProcessExecutorOrErr);
687 #else
688 return make_error<Failure>(
689 "The subprocess execution mode is only supported on Linux");
690 #endif
693 llvm_unreachable("ExecutionMode is outside expected range");
696 std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
697 RunnableConfiguration &&RC, const std::optional<StringRef> &DumpFile,
698 std::optional<int> BenchmarkProcessCPU) const {
699 Benchmark &BenchmarkResult = RC.BenchmarkResult;
700 object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
702 if (DumpFile && BenchmarkPhaseSelector >
703 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
704 auto ObjectFilePath =
705 writeObjectFile(ObjectFile.getBinary()->getData(), *DumpFile);
706 if (Error E = ObjectFilePath.takeError()) {
707 return {std::move(E), std::move(BenchmarkResult)};
709 outs() << "Check generated assembly with: /usr/bin/objdump -d "
710 << *ObjectFilePath << "\n";
713 if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
714 BenchmarkResult.Error = "actual measurements skipped.";
715 return {Error::success(), std::move(BenchmarkResult)};
718 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
719 createFunctionExecutor(std::move(ObjectFile), RC.BenchmarkResult.Key,
720 BenchmarkProcessCPU);
721 if (!Executor)
722 return {Executor.takeError(), std::move(BenchmarkResult)};
723 auto NewMeasurements = runMeasurements(**Executor);
725 if (Error E = NewMeasurements.takeError()) {
726 return {std::move(E), std::move(BenchmarkResult)};
728 assert(BenchmarkResult.MinInstructions > 0 && "invalid MinInstructions");
729 for (BenchmarkMeasure &BM : *NewMeasurements) {
730 // Scale the measurements by the number of instructions.
731 BM.PerInstructionValue /= BenchmarkResult.MinInstructions;
732 // Scale the measurements by the number of times the entire snippet is
733 // repeated.
734 BM.PerSnippetValue /=
735 std::ceil(BenchmarkResult.MinInstructions /
736 static_cast<double>(BenchmarkResult.Key.Instructions.size()));
738 BenchmarkResult.Measurements = std::move(*NewMeasurements);
740 return {Error::success(), std::move(BenchmarkResult)};
743 Expected<std::string>
744 BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
745 int ResultFD = 0;
746 SmallString<256> ResultPath = FileName;
747 if (Error E = errorCodeToError(
748 FileName.empty() ? sys::fs::createTemporaryFile("snippet", "o",
749 ResultFD, ResultPath)
750 : sys::fs::openFileForReadWrite(
751 FileName, ResultFD, sys::fs::CD_CreateAlways,
752 sys::fs::OF_None)))
753 return std::move(E);
754 raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/);
755 OFS.write(Buffer.data(), Buffer.size());
756 OFS.flush();
757 return std::string(ResultPath);
760 static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS,
761 const ValidationEvent RHS) {
762 return static_cast<int>(LHS.first) < static_cast<int>(RHS);
765 Error BenchmarkRunner::getValidationCountersToRun(
766 SmallVector<const char *> &ValCountersToRun) const {
767 const PfmCountersInfo &PCI = State.getPfmCounters();
768 ValCountersToRun.reserve(ValidationCounters.size());
770 ValCountersToRun.reserve(ValidationCounters.size());
771 ArrayRef TargetValidationEvents(PCI.ValidationEvents,
772 PCI.NumValidationEvents);
773 for (const ValidationEvent RequestedValEvent : ValidationCounters) {
774 auto ValCounterIt =
775 lower_bound(TargetValidationEvents, RequestedValEvent, EventLessThan);
776 if (ValCounterIt == TargetValidationEvents.end() ||
777 ValCounterIt->first != RequestedValEvent)
778 return make_error<Failure>("Cannot create validation counter");
780 assert(ValCounterIt->first == RequestedValEvent &&
781 "The array of validation events from the target should be sorted");
782 ValCountersToRun.push_back(ValCounterIt->second);
785 return Error::success();
788 BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
790 } // namespace exegesis
791 } // namespace llvm