llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp

   1 //===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "BenchmarkRunner.h"
  10 #include "Assembler.h"
  11 #include "Error.h"
  12 #include "MCInstrDescView.h"
  13 #include "MmapUtils.h"
  14 #include "PerfHelper.h"
  15 #include "SubprocessMemory.h"
  16 #include "Target.h"
  17 #include "llvm/ADT/ScopeExit.h"
  18 #include "llvm/ADT/StringExtras.h"
  19 #include "llvm/ADT/StringRef.h"
  20 #include "llvm/ADT/Twine.h"
  21 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
  22 #include "llvm/Support/CrashRecoveryContext.h"
  23 #include "llvm/Support/Error.h"
  24 #include "llvm/Support/FileSystem.h"
  25 #include "llvm/Support/MemoryBuffer.h"
  26 #include "llvm/Support/Program.h"
  27 #include "llvm/Support/Signals.h"
  28 #include "llvm/Support/SystemZ/zOSSupport.h"
  29 #include <cmath>
  30 #include <memory>
  31 #include <string>
  32
  33 #ifdef __linux__
  34 #ifdef HAVE_LIBPFM
  35 #include <perfmon/perf_event.h>
  36 #endif
  37 #include <sys/mman.h>
  38 #include <sys/ptrace.h>
  39 #include <sys/resource.h>
  40 #include <sys/socket.h>
  41 #include <sys/syscall.h>
  42 #include <sys/wait.h>
  43 #include <unistd.h>
  44
  45 #if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
  46 #include <sys/rseq.h>
  47 #if defined(RSEQ_SIG) && defined(SYS_rseq)
  48 #define GLIBC_INITS_RSEQ
  49 #endif
  50 #endif
  51 #endif // __linux__
  52
  53 namespace llvm {
  54 namespace exegesis {
  55
/// Constructs a runner bound to \p State.
///
/// Stores the benchmark mode, the phase selector, the execution mode and the
/// requested validation counters as members, and allocates a fresh
/// ScratchSpace that the runner owns through a std::unique_ptr.
BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
                                 BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
                                 ExecutionModeE ExecutionMode,
                                 ArrayRef<ValidationEvent> ValCounters)
    : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
      ExecutionMode(ExecutionMode), ValidationCounters(ValCounters),
      Scratch(std::make_unique<ScratchSpace>()) {}
  63
// Defaulted out of line, in this translation unit, rather than in the header.
BenchmarkRunner::~BenchmarkRunner() = default;
  65
  66 void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
  67     const SmallVectorImpl<int64_t> &NewValues,
  68     SmallVectorImpl<int64_t> *Result) {
  69   const size_t NumValues = std::max(NewValues.size(), Result->size());
  70   if (NumValues > Result->size())
  71     Result->resize(NumValues, 0);
  72   for (size_t I = 0, End = NewValues.size(); I < End; ++I)
  73     (*Result)[I] += NewValues[I];
  74 }
  75
  76 Expected<SmallVector<int64_t, 4>>
  77 BenchmarkRunner::FunctionExecutor::runAndSample(
  78     const char *Counters, ArrayRef<const char *> ValidationCounters,
  79     SmallVectorImpl<int64_t> &ValidationCounterValues) const {
  80   // We sum counts when there are several counters for a single ProcRes
  81   // (e.g. P23 on SandyBridge).
  82   SmallVector<int64_t, 4> CounterValues;
  83   SmallVector<StringRef, 2> CounterNames;
  84   StringRef(Counters).split(CounterNames, '+');
  85   for (auto &CounterName : CounterNames) {
  86     CounterName = CounterName.trim();
  87     Expected<SmallVector<int64_t, 4>> ValueOrError = runWithCounter(
  88         CounterName, ValidationCounters, ValidationCounterValues);
  89     if (!ValueOrError)
  90       return ValueOrError.takeError();
  91     accumulateCounterValues(ValueOrError.get(), &CounterValues);
  92   }
  93   return CounterValues;
  94 }
  95
  96 namespace {
  97 class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
  98 public:
  99   static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
 100   create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
 101          BenchmarkRunner::ScratchSpace *Scratch,
 102          std::optional<int> BenchmarkProcessCPU) {
 103     Expected<ExecutableFunction> EF =
 104         ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
 105
 106     if (!EF)
 107       return EF.takeError();
 108
 109     return std::unique_ptr<InProcessFunctionExecutorImpl>(
 110         new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch));
 111   }
 112
 113 private:
 114   InProcessFunctionExecutorImpl(const LLVMState &State,
 115                                 ExecutableFunction Function,
 116                                 BenchmarkRunner::ScratchSpace *Scratch)
 117       : State(State), Function(std::move(Function)), Scratch(Scratch) {}
 118
 119   static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues,
 120                                       SmallVector<int64_t, 4> *Result) {
 121     const size_t NumValues = std::max(NewValues.size(), Result->size());
 122     if (NumValues > Result->size())
 123       Result->resize(NumValues, 0);
 124     for (size_t I = 0, End = NewValues.size(); I < End; ++I)
 125       (*Result)[I] += NewValues[I];
 126   }
 127
 128   Expected<SmallVector<int64_t, 4>> runWithCounter(
 129       StringRef CounterName, ArrayRef<const char *> ValidationCounters,
 130       SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
 131     const ExegesisTarget &ET = State.getExegesisTarget();
 132     char *const ScratchPtr = Scratch->ptr();
 133     auto CounterOrError =
 134         ET.createCounter(CounterName, State, ValidationCounters);
 135
 136     if (!CounterOrError)
 137       return CounterOrError.takeError();
 138
 139     pfm::CounterGroup *Counter = CounterOrError.get().get();
 140     Scratch->clear();
 141     {
 142       auto PS = ET.withSavedState();
 143       CrashRecoveryContext CRC;
 144       CrashRecoveryContext::Enable();
 145       const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() {
 146         Counter->start();
 147         this->Function(ScratchPtr);
 148         Counter->stop();
 149       });
 150       CrashRecoveryContext::Disable();
 151       PS.reset();
 152       if (Crashed) {
 153 #ifdef LLVM_ON_UNIX
 154         // See "Exit Status for Commands":
 155         // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
 156         constexpr const int kSigOffset = 128;
 157         return make_error<SnippetSignal>(CRC.RetCode - kSigOffset);
 158 #else
 159         // The exit code of the process on windows is not meaningful as a
 160         // signal, so simply pass in -1 as the signal into the error.
 161         return make_error<SnippetSignal>(-1);
 162 #endif // LLVM_ON_UNIX
 163       }
 164     }
 165
 166     auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
 167     if (!ValidationValuesOrErr)
 168       return ValidationValuesOrErr.takeError();
 169
 170     ArrayRef RealValidationValues = *ValidationValuesOrErr;
 171     for (size_t I = 0; I < RealValidationValues.size(); ++I)
 172       ValidationCounterValues[I] = RealValidationValues[I];
 173
 174     return Counter->readOrError(Function.getFunctionBytes());
 175   }
 176
 177   const LLVMState &State;
 178   const ExecutableFunction Function;
 179   BenchmarkRunner::ScratchSpace *const Scratch;
 180 };
 181
 182 #ifdef __linux__
 183 // The following class implements a function executor that executes the
 184 // benchmark code within a subprocess rather than within the main llvm-exegesis
 185 // process. This allows for much more control over the execution context of the
 186 // snippet, particularly with regard to memory. This class performs all the
 187 // necessary functions to create the subprocess, execute the snippet in the
 188 // subprocess, and report results/handle errors.
 189 class SubProcessFunctionExecutorImpl
 190     : public BenchmarkRunner::FunctionExecutor {
 191 public:
 192   static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
 193   create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
 194          const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) {
 195     Expected<ExecutableFunction> EF =
 196         ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
 197     if (!EF)
 198       return EF.takeError();
 199
 200     return std::unique_ptr<SubProcessFunctionExecutorImpl>(
 201         new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key,
 202                                            BenchmarkProcessCPU));
 203   }
 204
 205 private:
 206   SubProcessFunctionExecutorImpl(const LLVMState &State,
 207                                  ExecutableFunction Function,
 208                                  const BenchmarkKey &Key,
 209                                  std::optional<int> BenchmarkCPU)
 210       : State(State), Function(std::move(Function)), Key(Key),
 211         BenchmarkProcessCPU(BenchmarkCPU) {}
 212
 213   enum ChildProcessExitCodeE {
 214     CounterFDReadFailed = 1,
 215     RSeqDisableFailed,
 216     FunctionDataMappingFailed,
 217     AuxiliaryMemorySetupFailed,
 218     SetCPUAffinityFailed
 219   };
 220
 221   StringRef childProcessExitCodeToString(int ExitCode) const {
 222     switch (ExitCode) {
 223     case ChildProcessExitCodeE::CounterFDReadFailed:
 224       return "Counter file descriptor read failed";
 225     case ChildProcessExitCodeE::RSeqDisableFailed:
 226       return "Disabling restartable sequences failed";
 227     case ChildProcessExitCodeE::FunctionDataMappingFailed:
 228       return "Failed to map memory for assembled snippet";
 229     case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
 230       return "Failed to setup auxiliary memory";
 231     case ChildProcessExitCodeE::SetCPUAffinityFailed:
 232       return "Failed to set CPU affinity of the benchmarking process";
 233     default:
 234       return "Child process returned with unknown exit code";
 235     }
 236   }
 237
 238   Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
 239     struct msghdr Message = {};
 240     char Buffer[CMSG_SPACE(sizeof(FD))];
 241     memset(Buffer, 0, sizeof(Buffer));
 242     Message.msg_control = Buffer;
 243     Message.msg_controllen = sizeof(Buffer);
 244
 245     struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
 246     ControlMessage->cmsg_level = SOL_SOCKET;
 247     ControlMessage->cmsg_type = SCM_RIGHTS;
 248     ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
 249
 250     memcpy(CMSG_DATA(ControlMessage), &FD, sizeof(FD));
 251
 252     Message.msg_controllen = CMSG_SPACE(sizeof(FD));
 253
 254     ssize_t BytesWritten = sendmsg(SocketFD, &Message, 0);
 255
 256     if (BytesWritten < 0)
 257       return make_error<Failure>("Failed to write FD to socket: " +
 258                                  Twine(strerror(errno)));
 259
 260     return Error::success();
 261   }
 262
 263   Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
 264     struct msghdr Message = {};
 265
 266     char ControlBuffer[256];
 267     Message.msg_control = ControlBuffer;
 268     Message.msg_controllen = sizeof(ControlBuffer);
 269
 270     ssize_t BytesRead = recvmsg(SocketFD, &Message, 0);
 271
 272     if (BytesRead < 0)
 273       return make_error<Failure>("Failed to read FD from socket: " +
 274                                  Twine(strerror(errno)));
 275
 276     struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
 277
 278     int FD;
 279
 280     if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
 281       return make_error<Failure>("Failed to get correct number of bytes for "
 282                                  "file descriptor from socket.");
 283
 284     memcpy(&FD, CMSG_DATA(ControlMessage), sizeof(FD));
 285
 286     return FD;
 287   }
 288
 289   Error
 290   runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName,
 291                    SmallVectorImpl<int64_t> &CounterValues,
 292                    ArrayRef<const char *> ValidationCounters,
 293                    SmallVectorImpl<int64_t> &ValidationCounterValues) const {
 294     auto WriteFDClose = make_scope_exit([WriteFD]() { close(WriteFD); });
 295     const ExegesisTarget &ET = State.getExegesisTarget();
 296     auto CounterOrError =
 297         ET.createCounter(CounterName, State, ValidationCounters, ChildPID);
 298
 299     if (!CounterOrError)
 300       return CounterOrError.takeError();
 301
 302     pfm::CounterGroup *Counter = CounterOrError.get().get();
 303
 304     // Make sure to attach to the process (and wait for the sigstop to be
 305     // delivered and for the process to continue) before we write to the counter
 306     // file descriptor. Attaching to the process before writing to the socket
 307     // ensures that the subprocess at most has blocked on the read call. If we
 308     // attach afterwards, the subprocess might exit before we get to the attach
 309     // call due to effects like scheduler contention, introducing transient
 310     // failures.
 311     if (ptrace(PTRACE_ATTACH, ChildPID, NULL, NULL) != 0)
 312       return make_error<Failure>("Failed to attach to the child process: " +
 313                                  Twine(strerror(errno)));
 314
 315     if (waitpid(ChildPID, NULL, 0) == -1) {
 316       return make_error<Failure>(
 317           "Failed to wait for child process to stop after attaching: " +
 318           Twine(strerror(errno)));
 319     }
 320
 321     if (ptrace(PTRACE_CONT, ChildPID, NULL, NULL) != 0)
 322       return make_error<Failure>(
 323           "Failed to continue execution of the child process: " +
 324           Twine(strerror(errno)));
 325
 326     int CounterFileDescriptor = Counter->getFileDescriptor();
 327     Error SendError =
 328         sendFileDescriptorThroughSocket(WriteFD, CounterFileDescriptor);
 329
 330     if (SendError)
 331       return SendError;
 332
 333     int ChildStatus;
 334     if (waitpid(ChildPID, &ChildStatus, 0) == -1) {
 335       return make_error<Failure>(
 336           "Waiting for the child process to complete failed: " +
 337           Twine(strerror(errno)));
 338     }
 339
 340     if (WIFEXITED(ChildStatus)) {
 341       int ChildExitCode = WEXITSTATUS(ChildStatus);
 342       if (ChildExitCode == 0) {
 343         // The child exited succesfully, read counter values and return
 344         // success.
 345         auto CounterValueOrErr = Counter->readOrError();
 346         if (!CounterValueOrErr)
 347           return CounterValueOrErr.takeError();
 348         CounterValues = std::move(*CounterValueOrErr);
 349
 350         auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
 351         if (!ValidationValuesOrErr)
 352           return ValidationValuesOrErr.takeError();
 353
 354         ArrayRef RealValidationValues = *ValidationValuesOrErr;
 355         for (size_t I = 0; I < RealValidationValues.size(); ++I)
 356           ValidationCounterValues[I] = RealValidationValues[I];
 357
 358         return Error::success();
 359       }
 360       // The child exited, but not successfully.
 361       return make_error<Failure>(
 362           "Child benchmarking process exited with non-zero exit code: " +
 363           childProcessExitCodeToString(ChildExitCode));
 364     }
 365
 366     // An error was encountered running the snippet, process it
 367     siginfo_t ChildSignalInfo;
 368     if (ptrace(PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -1) {
 369       return make_error<Failure>("Getting signal info from the child failed: " +
 370                                  Twine(strerror(errno)));
 371     }
 372
 373     // Send SIGKILL rather than SIGTERM as the child process has no SIGTERM
 374     // handlers to run, and calling SIGTERM would mean that ptrace will force
 375     // it to block in the signal-delivery-stop for the SIGSEGV/other signals,
 376     // and upon exit.
 377     if (kill(ChildPID, SIGKILL) == -1)
 378       return make_error<Failure>("Failed to kill child benchmarking proces: " +
 379                                  Twine(strerror(errno)));
 380
 381     // Wait for the process to exit so that there are no zombie processes left
 382     // around.
 383     if (waitpid(ChildPID, NULL, 0) == -1)
 384       return make_error<Failure>("Failed to wait for process to die: " +
 385                                  Twine(strerror(errno)));
 386
 387     if (ChildSignalInfo.si_signo == SIGSEGV)
 388       return make_error<SnippetSegmentationFault>(
 389           reinterpret_cast<uintptr_t>(ChildSignalInfo.si_addr));
 390
 391     return make_error<SnippetSignal>(ChildSignalInfo.si_signo);
 392   }
 393
 394   static void setCPUAffinityIfRequested(int CPUToUse) {
 395 // Special case this function for x86_64 for now as certain more esoteric
 396 // platforms have different definitions for some of the libc functions that
 397 // cause buildtime failures. Additionally, the subprocess executor mode (the
 398 // sole mode where this is supported) currently only supports x86_64.
 399
 400 // Also check that we have the SYS_getcpu macro defined, meaning the syscall
 401 // actually exists within the build environment. We manually use the syscall
 402 // rather than the libc wrapper given the wrapper for getcpu is only available
 403 // in glibc 2.29 and later.
 404 #if defined(__x86_64__) && defined(SYS_getcpu)
 405     // Set the CPU affinity for the child process, so that we ensure that if
 406     // the user specified a CPU the process should run on, the benchmarking
 407     // process is running on that CPU.
 408     cpu_set_t CPUMask;
 409     CPU_ZERO(&CPUMask);
 410     CPU_SET(CPUToUse, &CPUMask);
 411     // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they
 412     // are available.
 413     int SetAffinityReturn = sched_setaffinity(0, sizeof(CPUMask), &CPUMask);
 414     if (SetAffinityReturn == -1) {
 415       exit(ChildProcessExitCodeE::SetCPUAffinityFailed);
 416     }
 417
 418     // Check (if assertions are enabled) that we are actually running on the
 419     // CPU that was specified by the user.
 420     [[maybe_unused]] unsigned int CurrentCPU;
 421     assert(syscall(SYS_getcpu, &CurrentCPU, nullptr) == 0 &&
 422            "Expected getcpu call to succeed.");
 423     assert(static_cast<int>(CurrentCPU) == CPUToUse &&
 424            "Expected current CPU to equal the CPU requested by the user");
 425 #endif // defined(__x86_64__) && defined(SYS_getcpu)
 426     exit(ChildProcessExitCodeE::SetCPUAffinityFailed);
 427   }
 428
 429   Error createSubProcessAndRunBenchmark(
 430       StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
 431       ArrayRef<const char *> ValidationCounters,
 432       SmallVectorImpl<int64_t> &ValidationCounterValues) const {
 433     int PipeFiles[2];
 434     int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, 0, PipeFiles);
 435     if (PipeSuccessOrErr != 0) {
 436       return make_error<Failure>(
 437           "Failed to create a pipe for interprocess communication between "
 438           "llvm-exegesis and the benchmarking subprocess: " +
 439           Twine(strerror(errno)));
 440     }
 441
 442     SubprocessMemory SPMemory;
 443     Error MemoryInitError = SPMemory.initializeSubprocessMemory(getpid());
 444     if (MemoryInitError)
 445       return MemoryInitError;
 446
 447     Error AddMemDefError =
 448         SPMemory.addMemoryDefinition(Key.MemoryValues, getpid());
 449     if (AddMemDefError)
 450       return AddMemDefError;
 451
 452     long ParentTID = SubprocessMemory::getCurrentTID();
 453     pid_t ParentOrChildPID = fork();
 454
 455     if (ParentOrChildPID == -1) {
 456       return make_error<Failure>("Failed to create child process: " +
 457                                  Twine(strerror(errno)));
 458     }
 459
 460     if (ParentOrChildPID == 0) {
 461       if (BenchmarkProcessCPU.has_value()) {
 462         setCPUAffinityIfRequested(*BenchmarkProcessCPU);
 463       }
 464
 465       // We are in the child process, close the write end of the pipe.
 466       close(PipeFiles[1]);
 467       // Unregister handlers, signal handling is now handled through ptrace in
 468       // the host process.
 469       sys::unregisterHandlers();
 470       runChildSubprocess(PipeFiles[0], Key, ParentTID);
 471       // The child process terminates in the above function, so we should never
 472       // get to this point.
 473       llvm_unreachable("Child process didn't exit when expected.");
 474     }
 475
 476     // Close the read end of the pipe as we only need to write to the subprocess
 477     // from the parent process.
 478     close(PipeFiles[0]);
 479     return runParentProcess(ParentOrChildPID, PipeFiles[1], CounterName,
 480                             CounterValues, ValidationCounters,
 481                             ValidationCounterValues);
 482   }
 483
 484   void disableCoreDumps() const {
 485     struct rlimit rlim;
 486
 487     rlim.rlim_cur = 0;
 488     setrlimit(RLIMIT_CORE, &rlim);
 489   }
 490
 491   [[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key,
 492                                        long ParentTID) const {
 493     // Disable core dumps in the child process as otherwise everytime we
 494     // encounter an execution failure like a segmentation fault, we will create
 495     // a core dump. We report the information directly rather than require the
 496     // user inspect a core dump.
 497     disableCoreDumps();
 498
 499     // The following occurs within the benchmarking subprocess.
 500     pid_t ParentPID = getppid();
 501
 502     Expected<int> CounterFileDescriptorOrError =
 503         getFileDescriptorFromSocket(Pipe);
 504
 505     if (!CounterFileDescriptorOrError)
 506       exit(ChildProcessExitCodeE::CounterFDReadFailed);
 507
 508     int CounterFileDescriptor = *CounterFileDescriptorOrError;
 509
 510 // Glibc versions greater than 2.35 automatically call rseq during
 511 // initialization. Unmapping the region that glibc sets up for this causes
 512 // segfaults in the program. Unregister the rseq region so that we can safely
 513 // unmap it later
 514 #ifdef GLIBC_INITS_RSEQ
 515     unsigned int RseqStructSize = __rseq_size;
 516
 517     // Glibc v2.40 (the change is also expected to be backported to v2.35)
 518     // changes the definition of __rseq_size to be the usable area of the struct
 519     // rather than the actual size of the struct. v2.35 uses only 20 bytes of
 520     // the 32 byte struct. For now, it should be safe to assume that if the
 521     // usable size is less than 32, the actual size of the struct will be 32
 522     // bytes given alignment requirements.
 523     if (__rseq_size < 32)
 524       RseqStructSize = 32;
 525
 526     long RseqDisableOutput = syscall(
 527         SYS_rseq,
 528         reinterpret_cast<uintptr_t>(__builtin_thread_pointer()) + __rseq_offset,
 529         RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
 530     if (RseqDisableOutput != 0)
 531       exit(ChildProcessExitCodeE::RSeqDisableFailed);
 532 #endif // GLIBC_INITS_RSEQ
 533
 534     // The frontend that generates the memory annotation structures should
 535     // validate that the address to map the snippet in at is a multiple of
 536     // the page size. Assert that this is true here.
 537     assert(Key.SnippetAddress % getpagesize() == 0 &&
 538            "The snippet address needs to be aligned to a page boundary.");
 539
 540     size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
 541     void *MapAddress = NULL;
 542     int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS;
 543
 544     if (Key.SnippetAddress != 0) {
 545       MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
 546       MapFlags |= MAP_FIXED_NOREPLACE;
 547     }
 548
 549     char *FunctionDataCopy =
 550         (char *)mmap(MapAddress, FunctionDataCopySize, PROT_READ | PROT_WRITE,
 551                      MapFlags, 0, 0);
 552     if (reinterpret_cast<intptr_t>(FunctionDataCopy) == -1)
 553       exit(ChildProcessExitCodeE::FunctionDataMappingFailed);
 554
 555     memcpy(FunctionDataCopy, this->Function.FunctionBytes.data(),
 556            this->Function.FunctionBytes.size());
 557     mprotect(FunctionDataCopy, FunctionDataCopySize, PROT_READ | PROT_EXEC);
 558
 559     Expected<int> AuxMemFDOrError =
 560         SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
 561             Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
 562     if (!AuxMemFDOrError)
 563       exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
 564
 565     ((void (*)(size_t, int))(uintptr_t)FunctionDataCopy)(FunctionDataCopySize,
 566                                                          *AuxMemFDOrError);
 567
 568     exit(0);
 569   }
 570
 571   Expected<SmallVector<int64_t, 4>> runWithCounter(
 572       StringRef CounterName, ArrayRef<const char *> ValidationCounters,
 573       SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
 574     SmallVector<int64_t, 4> Value(1, 0);
 575     Error PossibleBenchmarkError = createSubProcessAndRunBenchmark(
 576         CounterName, Value, ValidationCounters, ValidationCounterValues);
 577
 578     if (PossibleBenchmarkError)
 579       return std::move(PossibleBenchmarkError);
 580
 581     return Value;
 582   }
 583
 584   const LLVMState &State;
 585   const ExecutableFunction Function;
 586   const BenchmarkKey &Key;
 587   const std::optional<int> BenchmarkProcessCPU;
 588 };
 589 #endif // __linux__
 590 } // namespace
 591
 592 Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
 593     const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
 594     unsigned MinInstructions, unsigned LoopBodySize,
 595     bool GenerateMemoryInstructions) const {
 596   const std::vector<MCInst> &Instructions = BC.Key.Instructions;
 597   SmallString<0> Buffer;
 598   raw_svector_ostream OS(Buffer);
 599   if (Error E = assembleToStream(
 600           State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns,
 601           Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
 602                            GenerateMemoryInstructions),
 603           OS, BC.Key, GenerateMemoryInstructions)) {
 604     return std::move(E);
 605   }
 606   return Buffer;
 607 }
 608
 609 Expected<BenchmarkRunner::RunnableConfiguration>
 610 BenchmarkRunner::getRunnableConfiguration(
 611     const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
 612     const SnippetRepetitor &Repetitor) const {
 613   RunnableConfiguration RC;
 614
 615   Benchmark &BenchmarkResult = RC.BenchmarkResult;
 616   BenchmarkResult.Mode = Mode;
 617   BenchmarkResult.CpuName =
 618       std::string(State.getTargetMachine().getTargetCPU());
 619   BenchmarkResult.LLVMTriple =
 620       State.getTargetMachine().getTargetTriple().normalize();
 621   BenchmarkResult.MinInstructions = MinInstructions;
 622   BenchmarkResult.Info = BC.Info;
 623
 624   const std::vector<MCInst> &Instructions = BC.Key.Instructions;
 625
 626   bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
 627
 628   BenchmarkResult.Key = BC.Key;
 629
 630   // Assemble at least kMinInstructionsForSnippet instructions by repeating
 631   // the snippet for debug/analysis. This is so that the user clearly
 632   // understands that the inside instructions are repeated.
 633   if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
 634     const int MinInstructionsForSnippet = 4 * Instructions.size();
 635     const int LoopBodySizeForSnippet = 2 * Instructions.size();
 636     auto Snippet =
 637         assembleSnippet(BC, Repetitor, MinInstructionsForSnippet,
 638                         LoopBodySizeForSnippet, GenerateMemoryInstructions);
 639     if (Error E = Snippet.takeError())
 640       return std::move(E);
 641
 642     if (auto Err = getBenchmarkFunctionBytes(*Snippet,
 643                                              BenchmarkResult.AssembledSnippet))
 644       return std::move(Err);
 645   }
 646
 647   // Assemble enough repetitions of the snippet so we have at least
 648   // MinInstructions instructions.
 649   if (BenchmarkPhaseSelector >
 650       BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
 651     auto Snippet =
 652         assembleSnippet(BC, Repetitor, BenchmarkResult.MinInstructions,
 653                         LoopBodySize, GenerateMemoryInstructions);
 654     if (Error E = Snippet.takeError())
 655       return std::move(E);
 656     RC.ObjectFile = getObjectFromBuffer(*Snippet);
 657   }
 658
 659   return std::move(RC);
 660 }
 661
 662 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
 663 BenchmarkRunner::createFunctionExecutor(
 664     object::OwningBinary<object::ObjectFile> ObjectFile,
 665     const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) const {
 666   switch (ExecutionMode) {
 667   case ExecutionModeE::InProcess: {
 668     if (BenchmarkProcessCPU.has_value())
 669       return make_error<Failure>("The inprocess execution mode does not "
 670                                  "support benchmark core pinning.");
 671
 672     auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
 673         State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU);
 674     if (!InProcessExecutorOrErr)
 675       return InProcessExecutorOrErr.takeError();
 676
 677     return std::move(*InProcessExecutorOrErr);
 678   }
 679   case ExecutionModeE::SubProcess: {
 680 #ifdef __linux__
 681     auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
 682         State, std::move(ObjectFile), Key, BenchmarkProcessCPU);
 683     if (!SubProcessExecutorOrErr)
 684       return SubProcessExecutorOrErr.takeError();
 685
 686     return std::move(*SubProcessExecutorOrErr);
 687 #else
 688     return make_error<Failure>(
 689         "The subprocess execution mode is only supported on Linux");
 690 #endif
 691   }
 692   }
 693   llvm_unreachable("ExecutionMode is outside expected range");
 694 }
 695
 696 std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
 697     RunnableConfiguration &&RC, const std::optional<StringRef> &DumpFile,
 698     std::optional<int> BenchmarkProcessCPU) const {
 699   Benchmark &BenchmarkResult = RC.BenchmarkResult;
 700   object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
 701
 702   if (DumpFile && BenchmarkPhaseSelector >
 703                       BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
 704     auto ObjectFilePath =
 705         writeObjectFile(ObjectFile.getBinary()->getData(), *DumpFile);
 706     if (Error E = ObjectFilePath.takeError()) {
 707       return {std::move(E), std::move(BenchmarkResult)};
 708     }
 709     outs() << "Check generated assembly with: /usr/bin/objdump -d "
 710            << *ObjectFilePath << "\n";
 711   }
 712
 713   if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
 714     BenchmarkResult.Error = "actual measurements skipped.";
 715     return {Error::success(), std::move(BenchmarkResult)};
 716   }
 717
 718   Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
 719       createFunctionExecutor(std::move(ObjectFile), RC.BenchmarkResult.Key,
 720                              BenchmarkProcessCPU);
 721   if (!Executor)
 722     return {Executor.takeError(), std::move(BenchmarkResult)};
 723   auto NewMeasurements = runMeasurements(**Executor);
 724
 725   if (Error E = NewMeasurements.takeError()) {
 726     return {std::move(E), std::move(BenchmarkResult)};
 727   }
 728   assert(BenchmarkResult.MinInstructions > 0 && "invalid MinInstructions");
 729   for (BenchmarkMeasure &BM : *NewMeasurements) {
 730     // Scale the measurements by the number of instructions.
 731     BM.PerInstructionValue /= BenchmarkResult.MinInstructions;
 732     // Scale the measurements by the number of times the entire snippet is
 733     // repeated.
 734     BM.PerSnippetValue /=
 735         std::ceil(BenchmarkResult.MinInstructions /
 736                   static_cast<double>(BenchmarkResult.Key.Instructions.size()));
 737   }
 738   BenchmarkResult.Measurements = std::move(*NewMeasurements);
 739
 740   return {Error::success(), std::move(BenchmarkResult)};
 741 }
 742
 743 Expected<std::string>
 744 BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
 745   int ResultFD = 0;
 746   SmallString<256> ResultPath = FileName;
 747   if (Error E = errorCodeToError(
 748           FileName.empty() ? sys::fs::createTemporaryFile("snippet", "o",
 749                                                           ResultFD, ResultPath)
 750                            : sys::fs::openFileForReadWrite(
 751                                  FileName, ResultFD, sys::fs::CD_CreateAlways,
 752                                  sys::fs::OF_None)))
 753     return std::move(E);
 754   raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/);
 755   OFS.write(Buffer.data(), Buffer.size());
 756   OFS.flush();
 757   return std::string(ResultPath);
 758 }
 759
 760 static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS,
 761                           const ValidationEvent RHS) {
 762   return static_cast<int>(LHS.first) < static_cast<int>(RHS);
 763 }
 764
 765 Error BenchmarkRunner::getValidationCountersToRun(
 766     SmallVector<const char *> &ValCountersToRun) const {
 767   const PfmCountersInfo &PCI = State.getPfmCounters();
 768   ValCountersToRun.reserve(ValidationCounters.size());
 769
 770   ValCountersToRun.reserve(ValidationCounters.size());
 771   ArrayRef TargetValidationEvents(PCI.ValidationEvents,
 772                                   PCI.NumValidationEvents);
 773   for (const ValidationEvent RequestedValEvent : ValidationCounters) {
 774     auto ValCounterIt =
 775         lower_bound(TargetValidationEvents, RequestedValEvent, EventLessThan);
 776     if (ValCounterIt == TargetValidationEvents.end() ||
 777         ValCounterIt->first != RequestedValEvent)
 778       return make_error<Failure>("Cannot create validation counter");
 779
 780     assert(ValCounterIt->first == RequestedValEvent &&
 781            "The array of validation events from the target should be sorted");
 782     ValCountersToRun.push_back(ValCounterIt->second);
 783   }
 784
 785   return Error::success();
 786 }
 787
 788 BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
 789
 790 } // namespace exegesis
 791 } // namespace llvm