llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp

   1 //===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include <memory>
  10 #include <string>
  11
  12 #include "Assembler.h"
  13 #include "BenchmarkRunner.h"
  14 #include "Error.h"
  15 #include "MCInstrDescView.h"
  16 #include "PerfHelper.h"
  17 #include "SubprocessMemory.h"
  18 #include "Target.h"
  19 #include "llvm/ADT/StringExtras.h"
  20 #include "llvm/ADT/StringRef.h"
  21 #include "llvm/ADT/Twine.h"
  22 #include "llvm/Support/CrashRecoveryContext.h"
  23 #include "llvm/Support/Error.h"
  24 #include "llvm/Support/FileSystem.h"
  25 #include "llvm/Support/MemoryBuffer.h"
  26 #include "llvm/Support/Program.h"
  27 #include "llvm/Support/Signals.h"
  28
  29 #ifdef __linux__
  30 #ifdef HAVE_LIBPFM
  31 #include <perfmon/perf_event.h>
  32 #endif
  33 #include <sys/mman.h>
  34 #include <sys/ptrace.h>
  35 #include <sys/resource.h>
  36 #include <sys/socket.h>
  37 #include <sys/syscall.h>
  38 #include <sys/wait.h>
  39 #include <unistd.h>
  40
  41 #if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
  42 #include <sys/rseq.h>
  43 #if defined(RSEQ_SIG) && defined(SYS_rseq)
  44 #define GLIBC_INITS_RSEQ
  45 #endif
  46 #endif
  47
  48 // Before kernel 4.17, Linux did not support MAP_FIXED_NOREPLACE, so if it is
  49 // not available, simply define it as MAP_FIXED which performs the same
  50 // function but does not guarantee existing mappings won't get clobbered.
  51 #ifndef MAP_FIXED_NOREPLACE
  52 #define MAP_FIXED_NOREPLACE MAP_FIXED
  53 #endif
  54 #endif // __linux__
  55
  56 namespace llvm {
  57 namespace exegesis {
  58
  59 BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
  60                                  BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
  61                                  ExecutionModeE ExecutionMode)
  62     : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
  63       ExecutionMode(ExecutionMode), Scratch(std::make_unique<ScratchSpace>()) {}
  64
  65 BenchmarkRunner::~BenchmarkRunner() = default;
  66
  67 void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
  68     const llvm::SmallVectorImpl<int64_t> &NewValues,
  69     llvm::SmallVectorImpl<int64_t> *Result) {
  70   const size_t NumValues = std::max(NewValues.size(), Result->size());
  71   if (NumValues > Result->size())
  72     Result->resize(NumValues, 0);
  73   for (size_t I = 0, End = NewValues.size(); I < End; ++I)
  74     (*Result)[I] += NewValues[I];
  75 }
  76
  77 Expected<llvm::SmallVector<int64_t, 4>>
  78 BenchmarkRunner::FunctionExecutor::runAndSample(const char *Counters) const {
  79   // We sum counts when there are several counters for a single ProcRes
  80   // (e.g. P23 on SandyBridge).
  81   llvm::SmallVector<int64_t, 4> CounterValues;
  82   SmallVector<StringRef, 2> CounterNames;
  83   StringRef(Counters).split(CounterNames, '+');
  84   for (auto &CounterName : CounterNames) {
  85     CounterName = CounterName.trim();
  86     Expected<SmallVector<int64_t, 4>> ValueOrError =
  87         runWithCounter(CounterName);
  88     if (!ValueOrError)
  89       return ValueOrError.takeError();
  90     accumulateCounterValues(ValueOrError.get(), &CounterValues);
  91   }
  92   return CounterValues;
  93 }
  94
  95 namespace {
  96 class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
  97 public:
  98   static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
  99   create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
 100          BenchmarkRunner::ScratchSpace *Scratch) {
 101     Expected<ExecutableFunction> EF =
 102         ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
 103
 104     if (!EF)
 105       return EF.takeError();
 106
 107     return std::unique_ptr<InProcessFunctionExecutorImpl>(
 108         new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch));
 109   }
 110
 111 private:
 112   InProcessFunctionExecutorImpl(const LLVMState &State,
 113                                 ExecutableFunction Function,
 114                                 BenchmarkRunner::ScratchSpace *Scratch)
 115       : State(State), Function(std::move(Function)), Scratch(Scratch) {}
 116
 117   static void
 118   accumulateCounterValues(const llvm::SmallVector<int64_t, 4> &NewValues,
 119                           llvm::SmallVector<int64_t, 4> *Result) {
 120     const size_t NumValues = std::max(NewValues.size(), Result->size());
 121     if (NumValues > Result->size())
 122       Result->resize(NumValues, 0);
 123     for (size_t I = 0, End = NewValues.size(); I < End; ++I)
 124       (*Result)[I] += NewValues[I];
 125   }
 126
 127   Expected<llvm::SmallVector<int64_t, 4>>
 128   runWithCounter(StringRef CounterName) const override {
 129     const ExegesisTarget &ET = State.getExegesisTarget();
 130     char *const ScratchPtr = Scratch->ptr();
 131     auto CounterOrError = ET.createCounter(CounterName, State);
 132
 133     if (!CounterOrError)
 134       return CounterOrError.takeError();
 135
 136     pfm::Counter *Counter = CounterOrError.get().get();
 137     Scratch->clear();
 138     {
 139       auto PS = ET.withSavedState();
 140       CrashRecoveryContext CRC;
 141       CrashRecoveryContext::Enable();
 142       const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() {
 143         Counter->start();
 144         this->Function(ScratchPtr);
 145         Counter->stop();
 146       });
 147       CrashRecoveryContext::Disable();
 148       PS.reset();
 149       if (Crashed) {
 150 #ifdef LLVM_ON_UNIX
 151         // See "Exit Status for Commands":
 152         // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
 153         constexpr const int kSigOffset = 128;
 154         return make_error<SnippetSignal>(CRC.RetCode - kSigOffset);
 155 #else
 156         // The exit code of the process on windows is not meaningful as a
 157         // signal, so simply pass in -1 as the signal into the error.
 158         return make_error<SnippetSignal>(-1);
 159 #endif // LLVM_ON_UNIX
 160       }
 161     }
 162
 163     return Counter->readOrError(Function.getFunctionBytes());
 164   }
 165
 166   const LLVMState &State;
 167   const ExecutableFunction Function;
 168   BenchmarkRunner::ScratchSpace *const Scratch;
 169 };
 170
 171 #ifdef __linux__
 172 // The following class implements a function executor that executes the
 173 // benchmark code within a subprocess rather than within the main llvm-exegesis
 174 // process. This allows for much more control over the execution context of the
 175 // snippet, particularly with regard to memory. This class performs all the
 176 // necessary functions to create the subprocess, execute the snippet in the
 177 // subprocess, and report results/handle errors.
 178 class SubProcessFunctionExecutorImpl
 179     : public BenchmarkRunner::FunctionExecutor {
 180 public:
 181   static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
 182   create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
 183          const BenchmarkKey &Key) {
 184     Expected<ExecutableFunction> EF =
 185         ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
 186     if (!EF)
 187       return EF.takeError();
 188
 189     return std::unique_ptr<SubProcessFunctionExecutorImpl>(
 190         new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key));
 191   }
 192
 193 private:
 194   SubProcessFunctionExecutorImpl(const LLVMState &State,
 195                                  ExecutableFunction Function,
 196                                  const BenchmarkKey &Key)
 197       : State(State), Function(std::move(Function)), Key(Key) {}
 198
 199   enum ChildProcessExitCodeE {
 200     CounterFDReadFailed = 1,
 201     RSeqDisableFailed,
 202     FunctionDataMappingFailed,
 203     AuxiliaryMemorySetupFailed
 204   };
 205
 206   StringRef childProcessExitCodeToString(int ExitCode) const {
 207     switch (ExitCode) {
 208     case ChildProcessExitCodeE::CounterFDReadFailed:
 209       return "Counter file descriptor read failed";
 210     case ChildProcessExitCodeE::RSeqDisableFailed:
 211       return "Disabling restartable sequences failed";
 212     case ChildProcessExitCodeE::FunctionDataMappingFailed:
 213       return "Failed to map memory for assembled snippet";
 214     case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
 215       return "Failed to setup auxiliary memory";
 216     default:
 217       return "Child process returned with unknown exit code";
 218     }
 219   }
 220
 221   Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
 222     struct msghdr Message = {};
 223     char Buffer[CMSG_SPACE(sizeof(FD))];
 224     memset(Buffer, 0, sizeof(Buffer));
 225     Message.msg_control = Buffer;
 226     Message.msg_controllen = sizeof(Buffer);
 227
 228     struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
 229     ControlMessage->cmsg_level = SOL_SOCKET;
 230     ControlMessage->cmsg_type = SCM_RIGHTS;
 231     ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
 232
 233     memcpy(CMSG_DATA(ControlMessage), &FD, sizeof(FD));
 234
 235     Message.msg_controllen = CMSG_SPACE(sizeof(FD));
 236
 237     ssize_t BytesWritten = sendmsg(SocketFD, &Message, 0);
 238
 239     if (BytesWritten < 0)
 240       return make_error<Failure>("Failed to write FD to socket: " +
 241                                  Twine(strerror(errno)));
 242
 243     return Error::success();
 244   }
 245
 246   Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
 247     struct msghdr Message = {};
 248
 249     char ControlBuffer[256];
 250     Message.msg_control = ControlBuffer;
 251     Message.msg_controllen = sizeof(ControlBuffer);
 252
 253     ssize_t BytesRead = recvmsg(SocketFD, &Message, 0);
 254
 255     if (BytesRead < 0)
 256       return make_error<Failure>("Failed to read FD from socket: " +
 257                                  Twine(strerror(errno)));
 258
 259     struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
 260
 261     int FD;
 262
 263     if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
 264       return make_error<Failure>("Failed to get correct number of bytes for "
 265                                  "file descriptor from socket.");
 266
 267     memcpy(&FD, CMSG_DATA(ControlMessage), sizeof(FD));
 268
 269     return FD;
 270   }
 271
 272   Error createSubProcessAndRunBenchmark(
 273       StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues) const {
 274     int PipeFiles[2];
 275     int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, 0, PipeFiles);
 276     if (PipeSuccessOrErr != 0) {
 277       return make_error<Failure>(
 278           "Failed to create a pipe for interprocess communication between "
 279           "llvm-exegesis and the benchmarking subprocess: " +
 280           Twine(strerror(errno)));
 281     }
 282
 283     SubprocessMemory SPMemory;
 284     Error MemoryInitError = SPMemory.initializeSubprocessMemory(getpid());
 285     if (MemoryInitError)
 286       return MemoryInitError;
 287
 288     Error AddMemDefError =
 289         SPMemory.addMemoryDefinition(Key.MemoryValues, getpid());
 290     if (AddMemDefError)
 291       return AddMemDefError;
 292
 293     pid_t ParentOrChildPID = fork();
 294
 295     if (ParentOrChildPID == -1) {
 296       return make_error<Failure>("Failed to create child process: " +
 297                                  Twine(strerror(errno)));
 298     }
 299
 300     if (ParentOrChildPID == 0) {
 301       // We are in the child process, close the write end of the pipe
 302       close(PipeFiles[1]);
 303       // Unregister handlers, signal handling is now handled through ptrace in
 304       // the host process
 305       llvm::sys::unregisterHandlers();
 306       prepareAndRunBenchmark(PipeFiles[0], Key);
 307       // The child process terminates in the above function, so we should never
 308       // get to this point.
 309       llvm_unreachable("Child process didn't exit when expected.");
 310     }
 311
 312     const ExegesisTarget &ET = State.getExegesisTarget();
 313     auto CounterOrError =
 314         ET.createCounter(CounterName, State, ParentOrChildPID);
 315
 316     if (!CounterOrError)
 317       return CounterOrError.takeError();
 318
 319     pfm::Counter *Counter = CounterOrError.get().get();
 320
 321     close(PipeFiles[0]);
 322
 323     // Make sure to attach to the process (and wait for the sigstop to be
 324     // delivered and for the process to continue) before we write to the counter
 325     // file descriptor. Attaching to the process before writing to the socket
 326     // ensures that the subprocess at most has blocked on the read call. If we
 327     // attach afterwards, the subprocess might exit before we get to the attach
 328     // call due to effects like scheduler contention, introducing transient
 329     // failures.
 330     if (ptrace(PTRACE_ATTACH, ParentOrChildPID, NULL, NULL) != 0)
 331       return make_error<Failure>("Failed to attach to the child process: " +
 332                                  Twine(strerror(errno)));
 333
 334     if (wait(NULL) == -1) {
 335       return make_error<Failure>(
 336           "Failed to wait for child process to stop after attaching: " +
 337           Twine(strerror(errno)));
 338     }
 339
 340     if (ptrace(PTRACE_CONT, ParentOrChildPID, NULL, NULL) != 0)
 341       return make_error<Failure>(
 342           "Failed to continue execution of the child process: " +
 343           Twine(strerror(errno)));
 344
 345     int CounterFileDescriptor = Counter->getFileDescriptor();
 346     Error SendError =
 347         sendFileDescriptorThroughSocket(PipeFiles[1], CounterFileDescriptor);
 348
 349     if (SendError)
 350       return SendError;
 351
 352     int ChildStatus;
 353     if (wait(&ChildStatus) == -1) {
 354       return make_error<Failure>(
 355           "Waiting for the child process to complete failed: " +
 356           Twine(strerror(errno)));
 357     }
 358
 359     if (WIFEXITED(ChildStatus)) {
 360       int ChildExitCode = WEXITSTATUS(ChildStatus);
 361       if (ChildExitCode == 0) {
 362         // The child exited succesfully, read counter values and return
 363         // success
 364         CounterValues[0] = Counter->read();
 365         return Error::success();
 366       }
 367       // The child exited, but not successfully
 368       return make_error<Failure>(
 369           "Child benchmarking process exited with non-zero exit code: " +
 370           childProcessExitCodeToString(ChildExitCode));
 371     }
 372
 373     // An error was encountered running the snippet, process it
 374     siginfo_t ChildSignalInfo;
 375     if (ptrace(PTRACE_GETSIGINFO, ParentOrChildPID, NULL, &ChildSignalInfo) ==
 376         -1) {
 377       return make_error<Failure>("Getting signal info from the child failed: " +
 378                                  Twine(strerror(errno)));
 379     }
 380
 381     if (ChildSignalInfo.si_signo == SIGSEGV)
 382       return make_error<SnippetSegmentationFault>(
 383           reinterpret_cast<intptr_t>(ChildSignalInfo.si_addr));
 384
 385     return make_error<SnippetSignal>(ChildSignalInfo.si_signo);
 386   }
 387
 388   void disableCoreDumps() const {
 389     struct rlimit rlim;
 390
 391     rlim.rlim_cur = 0;
 392     setrlimit(RLIMIT_CORE, &rlim);
 393   }
 394
 395   [[noreturn]] void prepareAndRunBenchmark(int Pipe,
 396                                            const BenchmarkKey &Key) const {
 397     // Disable core dumps in the child process as otherwise everytime we
 398     // encounter an execution failure like a segmentation fault, we will create
 399     // a core dump. We report the information directly rather than require the
 400     // user inspect a core dump.
 401     disableCoreDumps();
 402
 403     // The following occurs within the benchmarking subprocess
 404     pid_t ParentPID = getppid();
 405
 406     Expected<int> CounterFileDescriptorOrError =
 407         getFileDescriptorFromSocket(Pipe);
 408
 409     if (!CounterFileDescriptorOrError)
 410       exit(ChildProcessExitCodeE::CounterFDReadFailed);
 411
 412     int CounterFileDescriptor = *CounterFileDescriptorOrError;
 413
 414 // Glibc versions greater than 2.35 automatically call rseq during
 415 // initialization. Unmapping the region that glibc sets up for this causes
 416 // segfaults in the program Unregister the rseq region so that we can safely
 417 // unmap it later
 418 #ifdef GLIBC_INITS_RSEQ
 419     long RseqDisableOutput =
 420         syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset,
 421                 __rseq_size, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
 422     if (RseqDisableOutput != 0)
 423       exit(ChildProcessExitCodeE::RSeqDisableFailed);
 424 #endif // GLIBC_INITS_RSEQ
 425
 426     size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
 427     void *MapAddress = NULL;
 428     int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS;
 429
 430     if (Key.SnippetAddress != 0) {
 431       MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
 432       MapFlags |= MAP_FIXED_NOREPLACE;
 433     }
 434
 435     char *FunctionDataCopy =
 436         (char *)mmap(MapAddress, FunctionDataCopySize, PROT_READ | PROT_WRITE,
 437                      MapFlags, 0, 0);
 438     if ((intptr_t)FunctionDataCopy == -1)
 439       exit(ChildProcessExitCodeE::FunctionDataMappingFailed);
 440
 441     memcpy(FunctionDataCopy, this->Function.FunctionBytes.data(),
 442            this->Function.FunctionBytes.size());
 443     mprotect(FunctionDataCopy, FunctionDataCopySize, PROT_READ | PROT_EXEC);
 444
 445     Expected<int> AuxMemFDOrError =
 446         SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
 447             Key.MemoryValues, ParentPID, CounterFileDescriptor);
 448     if (!AuxMemFDOrError)
 449       exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
 450
 451     ((void (*)(size_t, int))(intptr_t)FunctionDataCopy)(FunctionDataCopySize,
 452                                                         *AuxMemFDOrError);
 453
 454     exit(0);
 455   }
 456
 457   Expected<llvm::SmallVector<int64_t, 4>>
 458   runWithCounter(StringRef CounterName) const override {
 459     SmallVector<int64_t, 4> Value(1, 0);
 460     Error PossibleBenchmarkError =
 461         createSubProcessAndRunBenchmark(CounterName, Value);
 462
 463     if (PossibleBenchmarkError) {
 464       return std::move(PossibleBenchmarkError);
 465     }
 466
 467     return Value;
 468   }
 469
 470   const LLVMState &State;
 471   const ExecutableFunction Function;
 472   const BenchmarkKey &Key;
 473 };
 474 #endif // __linux__
 475 } // namespace
 476
 477 Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
 478     const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
 479     unsigned MinInstructions, unsigned LoopBodySize,
 480     bool GenerateMemoryInstructions) const {
 481   const std::vector<MCInst> &Instructions = BC.Key.Instructions;
 482   SmallString<0> Buffer;
 483   raw_svector_ostream OS(Buffer);
 484   if (Error E = assembleToStream(
 485           State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns,
 486           BC.Key.RegisterInitialValues,
 487           Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
 488                            GenerateMemoryInstructions),
 489           OS, BC.Key, GenerateMemoryInstructions)) {
 490     return std::move(E);
 491   }
 492   return Buffer;
 493 }
 494
 495 Expected<BenchmarkRunner::RunnableConfiguration>
 496 BenchmarkRunner::getRunnableConfiguration(
 497     const BenchmarkCode &BC, unsigned NumRepetitions, unsigned LoopBodySize,
 498     const SnippetRepetitor &Repetitor) const {
 499   RunnableConfiguration RC;
 500
 501   Benchmark &InstrBenchmark = RC.InstrBenchmark;
 502   InstrBenchmark.Mode = Mode;
 503   InstrBenchmark.CpuName = std::string(State.getTargetMachine().getTargetCPU());
 504   InstrBenchmark.LLVMTriple =
 505       State.getTargetMachine().getTargetTriple().normalize();
 506   InstrBenchmark.NumRepetitions = NumRepetitions;
 507   InstrBenchmark.Info = BC.Info;
 508
 509   const std::vector<MCInst> &Instructions = BC.Key.Instructions;
 510
 511   bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
 512
 513   InstrBenchmark.Key = BC.Key;
 514
 515   // Assemble at least kMinInstructionsForSnippet instructions by repeating
 516   // the snippet for debug/analysis. This is so that the user clearly
 517   // understands that the inside instructions are repeated.
 518   if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
 519     const int MinInstructionsForSnippet = 4 * Instructions.size();
 520     const int LoopBodySizeForSnippet = 2 * Instructions.size();
 521     auto Snippet =
 522         assembleSnippet(BC, Repetitor, MinInstructionsForSnippet,
 523                         LoopBodySizeForSnippet, GenerateMemoryInstructions);
 524     if (Error E = Snippet.takeError())
 525       return std::move(E);
 526
 527     if (auto Err = getBenchmarkFunctionBytes(*Snippet,
 528                                              InstrBenchmark.AssembledSnippet))
 529       return std::move(Err);
 530   }
 531
 532   // Assemble NumRepetitions instructions repetitions of the snippet for
 533   // measurements.
 534   if (BenchmarkPhaseSelector >
 535       BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
 536     auto Snippet = assembleSnippet(BC, Repetitor, InstrBenchmark.NumRepetitions,
 537                                    LoopBodySize, GenerateMemoryInstructions);
 538     if (Error E = Snippet.takeError())
 539       return std::move(E);
 540     RC.ObjectFile = getObjectFromBuffer(*Snippet);
 541   }
 542
 543   return std::move(RC);
 544 }
 545
 546 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
 547 BenchmarkRunner::createFunctionExecutor(
 548     object::OwningBinary<object::ObjectFile> ObjectFile,
 549     const BenchmarkKey &Key) const {
 550   switch (ExecutionMode) {
 551   case ExecutionModeE::InProcess: {
 552     auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
 553         State, std::move(ObjectFile), Scratch.get());
 554     if (!InProcessExecutorOrErr)
 555       return InProcessExecutorOrErr.takeError();
 556
 557     return std::move(*InProcessExecutorOrErr);
 558   }
 559   case ExecutionModeE::SubProcess: {
 560 #ifdef __linux__
 561     auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
 562         State, std::move(ObjectFile), Key);
 563     if (!SubProcessExecutorOrErr)
 564       return SubProcessExecutorOrErr.takeError();
 565
 566     return std::move(*SubProcessExecutorOrErr);
 567 #else
 568     return make_error<Failure>(
 569         "The subprocess execution mode is only supported on Linux");
 570 #endif
 571   }
 572   }
 573   llvm_unreachable("ExecutionMode is outside expected range");
 574 }
 575
 576 std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
 577     RunnableConfiguration &&RC,
 578     const std::optional<StringRef> &DumpFile) const {
 579   Benchmark &InstrBenchmark = RC.InstrBenchmark;
 580   object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
 581
 582   if (DumpFile && BenchmarkPhaseSelector >
 583                       BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
 584     auto ObjectFilePath =
 585         writeObjectFile(ObjectFile.getBinary()->getData(), *DumpFile);
 586     if (Error E = ObjectFilePath.takeError()) {
 587       return {std::move(E), std::move(InstrBenchmark)};
 588     }
 589     outs() << "Check generated assembly with: /usr/bin/objdump -d "
 590            << *ObjectFilePath << "\n";
 591   }
 592
 593   if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
 594     InstrBenchmark.Error = "actual measurements skipped.";
 595     return {Error::success(), std::move(InstrBenchmark)};
 596   }
 597
 598   Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
 599       createFunctionExecutor(std::move(ObjectFile), RC.InstrBenchmark.Key);
 600   if (!Executor)
 601     return {Executor.takeError(), std::move(InstrBenchmark)};
 602   auto NewMeasurements = runMeasurements(**Executor);
 603
 604   if (Error E = NewMeasurements.takeError()) {
 605     return {std::move(E), std::move(InstrBenchmark)};
 606   }
 607   assert(InstrBenchmark.NumRepetitions > 0 && "invalid NumRepetitions");
 608   for (BenchmarkMeasure &BM : *NewMeasurements) {
 609     // Scale the measurements by instruction.
 610     BM.PerInstructionValue /= InstrBenchmark.NumRepetitions;
 611     // Scale the measurements by snippet.
 612     BM.PerSnippetValue *=
 613         static_cast<double>(InstrBenchmark.Key.Instructions.size()) /
 614         InstrBenchmark.NumRepetitions;
 615   }
 616   InstrBenchmark.Measurements = std::move(*NewMeasurements);
 617
 618   return {Error::success(), std::move(InstrBenchmark)};
 619 }
 620
 621 Expected<std::string>
 622 BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
 623   int ResultFD = 0;
 624   SmallString<256> ResultPath = FileName;
 625   if (Error E = errorCodeToError(
 626           FileName.empty() ? sys::fs::createTemporaryFile("snippet", "o",
 627                                                           ResultFD, ResultPath)
 628                            : sys::fs::openFileForReadWrite(
 629                                  FileName, ResultFD, sys::fs::CD_CreateAlways,
 630                                  sys::fs::OF_None)))
 631     return std::move(E);
 632   raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/);
 633   OFS.write(Buffer.data(), Buffer.size());
 634   OFS.flush();
 635   return std::string(ResultPath.str());
 636 }
 637
 638 BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
 639
 640 } // namespace exegesis
 641 } // namespace llvm