[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / llvm / tools / llvm-exegesis / lib / BenchmarkRunner.cpp
blob7ec24eb2f866f8625be457554d86112ead4657fe
1 //===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include <array>
10 #include <memory>
11 #include <string>
13 #include "Assembler.h"
14 #include "BenchmarkRunner.h"
15 #include "Error.h"
16 #include "MCInstrDescView.h"
17 #include "PerfHelper.h"
18 #include "SubprocessMemory.h"
19 #include "Target.h"
20 #include "llvm/ADT/ScopeExit.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/Twine.h"
24 #include "llvm/Support/CrashRecoveryContext.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Program.h"
29 #include "llvm/Support/Signals.h"
31 #ifdef __linux__
32 #ifdef HAVE_LIBPFM
33 #include <perfmon/perf_event.h>
34 #endif
35 #include <sys/mman.h>
36 #include <sys/ptrace.h>
37 #include <sys/socket.h>
38 #include <sys/syscall.h>
39 #include <sys/wait.h>
40 #include <unistd.h>
42 #if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
43 #include <sys/rseq.h>
44 #if defined(RSEQ_SIG) && defined(SYS_rseq)
45 #define GLIBC_INITS_RSEQ
46 #endif
47 #endif
48 #endif
50 namespace llvm {
51 namespace exegesis {
53 BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
54 BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
55 ExecutionModeE ExecutionMode)
56 : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
57 ExecutionMode(ExecutionMode), Scratch(std::make_unique<ScratchSpace>()) {}
59 BenchmarkRunner::~BenchmarkRunner() = default;
61 void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
62 const llvm::SmallVectorImpl<int64_t> &NewValues,
63 llvm::SmallVectorImpl<int64_t> *Result) {
64 const size_t NumValues = std::max(NewValues.size(), Result->size());
65 if (NumValues > Result->size())
66 Result->resize(NumValues, 0);
67 for (size_t I = 0, End = NewValues.size(); I < End; ++I)
68 (*Result)[I] += NewValues[I];
71 Expected<llvm::SmallVector<int64_t, 4>>
72 BenchmarkRunner::FunctionExecutor::runAndSample(const char *Counters) const {
73 // We sum counts when there are several counters for a single ProcRes
74 // (e.g. P23 on SandyBridge).
75 llvm::SmallVector<int64_t, 4> CounterValues;
76 SmallVector<StringRef, 2> CounterNames;
77 StringRef(Counters).split(CounterNames, '+');
78 for (auto &CounterName : CounterNames) {
79 CounterName = CounterName.trim();
80 Expected<SmallVector<int64_t, 4>> ValueOrError =
81 runWithCounter(CounterName);
82 if (!ValueOrError)
83 return ValueOrError.takeError();
84 accumulateCounterValues(ValueOrError.get(), &CounterValues);
86 return CounterValues;
89 namespace {
90 class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
91 public:
92 InProcessFunctionExecutorImpl(const LLVMState &State,
93 object::OwningBinary<object::ObjectFile> Obj,
94 BenchmarkRunner::ScratchSpace *Scratch)
95 : State(State), Function(State.createTargetMachine(), std::move(Obj)),
96 Scratch(Scratch) {}
98 private:
99 static void
100 accumulateCounterValues(const llvm::SmallVector<int64_t, 4> &NewValues,
101 llvm::SmallVector<int64_t, 4> *Result) {
102 const size_t NumValues = std::max(NewValues.size(), Result->size());
103 if (NumValues > Result->size())
104 Result->resize(NumValues, 0);
105 for (size_t I = 0, End = NewValues.size(); I < End; ++I)
106 (*Result)[I] += NewValues[I];
109 Expected<llvm::SmallVector<int64_t, 4>>
110 runWithCounter(StringRef CounterName) const override {
111 const ExegesisTarget &ET = State.getExegesisTarget();
112 char *const ScratchPtr = Scratch->ptr();
113 auto CounterOrError = ET.createCounter(CounterName, State);
115 if (!CounterOrError)
116 return CounterOrError.takeError();
118 pfm::Counter *Counter = CounterOrError.get().get();
119 Scratch->clear();
121 auto PS = ET.withSavedState();
122 CrashRecoveryContext CRC;
123 CrashRecoveryContext::Enable();
124 const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() {
125 Counter->start();
126 this->Function(ScratchPtr);
127 Counter->stop();
129 CrashRecoveryContext::Disable();
130 PS.reset();
131 if (Crashed) {
132 std::string Msg = "snippet crashed while running";
133 #ifdef LLVM_ON_UNIX
134 // See "Exit Status for Commands":
135 // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
136 constexpr const int kSigOffset = 128;
137 if (const char *const SigName = strsignal(CRC.RetCode - kSigOffset)) {
138 Msg += ": ";
139 Msg += SigName;
141 #endif
142 return make_error<SnippetCrash>(std::move(Msg));
146 return Counter->readOrError(Function.getFunctionBytes());
149 const LLVMState &State;
150 const ExecutableFunction Function;
151 BenchmarkRunner::ScratchSpace *const Scratch;
154 #ifdef __linux__
155 // The following class implements a function executor that executes the
156 // benchmark code within a subprocess rather than within the main llvm-exegesis
157 // process. This allows for much more control over the execution context of the
158 // snippet, particularly with regard to memory. This class performs all the
159 // necessary functions to create the subprocess, execute the snippet in the
160 // subprocess, and report results/handle errors.
161 class SubProcessFunctionExecutorImpl
162 : public BenchmarkRunner::FunctionExecutor {
163 public:
164 SubProcessFunctionExecutorImpl(const LLVMState &State,
165 object::OwningBinary<object::ObjectFile> Obj,
166 const BenchmarkKey &Key)
167 : State(State), Function(State.createTargetMachine(), std::move(Obj)),
168 Key(Key) {}
170 private:
171 enum ChildProcessExitCodeE {
172 CounterFDReadFailed = 1,
173 RSeqDisableFailed,
174 FunctionDataMappingFailed,
175 AuxiliaryMemorySetupFailed
178 StringRef childProcessExitCodeToString(int ExitCode) const {
179 switch (ExitCode) {
180 case ChildProcessExitCodeE::CounterFDReadFailed:
181 return "Counter file descriptor read failed";
182 case ChildProcessExitCodeE::RSeqDisableFailed:
183 return "Disabling restartable sequences failed";
184 case ChildProcessExitCodeE::FunctionDataMappingFailed:
185 return "Failed to map memory for assembled snippet";
186 case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
187 return "Failed to setup auxiliary memory";
188 default:
189 return "Child process returned with unknown exit code";
193 Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
194 struct msghdr Message = {};
195 char Buffer[CMSG_SPACE(sizeof(FD))];
196 memset(Buffer, 0, sizeof(Buffer));
197 Message.msg_control = Buffer;
198 Message.msg_controllen = sizeof(Buffer);
200 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
201 ControlMessage->cmsg_level = SOL_SOCKET;
202 ControlMessage->cmsg_type = SCM_RIGHTS;
203 ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
205 memcpy(CMSG_DATA(ControlMessage), &FD, sizeof(FD));
207 Message.msg_controllen = CMSG_SPACE(sizeof(FD));
209 ssize_t BytesWritten = sendmsg(SocketFD, &Message, 0);
211 if (BytesWritten < 0)
212 return make_error<Failure>("Failed to write FD to socket: " +
213 Twine(strerror(errno)));
215 return Error::success();
218 Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
219 struct msghdr Message = {};
221 char ControlBuffer[256];
222 Message.msg_control = ControlBuffer;
223 Message.msg_controllen = sizeof(ControlBuffer);
225 ssize_t BytesRead = recvmsg(SocketFD, &Message, 0);
227 if (BytesRead < 0)
228 return make_error<Failure>("Failed to read FD from socket: " +
229 Twine(strerror(errno)));
231 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
233 int FD;
235 if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
236 return make_error<Failure>("Failed to get correct number of bytes for "
237 "file descriptor from socket.");
239 memcpy(&FD, CMSG_DATA(ControlMessage), sizeof(FD));
241 return FD;
244 Error createSubProcessAndRunBenchmark(
245 StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues) const {
246 int PipeFiles[2];
247 int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, 0, PipeFiles);
248 if (PipeSuccessOrErr != 0) {
249 return make_error<Failure>(
250 "Failed to create a pipe for interprocess communication between "
251 "llvm-exegesis and the benchmarking subprocess: " +
252 Twine(strerror(errno)));
255 SubprocessMemory SPMemory;
256 Error MemoryInitError = SPMemory.initializeSubprocessMemory(getpid());
257 if (MemoryInitError)
258 return MemoryInitError;
260 Error AddMemDefError =
261 SPMemory.addMemoryDefinition(Key.MemoryValues, getpid());
262 if (AddMemDefError)
263 return AddMemDefError;
265 pid_t ParentOrChildPID = fork();
267 if (ParentOrChildPID == -1) {
268 return make_error<Failure>("Failed to create child process: " +
269 Twine(strerror(errno)));
272 if (ParentOrChildPID == 0) {
273 // We are in the child process, close the write end of the pipe
274 close(PipeFiles[1]);
275 // Unregister handlers, signal handling is now handled through ptrace in
276 // the host process
277 llvm::sys::unregisterHandlers();
278 prepareAndRunBenchmark(PipeFiles[0], Key);
279 // The child process terminates in the above function, so we should never
280 // get to this point.
281 llvm_unreachable("Child process didn't exit when expected.");
284 const ExegesisTarget &ET = State.getExegesisTarget();
285 auto CounterOrError =
286 ET.createCounter(CounterName, State, ParentOrChildPID);
288 if (!CounterOrError)
289 return CounterOrError.takeError();
291 pfm::Counter *Counter = CounterOrError.get().get();
293 close(PipeFiles[0]);
295 int CounterFileDescriptor = Counter->getFileDescriptor();
296 Error SendError =
297 sendFileDescriptorThroughSocket(PipeFiles[1], CounterFileDescriptor);
299 if (SendError)
300 return SendError;
302 if (ptrace(PTRACE_ATTACH, ParentOrChildPID, NULL, NULL) != 0)
303 return make_error<Failure>("Failed to attach to the child process: " +
304 Twine(strerror(errno)));
306 if (wait(NULL) == -1) {
307 return make_error<Failure>(
308 "Failed to wait for child process to stop after attaching: " +
309 Twine(strerror(errno)));
312 if (ptrace(PTRACE_CONT, ParentOrChildPID, NULL, NULL) != 0)
313 return make_error<Failure>(
314 "Failed to continue execution of the child process: " +
315 Twine(strerror(errno)));
317 int ChildStatus;
318 if (wait(&ChildStatus) == -1) {
319 return make_error<Failure>(
320 "Waiting for the child process to complete failed: " +
321 Twine(strerror(errno)));
324 if (WIFEXITED(ChildStatus)) {
325 int ChildExitCode = WEXITSTATUS(ChildStatus);
326 if (ChildExitCode == 0) {
327 // The child exited succesfully, read counter values and return
328 // success
329 CounterValues[0] = Counter->read();
330 return Error::success();
332 // The child exited, but not successfully
333 return make_error<SnippetCrash>(
334 "Child benchmarking process exited with non-zero exit code: " +
335 childProcessExitCodeToString(ChildExitCode));
338 // An error was encountered running the snippet, process it
339 siginfo_t ChildSignalInfo;
340 if (ptrace(PTRACE_GETSIGINFO, ParentOrChildPID, NULL, &ChildSignalInfo) ==
341 -1) {
342 return make_error<Failure>("Getting signal info from the child failed: " +
343 Twine(strerror(errno)));
346 return make_error<SnippetCrash>(
347 "The benchmarking subprocess sent unexpected signal: " +
348 Twine(strsignal(ChildSignalInfo.si_signo)));
351 [[noreturn]] void prepareAndRunBenchmark(int Pipe,
352 const BenchmarkKey &Key) const {
353 // The following occurs within the benchmarking subprocess
354 pid_t ParentPID = getppid();
356 Expected<int> CounterFileDescriptorOrError =
357 getFileDescriptorFromSocket(Pipe);
359 if (!CounterFileDescriptorOrError)
360 exit(ChildProcessExitCodeE::CounterFDReadFailed);
362 int CounterFileDescriptor = *CounterFileDescriptorOrError;
364 // Glibc versions greater than 2.35 automatically call rseq during
365 // initialization. Unmapping the region that glibc sets up for this causes
366 // segfaults in the program Unregister the rseq region so that we can safely
367 // unmap it later
368 #ifdef GLIBC_INITS_RSEQ
369 long RseqDisableOutput =
370 syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset,
371 __rseq_size, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
372 if (RseqDisableOutput != 0)
373 exit(ChildProcessExitCodeE::RSeqDisableFailed);
374 #endif // GLIBC_INITS_RSEQ
376 size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
377 char *FunctionDataCopy =
378 (char *)mmap(NULL, FunctionDataCopySize, PROT_READ | PROT_WRITE,
379 MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
380 if ((intptr_t)FunctionDataCopy == -1)
381 exit(ChildProcessExitCodeE::FunctionDataMappingFailed);
383 memcpy(FunctionDataCopy, this->Function.FunctionBytes.data(),
384 this->Function.FunctionBytes.size());
385 mprotect(FunctionDataCopy, FunctionDataCopySize, PROT_READ | PROT_EXEC);
387 Expected<int> AuxMemFDOrError =
388 SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
389 Key.MemoryValues, ParentPID, CounterFileDescriptor);
390 if (!AuxMemFDOrError)
391 exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
393 ((void (*)(size_t, int))(intptr_t)FunctionDataCopy)(FunctionDataCopySize,
394 *AuxMemFDOrError);
396 exit(0);
399 Expected<llvm::SmallVector<int64_t, 4>>
400 runWithCounter(StringRef CounterName) const override {
401 SmallVector<int64_t, 4> Value(1, 0);
402 Error PossibleBenchmarkError =
403 createSubProcessAndRunBenchmark(CounterName, Value);
405 if (PossibleBenchmarkError) {
406 return std::move(PossibleBenchmarkError);
409 return Value;
412 const LLVMState &State;
413 const ExecutableFunction Function;
414 const BenchmarkKey &Key;
416 #endif // __linux__
417 } // namespace
419 Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
420 const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
421 unsigned MinInstructions, unsigned LoopBodySize,
422 bool GenerateMemoryInstructions) const {
423 const std::vector<MCInst> &Instructions = BC.Key.Instructions;
424 SmallString<0> Buffer;
425 raw_svector_ostream OS(Buffer);
426 if (Error E = assembleToStream(
427 State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns,
428 BC.Key.RegisterInitialValues,
429 Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
430 GenerateMemoryInstructions),
431 OS, BC.Key, GenerateMemoryInstructions)) {
432 return std::move(E);
434 return Buffer;
437 Expected<BenchmarkRunner::RunnableConfiguration>
438 BenchmarkRunner::getRunnableConfiguration(
439 const BenchmarkCode &BC, unsigned NumRepetitions, unsigned LoopBodySize,
440 const SnippetRepetitor &Repetitor) const {
441 RunnableConfiguration RC;
443 Benchmark &InstrBenchmark = RC.InstrBenchmark;
444 InstrBenchmark.Mode = Mode;
445 InstrBenchmark.CpuName = std::string(State.getTargetMachine().getTargetCPU());
446 InstrBenchmark.LLVMTriple =
447 State.getTargetMachine().getTargetTriple().normalize();
448 InstrBenchmark.NumRepetitions = NumRepetitions;
449 InstrBenchmark.Info = BC.Info;
451 const std::vector<MCInst> &Instructions = BC.Key.Instructions;
453 bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
455 InstrBenchmark.Key = BC.Key;
457 // Assemble at least kMinInstructionsForSnippet instructions by repeating
458 // the snippet for debug/analysis. This is so that the user clearly
459 // understands that the inside instructions are repeated.
460 if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
461 const int MinInstructionsForSnippet = 4 * Instructions.size();
462 const int LoopBodySizeForSnippet = 2 * Instructions.size();
463 auto Snippet =
464 assembleSnippet(BC, Repetitor, MinInstructionsForSnippet,
465 LoopBodySizeForSnippet, GenerateMemoryInstructions);
466 if (Error E = Snippet.takeError())
467 return std::move(E);
469 if (auto Err = getBenchmarkFunctionBytes(*Snippet,
470 InstrBenchmark.AssembledSnippet))
471 return std::move(Err);
474 // Assemble NumRepetitions instructions repetitions of the snippet for
475 // measurements.
476 if (BenchmarkPhaseSelector >
477 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
478 auto Snippet = assembleSnippet(BC, Repetitor, InstrBenchmark.NumRepetitions,
479 LoopBodySize, GenerateMemoryInstructions);
480 if (Error E = Snippet.takeError())
481 return std::move(E);
482 RC.ObjectFile = getObjectFromBuffer(*Snippet);
485 return std::move(RC);
488 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
489 BenchmarkRunner::createFunctionExecutor(
490 object::OwningBinary<object::ObjectFile> ObjectFile,
491 const BenchmarkKey &Key) const {
492 switch (ExecutionMode) {
493 case ExecutionModeE::InProcess:
494 return std::make_unique<InProcessFunctionExecutorImpl>(
495 State, std::move(ObjectFile), Scratch.get());
496 case ExecutionModeE::SubProcess:
497 #ifdef __linux__
498 return std::make_unique<SubProcessFunctionExecutorImpl>(
499 State, std::move(ObjectFile), Key);
500 #else
501 return make_error<Failure>(
502 "The subprocess execution mode is only supported on Linux");
503 #endif
505 llvm_unreachable("ExecutionMode is outside expected range");
508 Expected<Benchmark> BenchmarkRunner::runConfiguration(
509 RunnableConfiguration &&RC,
510 const std::optional<StringRef> &DumpFile) const {
511 Benchmark &InstrBenchmark = RC.InstrBenchmark;
512 object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
514 if (DumpFile && BenchmarkPhaseSelector >
515 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
516 auto ObjectFilePath =
517 writeObjectFile(ObjectFile.getBinary()->getData(), *DumpFile);
518 if (Error E = ObjectFilePath.takeError()) {
519 InstrBenchmark.Error = toString(std::move(E));
520 return std::move(InstrBenchmark);
522 outs() << "Check generated assembly with: /usr/bin/objdump -d "
523 << *ObjectFilePath << "\n";
526 if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
527 InstrBenchmark.Error = "actual measurements skipped.";
528 return std::move(InstrBenchmark);
531 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
532 createFunctionExecutor(std::move(ObjectFile), RC.InstrBenchmark.Key);
533 if (!Executor)
534 return Executor.takeError();
535 auto NewMeasurements = runMeasurements(**Executor);
537 if (Error E = NewMeasurements.takeError()) {
538 if (!E.isA<SnippetCrash>())
539 return std::move(E);
540 InstrBenchmark.Error = toString(std::move(E));
541 return std::move(InstrBenchmark);
543 assert(InstrBenchmark.NumRepetitions > 0 && "invalid NumRepetitions");
544 for (BenchmarkMeasure &BM : *NewMeasurements) {
545 // Scale the measurements by instruction.
546 BM.PerInstructionValue /= InstrBenchmark.NumRepetitions;
547 // Scale the measurements by snippet.
548 BM.PerSnippetValue *=
549 static_cast<double>(InstrBenchmark.Key.Instructions.size()) /
550 InstrBenchmark.NumRepetitions;
552 InstrBenchmark.Measurements = std::move(*NewMeasurements);
554 return std::move(InstrBenchmark);
557 Expected<std::string>
558 BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
559 int ResultFD = 0;
560 SmallString<256> ResultPath = FileName;
561 if (Error E = errorCodeToError(
562 FileName.empty() ? sys::fs::createTemporaryFile("snippet", "o",
563 ResultFD, ResultPath)
564 : sys::fs::openFileForReadWrite(
565 FileName, ResultFD, sys::fs::CD_CreateAlways,
566 sys::fs::OF_None)))
567 return std::move(E);
568 raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/);
569 OFS.write(Buffer.data(), Buffer.size());
570 OFS.flush();
571 return std::string(ResultPath.str());
574 BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
576 } // namespace exegesis
577 } // namespace llvm