1 //===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
13 #include "Assembler.h"
14 #include "BenchmarkRunner.h"
16 #include "MCInstrDescView.h"
17 #include "PerfHelper.h"
18 #include "SubprocessMemory.h"
20 #include "llvm/ADT/ScopeExit.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/Twine.h"
24 #include "llvm/Support/CrashRecoveryContext.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Program.h"
29 #include "llvm/Support/Signals.h"
33 #include <perfmon/perf_event.h>
36 #include <sys/ptrace.h>
37 #include <sys/socket.h>
38 #include <sys/syscall.h>
42 #if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
44 #if defined(RSEQ_SIG) && defined(SYS_rseq)
45 #define GLIBC_INITS_RSEQ
53 BenchmarkRunner::BenchmarkRunner(const LLVMState
&State
, Benchmark::ModeE Mode
,
54 BenchmarkPhaseSelectorE BenchmarkPhaseSelector
,
55 ExecutionModeE ExecutionMode
)
56 : State(State
), Mode(Mode
), BenchmarkPhaseSelector(BenchmarkPhaseSelector
),
57 ExecutionMode(ExecutionMode
), Scratch(std::make_unique
<ScratchSpace
>()) {}
59 BenchmarkRunner::~BenchmarkRunner() = default;
61 void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
62 const llvm::SmallVectorImpl
<int64_t> &NewValues
,
63 llvm::SmallVectorImpl
<int64_t> *Result
) {
64 const size_t NumValues
= std::max(NewValues
.size(), Result
->size());
65 if (NumValues
> Result
->size())
66 Result
->resize(NumValues
, 0);
67 for (size_t I
= 0, End
= NewValues
.size(); I
< End
; ++I
)
68 (*Result
)[I
] += NewValues
[I
];
71 Expected
<llvm::SmallVector
<int64_t, 4>>
72 BenchmarkRunner::FunctionExecutor::runAndSample(const char *Counters
) const {
73 // We sum counts when there are several counters for a single ProcRes
74 // (e.g. P23 on SandyBridge).
75 llvm::SmallVector
<int64_t, 4> CounterValues
;
76 SmallVector
<StringRef
, 2> CounterNames
;
77 StringRef(Counters
).split(CounterNames
, '+');
78 for (auto &CounterName
: CounterNames
) {
79 CounterName
= CounterName
.trim();
80 Expected
<SmallVector
<int64_t, 4>> ValueOrError
=
81 runWithCounter(CounterName
);
83 return ValueOrError
.takeError();
84 accumulateCounterValues(ValueOrError
.get(), &CounterValues
);
90 class InProcessFunctionExecutorImpl
: public BenchmarkRunner::FunctionExecutor
{
92 InProcessFunctionExecutorImpl(const LLVMState
&State
,
93 object::OwningBinary
<object::ObjectFile
> Obj
,
94 BenchmarkRunner::ScratchSpace
*Scratch
)
95 : State(State
), Function(State
.createTargetMachine(), std::move(Obj
)),
100 accumulateCounterValues(const llvm::SmallVector
<int64_t, 4> &NewValues
,
101 llvm::SmallVector
<int64_t, 4> *Result
) {
102 const size_t NumValues
= std::max(NewValues
.size(), Result
->size());
103 if (NumValues
> Result
->size())
104 Result
->resize(NumValues
, 0);
105 for (size_t I
= 0, End
= NewValues
.size(); I
< End
; ++I
)
106 (*Result
)[I
] += NewValues
[I
];
109 Expected
<llvm::SmallVector
<int64_t, 4>>
110 runWithCounter(StringRef CounterName
) const override
{
111 const ExegesisTarget
&ET
= State
.getExegesisTarget();
112 char *const ScratchPtr
= Scratch
->ptr();
113 auto CounterOrError
= ET
.createCounter(CounterName
, State
);
116 return CounterOrError
.takeError();
118 pfm::Counter
*Counter
= CounterOrError
.get().get();
121 auto PS
= ET
.withSavedState();
122 CrashRecoveryContext CRC
;
123 CrashRecoveryContext::Enable();
124 const bool Crashed
= !CRC
.RunSafely([this, Counter
, ScratchPtr
]() {
126 this->Function(ScratchPtr
);
129 CrashRecoveryContext::Disable();
132 std::string Msg
= "snippet crashed while running";
134 // See "Exit Status for Commands":
135 // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
136 constexpr const int kSigOffset
= 128;
137 if (const char *const SigName
= strsignal(CRC
.RetCode
- kSigOffset
)) {
142 return make_error
<SnippetCrash
>(std::move(Msg
));
146 return Counter
->readOrError(Function
.getFunctionBytes());
149 const LLVMState
&State
;
150 const ExecutableFunction Function
;
151 BenchmarkRunner::ScratchSpace
*const Scratch
;
155 // The following class implements a function executor that executes the
156 // benchmark code within a subprocess rather than within the main llvm-exegesis
157 // process. This allows for much more control over the execution context of the
158 // snippet, particularly with regard to memory. This class performs all the
159 // necessary functions to create the subprocess, execute the snippet in the
160 // subprocess, and report results/handle errors.
161 class SubProcessFunctionExecutorImpl
162 : public BenchmarkRunner::FunctionExecutor
{
164 SubProcessFunctionExecutorImpl(const LLVMState
&State
,
165 object::OwningBinary
<object::ObjectFile
> Obj
,
166 const BenchmarkKey
&Key
)
167 : State(State
), Function(State
.createTargetMachine(), std::move(Obj
)),
171 enum ChildProcessExitCodeE
{
172 CounterFDReadFailed
= 1,
174 FunctionDataMappingFailed
,
175 AuxiliaryMemorySetupFailed
178 StringRef
childProcessExitCodeToString(int ExitCode
) const {
180 case ChildProcessExitCodeE::CounterFDReadFailed
:
181 return "Counter file descriptor read failed";
182 case ChildProcessExitCodeE::RSeqDisableFailed
:
183 return "Disabling restartable sequences failed";
184 case ChildProcessExitCodeE::FunctionDataMappingFailed
:
185 return "Failed to map memory for assembled snippet";
186 case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed
:
187 return "Failed to setup auxiliary memory";
189 return "Child process returned with unknown exit code";
193 Error
sendFileDescriptorThroughSocket(int SocketFD
, int FD
) const {
194 struct msghdr Message
= {};
195 char Buffer
[CMSG_SPACE(sizeof(FD
))];
196 memset(Buffer
, 0, sizeof(Buffer
));
197 Message
.msg_control
= Buffer
;
198 Message
.msg_controllen
= sizeof(Buffer
);
200 struct cmsghdr
*ControlMessage
= CMSG_FIRSTHDR(&Message
);
201 ControlMessage
->cmsg_level
= SOL_SOCKET
;
202 ControlMessage
->cmsg_type
= SCM_RIGHTS
;
203 ControlMessage
->cmsg_len
= CMSG_LEN(sizeof(FD
));
205 memcpy(CMSG_DATA(ControlMessage
), &FD
, sizeof(FD
));
207 Message
.msg_controllen
= CMSG_SPACE(sizeof(FD
));
209 ssize_t BytesWritten
= sendmsg(SocketFD
, &Message
, 0);
211 if (BytesWritten
< 0)
212 return make_error
<Failure
>("Failed to write FD to socket: " +
213 Twine(strerror(errno
)));
215 return Error::success();
218 Expected
<int> getFileDescriptorFromSocket(int SocketFD
) const {
219 struct msghdr Message
= {};
221 char ControlBuffer
[256];
222 Message
.msg_control
= ControlBuffer
;
223 Message
.msg_controllen
= sizeof(ControlBuffer
);
225 ssize_t BytesRead
= recvmsg(SocketFD
, &Message
, 0);
228 return make_error
<Failure
>("Failed to read FD from socket: " +
229 Twine(strerror(errno
)));
231 struct cmsghdr
*ControlMessage
= CMSG_FIRSTHDR(&Message
);
235 if (ControlMessage
->cmsg_len
!= CMSG_LEN(sizeof(FD
)))
236 return make_error
<Failure
>("Failed to get correct number of bytes for "
237 "file descriptor from socket.");
239 memcpy(&FD
, CMSG_DATA(ControlMessage
), sizeof(FD
));
244 Error
createSubProcessAndRunBenchmark(
245 StringRef CounterName
, SmallVectorImpl
<int64_t> &CounterValues
) const {
247 int PipeSuccessOrErr
= socketpair(AF_UNIX
, SOCK_DGRAM
, 0, PipeFiles
);
248 if (PipeSuccessOrErr
!= 0) {
249 return make_error
<Failure
>(
250 "Failed to create a pipe for interprocess communication between "
251 "llvm-exegesis and the benchmarking subprocess: " +
252 Twine(strerror(errno
)));
255 SubprocessMemory SPMemory
;
256 Error MemoryInitError
= SPMemory
.initializeSubprocessMemory(getpid());
258 return MemoryInitError
;
260 Error AddMemDefError
=
261 SPMemory
.addMemoryDefinition(Key
.MemoryValues
, getpid());
263 return AddMemDefError
;
265 pid_t ParentOrChildPID
= fork();
267 if (ParentOrChildPID
== -1) {
268 return make_error
<Failure
>("Failed to create child process: " +
269 Twine(strerror(errno
)));
272 if (ParentOrChildPID
== 0) {
273 // We are in the child process, close the write end of the pipe
275 // Unregister handlers, signal handling is now handled through ptrace in the parent process.
277 llvm::sys::unregisterHandlers();
278 prepareAndRunBenchmark(PipeFiles
[0], Key
);
279 // The child process terminates in the above function, so we should never
280 // get to this point.
281 llvm_unreachable("Child process didn't exit when expected.");
284 const ExegesisTarget
&ET
= State
.getExegesisTarget();
285 auto CounterOrError
=
286 ET
.createCounter(CounterName
, State
, ParentOrChildPID
);
289 return CounterOrError
.takeError();
291 pfm::Counter
*Counter
= CounterOrError
.get().get();
295 int CounterFileDescriptor
= Counter
->getFileDescriptor();
297 sendFileDescriptorThroughSocket(PipeFiles
[1], CounterFileDescriptor
);
302 if (ptrace(PTRACE_ATTACH
, ParentOrChildPID
, NULL
, NULL
) != 0)
303 return make_error
<Failure
>("Failed to attach to the child process: " +
304 Twine(strerror(errno
)));
306 if (wait(NULL
) == -1) {
307 return make_error
<Failure
>(
308 "Failed to wait for child process to stop after attaching: " +
309 Twine(strerror(errno
)));
312 if (ptrace(PTRACE_CONT
, ParentOrChildPID
, NULL
, NULL
) != 0)
313 return make_error
<Failure
>(
314 "Failed to continue execution of the child process: " +
315 Twine(strerror(errno
)));
318 if (wait(&ChildStatus
) == -1) {
319 return make_error
<Failure
>(
320 "Waiting for the child process to complete failed: " +
321 Twine(strerror(errno
)));
324 if (WIFEXITED(ChildStatus
)) {
325 int ChildExitCode
= WEXITSTATUS(ChildStatus
);
326 if (ChildExitCode
== 0) {
327 // The child exited successfully, read counter values and return
329 CounterValues
[0] = Counter
->read();
330 return Error::success();
332 // The child exited, but not successfully
333 return make_error
<SnippetCrash
>(
334 "Child benchmarking process exited with non-zero exit code: " +
335 childProcessExitCodeToString(ChildExitCode
));
338 // An error was encountered running the snippet, process it
339 siginfo_t ChildSignalInfo
;
340 if (ptrace(PTRACE_GETSIGINFO
, ParentOrChildPID
, NULL
, &ChildSignalInfo
) ==
342 return make_error
<Failure
>("Getting signal info from the child failed: " +
343 Twine(strerror(errno
)));
346 return make_error
<SnippetCrash
>(
347 "The benchmarking subprocess sent unexpected signal: " +
348 Twine(strsignal(ChildSignalInfo
.si_signo
)));
351 [[noreturn
]] void prepareAndRunBenchmark(int Pipe
,
352 const BenchmarkKey
&Key
) const {
353 // The following occurs within the benchmarking subprocess
354 pid_t ParentPID
= getppid();
356 Expected
<int> CounterFileDescriptorOrError
=
357 getFileDescriptorFromSocket(Pipe
);
359 if (!CounterFileDescriptorOrError
)
360 exit(ChildProcessExitCodeE::CounterFDReadFailed
);
362 int CounterFileDescriptor
= *CounterFileDescriptorOrError
;
364 // Glibc versions greater than 2.35 automatically call rseq during
365 // initialization. Unmapping the region that glibc sets up for this causes
366 // segfaults in the program. Unregister the rseq region so that we can safely unmap it.
368 #ifdef GLIBC_INITS_RSEQ
369 long RseqDisableOutput
=
370 syscall(SYS_rseq
, (intptr_t)__builtin_thread_pointer() + __rseq_offset
,
371 __rseq_size
, RSEQ_FLAG_UNREGISTER
, RSEQ_SIG
);
372 if (RseqDisableOutput
!= 0)
373 exit(ChildProcessExitCodeE::RSeqDisableFailed
);
374 #endif // GLIBC_INITS_RSEQ
376 size_t FunctionDataCopySize
= this->Function
.FunctionBytes
.size();
377 char *FunctionDataCopy
=
378 (char *)mmap(NULL
, FunctionDataCopySize
, PROT_READ
| PROT_WRITE
,
379 MAP_PRIVATE
| MAP_ANONYMOUS
, 0, 0);
380 if ((intptr_t)FunctionDataCopy
== -1)
381 exit(ChildProcessExitCodeE::FunctionDataMappingFailed
);
383 memcpy(FunctionDataCopy
, this->Function
.FunctionBytes
.data(),
384 this->Function
.FunctionBytes
.size());
385 mprotect(FunctionDataCopy
, FunctionDataCopySize
, PROT_READ
| PROT_EXEC
);
387 Expected
<int> AuxMemFDOrError
=
388 SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
389 Key
.MemoryValues
, ParentPID
, CounterFileDescriptor
);
390 if (!AuxMemFDOrError
)
391 exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed
);
393 ((void (*)(size_t, int))(intptr_t)FunctionDataCopy
)(FunctionDataCopySize
,
399 Expected
<llvm::SmallVector
<int64_t, 4>>
400 runWithCounter(StringRef CounterName
) const override
{
401 SmallVector
<int64_t, 4> Value(1, 0);
402 Error PossibleBenchmarkError
=
403 createSubProcessAndRunBenchmark(CounterName
, Value
);
405 if (PossibleBenchmarkError
) {
406 return std::move(PossibleBenchmarkError
);
412 const LLVMState
&State
;
413 const ExecutableFunction Function
;
414 const BenchmarkKey
&Key
;
419 Expected
<SmallString
<0>> BenchmarkRunner::assembleSnippet(
420 const BenchmarkCode
&BC
, const SnippetRepetitor
&Repetitor
,
421 unsigned MinInstructions
, unsigned LoopBodySize
,
422 bool GenerateMemoryInstructions
) const {
423 const std::vector
<MCInst
> &Instructions
= BC
.Key
.Instructions
;
424 SmallString
<0> Buffer
;
425 raw_svector_ostream
OS(Buffer
);
426 if (Error E
= assembleToStream(
427 State
.getExegesisTarget(), State
.createTargetMachine(), BC
.LiveIns
,
428 BC
.Key
.RegisterInitialValues
,
429 Repetitor
.Repeat(Instructions
, MinInstructions
, LoopBodySize
,
430 GenerateMemoryInstructions
),
431 OS
, BC
.Key
, GenerateMemoryInstructions
)) {
437 Expected
<BenchmarkRunner::RunnableConfiguration
>
438 BenchmarkRunner::getRunnableConfiguration(
439 const BenchmarkCode
&BC
, unsigned NumRepetitions
, unsigned LoopBodySize
,
440 const SnippetRepetitor
&Repetitor
) const {
441 RunnableConfiguration RC
;
443 Benchmark
&InstrBenchmark
= RC
.InstrBenchmark
;
444 InstrBenchmark
.Mode
= Mode
;
445 InstrBenchmark
.CpuName
= std::string(State
.getTargetMachine().getTargetCPU());
446 InstrBenchmark
.LLVMTriple
=
447 State
.getTargetMachine().getTargetTriple().normalize();
448 InstrBenchmark
.NumRepetitions
= NumRepetitions
;
449 InstrBenchmark
.Info
= BC
.Info
;
451 const std::vector
<MCInst
> &Instructions
= BC
.Key
.Instructions
;
453 bool GenerateMemoryInstructions
= ExecutionMode
== ExecutionModeE::SubProcess
;
455 InstrBenchmark
.Key
= BC
.Key
;
457 // Assemble at least kMinInstructionsForSnippet instructions by repeating
458 // the snippet for debug/analysis. This is so that the user clearly
459 // understands that the inside instructions are repeated.
460 if (BenchmarkPhaseSelector
> BenchmarkPhaseSelectorE::PrepareSnippet
) {
461 const int MinInstructionsForSnippet
= 4 * Instructions
.size();
462 const int LoopBodySizeForSnippet
= 2 * Instructions
.size();
464 assembleSnippet(BC
, Repetitor
, MinInstructionsForSnippet
,
465 LoopBodySizeForSnippet
, GenerateMemoryInstructions
);
466 if (Error E
= Snippet
.takeError())
469 if (auto Err
= getBenchmarkFunctionBytes(*Snippet
,
470 InstrBenchmark
.AssembledSnippet
))
471 return std::move(Err
);
474 // Assemble NumRepetitions instructions repetitions of the snippet for
476 if (BenchmarkPhaseSelector
>
477 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet
) {
478 auto Snippet
= assembleSnippet(BC
, Repetitor
, InstrBenchmark
.NumRepetitions
,
479 LoopBodySize
, GenerateMemoryInstructions
);
480 if (Error E
= Snippet
.takeError())
482 RC
.ObjectFile
= getObjectFromBuffer(*Snippet
);
485 return std::move(RC
);
488 Expected
<std::unique_ptr
<BenchmarkRunner::FunctionExecutor
>>
489 BenchmarkRunner::createFunctionExecutor(
490 object::OwningBinary
<object::ObjectFile
> ObjectFile
,
491 const BenchmarkKey
&Key
) const {
492 switch (ExecutionMode
) {
493 case ExecutionModeE::InProcess
:
494 return std::make_unique
<InProcessFunctionExecutorImpl
>(
495 State
, std::move(ObjectFile
), Scratch
.get());
496 case ExecutionModeE::SubProcess
:
498 return std::make_unique
<SubProcessFunctionExecutorImpl
>(
499 State
, std::move(ObjectFile
), Key
);
501 return make_error
<Failure
>(
502 "The subprocess execution mode is only supported on Linux");
505 llvm_unreachable("ExecutionMode is outside expected range");
508 Expected
<Benchmark
> BenchmarkRunner::runConfiguration(
509 RunnableConfiguration
&&RC
,
510 const std::optional
<StringRef
> &DumpFile
) const {
511 Benchmark
&InstrBenchmark
= RC
.InstrBenchmark
;
512 object::OwningBinary
<object::ObjectFile
> &ObjectFile
= RC
.ObjectFile
;
514 if (DumpFile
&& BenchmarkPhaseSelector
>
515 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet
) {
516 auto ObjectFilePath
=
517 writeObjectFile(ObjectFile
.getBinary()->getData(), *DumpFile
);
518 if (Error E
= ObjectFilePath
.takeError()) {
519 InstrBenchmark
.Error
= toString(std::move(E
));
520 return std::move(InstrBenchmark
);
522 outs() << "Check generated assembly with: /usr/bin/objdump -d "
523 << *ObjectFilePath
<< "\n";
526 if (BenchmarkPhaseSelector
< BenchmarkPhaseSelectorE::Measure
) {
527 InstrBenchmark
.Error
= "actual measurements skipped.";
528 return std::move(InstrBenchmark
);
531 Expected
<std::unique_ptr
<BenchmarkRunner::FunctionExecutor
>> Executor
=
532 createFunctionExecutor(std::move(ObjectFile
), RC
.InstrBenchmark
.Key
);
534 return Executor
.takeError();
535 auto NewMeasurements
= runMeasurements(**Executor
);
537 if (Error E
= NewMeasurements
.takeError()) {
538 if (!E
.isA
<SnippetCrash
>())
540 InstrBenchmark
.Error
= toString(std::move(E
));
541 return std::move(InstrBenchmark
);
543 assert(InstrBenchmark
.NumRepetitions
> 0 && "invalid NumRepetitions");
544 for (BenchmarkMeasure
&BM
: *NewMeasurements
) {
545 // Scale the measurements by instruction.
546 BM
.PerInstructionValue
/= InstrBenchmark
.NumRepetitions
;
547 // Scale the measurements by snippet.
548 BM
.PerSnippetValue
*=
549 static_cast<double>(InstrBenchmark
.Key
.Instructions
.size()) /
550 InstrBenchmark
.NumRepetitions
;
552 InstrBenchmark
.Measurements
= std::move(*NewMeasurements
);
554 return std::move(InstrBenchmark
);
557 Expected
<std::string
>
558 BenchmarkRunner::writeObjectFile(StringRef Buffer
, StringRef FileName
) const {
560 SmallString
<256> ResultPath
= FileName
;
561 if (Error E
= errorCodeToError(
562 FileName
.empty() ? sys::fs::createTemporaryFile("snippet", "o",
563 ResultFD
, ResultPath
)
564 : sys::fs::openFileForReadWrite(
565 FileName
, ResultFD
, sys::fs::CD_CreateAlways
,
568 raw_fd_ostream
OFS(ResultFD
, true /*ShouldClose*/);
569 OFS
.write(Buffer
.data(), Buffer
.size());
571 return std::string(ResultPath
.str());
574 BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
576 } // namespace exegesis