1 //===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "BenchmarkRunner.h"
10 #include "Assembler.h"
12 #include "MCInstrDescView.h"
13 #include "MmapUtils.h"
14 #include "PerfHelper.h"
15 #include "SubprocessMemory.h"
17 #include "llvm/ADT/ScopeExit.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
22 #include "llvm/Support/CrashRecoveryContext.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Program.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SystemZ/zOSSupport.h"
35 #include <perfmon/perf_event.h>
38 #include <sys/ptrace.h>
39 #include <sys/resource.h>
40 #include <sys/socket.h>
41 #include <sys/syscall.h>
45 #if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
47 #if defined(RSEQ_SIG) && defined(SYS_rseq)
48 #define GLIBC_INITS_RSEQ
56 BenchmarkRunner::BenchmarkRunner(const LLVMState
&State
, Benchmark::ModeE Mode
,
57 BenchmarkPhaseSelectorE BenchmarkPhaseSelector
,
58 ExecutionModeE ExecutionMode
,
59 ArrayRef
<ValidationEvent
> ValCounters
)
60 : State(State
), Mode(Mode
), BenchmarkPhaseSelector(BenchmarkPhaseSelector
),
61 ExecutionMode(ExecutionMode
), ValidationCounters(ValCounters
),
62 Scratch(std::make_unique
<ScratchSpace
>()) {}
64 BenchmarkRunner::~BenchmarkRunner() = default;
66 void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
67 const SmallVectorImpl
<int64_t> &NewValues
,
68 SmallVectorImpl
<int64_t> *Result
) {
69 const size_t NumValues
= std::max(NewValues
.size(), Result
->size());
70 if (NumValues
> Result
->size())
71 Result
->resize(NumValues
, 0);
72 for (size_t I
= 0, End
= NewValues
.size(); I
< End
; ++I
)
73 (*Result
)[I
] += NewValues
[I
];
76 Expected
<SmallVector
<int64_t, 4>>
77 BenchmarkRunner::FunctionExecutor::runAndSample(
78 const char *Counters
, ArrayRef
<const char *> ValidationCounters
,
79 SmallVectorImpl
<int64_t> &ValidationCounterValues
) const {
80 // We sum counts when there are several counters for a single ProcRes
81 // (e.g. P23 on SandyBridge).
82 SmallVector
<int64_t, 4> CounterValues
;
83 SmallVector
<StringRef
, 2> CounterNames
;
84 StringRef(Counters
).split(CounterNames
, '+');
85 for (auto &CounterName
: CounterNames
) {
86 CounterName
= CounterName
.trim();
87 Expected
<SmallVector
<int64_t, 4>> ValueOrError
= runWithCounter(
88 CounterName
, ValidationCounters
, ValidationCounterValues
);
90 return ValueOrError
.takeError();
91 accumulateCounterValues(ValueOrError
.get(), &CounterValues
);
97 class InProcessFunctionExecutorImpl
: public BenchmarkRunner::FunctionExecutor
{
99 static Expected
<std::unique_ptr
<InProcessFunctionExecutorImpl
>>
100 create(const LLVMState
&State
, object::OwningBinary
<object::ObjectFile
> Obj
,
101 BenchmarkRunner::ScratchSpace
*Scratch
,
102 std::optional
<int> BenchmarkProcessCPU
) {
103 Expected
<ExecutableFunction
> EF
=
104 ExecutableFunction::create(State
.createTargetMachine(), std::move(Obj
));
107 return EF
.takeError();
109 return std::unique_ptr
<InProcessFunctionExecutorImpl
>(
110 new InProcessFunctionExecutorImpl(State
, std::move(*EF
), Scratch
));
114 InProcessFunctionExecutorImpl(const LLVMState
&State
,
115 ExecutableFunction Function
,
116 BenchmarkRunner::ScratchSpace
*Scratch
)
117 : State(State
), Function(std::move(Function
)), Scratch(Scratch
) {}
119 static void accumulateCounterValues(const SmallVector
<int64_t, 4> &NewValues
,
120 SmallVector
<int64_t, 4> *Result
) {
121 const size_t NumValues
= std::max(NewValues
.size(), Result
->size());
122 if (NumValues
> Result
->size())
123 Result
->resize(NumValues
, 0);
124 for (size_t I
= 0, End
= NewValues
.size(); I
< End
; ++I
)
125 (*Result
)[I
] += NewValues
[I
];
128 Expected
<SmallVector
<int64_t, 4>> runWithCounter(
129 StringRef CounterName
, ArrayRef
<const char *> ValidationCounters
,
130 SmallVectorImpl
<int64_t> &ValidationCounterValues
) const override
{
131 const ExegesisTarget
&ET
= State
.getExegesisTarget();
132 char *const ScratchPtr
= Scratch
->ptr();
133 auto CounterOrError
=
134 ET
.createCounter(CounterName
, State
, ValidationCounters
);
137 return CounterOrError
.takeError();
139 pfm::CounterGroup
*Counter
= CounterOrError
.get().get();
142 auto PS
= ET
.withSavedState();
143 CrashRecoveryContext CRC
;
144 CrashRecoveryContext::Enable();
145 const bool Crashed
= !CRC
.RunSafely([this, Counter
, ScratchPtr
]() {
147 this->Function(ScratchPtr
);
150 CrashRecoveryContext::Disable();
154 // See "Exit Status for Commands":
155 // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
156 constexpr const int kSigOffset
= 128;
157 return make_error
<SnippetSignal
>(CRC
.RetCode
- kSigOffset
);
159 // The exit code of the process on windows is not meaningful as a
160 // signal, so simply pass in -1 as the signal into the error.
161 return make_error
<SnippetSignal
>(-1);
162 #endif // LLVM_ON_UNIX
166 auto ValidationValuesOrErr
= Counter
->readValidationCountersOrError();
167 if (!ValidationValuesOrErr
)
168 return ValidationValuesOrErr
.takeError();
170 ArrayRef RealValidationValues
= *ValidationValuesOrErr
;
171 for (size_t I
= 0; I
< RealValidationValues
.size(); ++I
)
172 ValidationCounterValues
[I
] = RealValidationValues
[I
];
174 return Counter
->readOrError(Function
.getFunctionBytes());
177 const LLVMState
&State
;
178 const ExecutableFunction Function
;
179 BenchmarkRunner::ScratchSpace
*const Scratch
;
183 // The following class implements a function executor that executes the
184 // benchmark code within a subprocess rather than within the main llvm-exegesis
185 // process. This allows for much more control over the execution context of the
186 // snippet, particularly with regard to memory. This class performs all the
187 // necessary functions to create the subprocess, execute the snippet in the
188 // subprocess, and report results/handle errors.
189 class SubProcessFunctionExecutorImpl
190 : public BenchmarkRunner::FunctionExecutor
{
192 static Expected
<std::unique_ptr
<SubProcessFunctionExecutorImpl
>>
193 create(const LLVMState
&State
, object::OwningBinary
<object::ObjectFile
> Obj
,
194 const BenchmarkKey
&Key
, std::optional
<int> BenchmarkProcessCPU
) {
195 Expected
<ExecutableFunction
> EF
=
196 ExecutableFunction::create(State
.createTargetMachine(), std::move(Obj
));
198 return EF
.takeError();
200 return std::unique_ptr
<SubProcessFunctionExecutorImpl
>(
201 new SubProcessFunctionExecutorImpl(State
, std::move(*EF
), Key
,
202 BenchmarkProcessCPU
));
206 SubProcessFunctionExecutorImpl(const LLVMState
&State
,
207 ExecutableFunction Function
,
208 const BenchmarkKey
&Key
,
209 std::optional
<int> BenchmarkCPU
)
210 : State(State
), Function(std::move(Function
)), Key(Key
),
211 BenchmarkProcessCPU(BenchmarkCPU
) {}
213 enum ChildProcessExitCodeE
{
214 CounterFDReadFailed
= 1,
216 FunctionDataMappingFailed
,
217 AuxiliaryMemorySetupFailed
,
221 StringRef
childProcessExitCodeToString(int ExitCode
) const {
223 case ChildProcessExitCodeE::CounterFDReadFailed
:
224 return "Counter file descriptor read failed";
225 case ChildProcessExitCodeE::RSeqDisableFailed
:
226 return "Disabling restartable sequences failed";
227 case ChildProcessExitCodeE::FunctionDataMappingFailed
:
228 return "Failed to map memory for assembled snippet";
229 case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed
:
230 return "Failed to setup auxiliary memory";
231 case ChildProcessExitCodeE::SetCPUAffinityFailed
:
232 return "Failed to set CPU affinity of the benchmarking process";
234 return "Child process returned with unknown exit code";
238 Error
sendFileDescriptorThroughSocket(int SocketFD
, int FD
) const {
239 struct msghdr Message
= {};
240 char Buffer
[CMSG_SPACE(sizeof(FD
))];
241 memset(Buffer
, 0, sizeof(Buffer
));
242 Message
.msg_control
= Buffer
;
243 Message
.msg_controllen
= sizeof(Buffer
);
245 struct cmsghdr
*ControlMessage
= CMSG_FIRSTHDR(&Message
);
246 ControlMessage
->cmsg_level
= SOL_SOCKET
;
247 ControlMessage
->cmsg_type
= SCM_RIGHTS
;
248 ControlMessage
->cmsg_len
= CMSG_LEN(sizeof(FD
));
250 memcpy(CMSG_DATA(ControlMessage
), &FD
, sizeof(FD
));
252 Message
.msg_controllen
= CMSG_SPACE(sizeof(FD
));
254 ssize_t BytesWritten
= sendmsg(SocketFD
, &Message
, 0);
256 if (BytesWritten
< 0)
257 return make_error
<Failure
>("Failed to write FD to socket: " +
258 Twine(strerror(errno
)));
260 return Error::success();
263 Expected
<int> getFileDescriptorFromSocket(int SocketFD
) const {
264 struct msghdr Message
= {};
266 char ControlBuffer
[256];
267 Message
.msg_control
= ControlBuffer
;
268 Message
.msg_controllen
= sizeof(ControlBuffer
);
270 ssize_t BytesRead
= recvmsg(SocketFD
, &Message
, 0);
273 return make_error
<Failure
>("Failed to read FD from socket: " +
274 Twine(strerror(errno
)));
276 struct cmsghdr
*ControlMessage
= CMSG_FIRSTHDR(&Message
);
280 if (ControlMessage
->cmsg_len
!= CMSG_LEN(sizeof(FD
)))
281 return make_error
<Failure
>("Failed to get correct number of bytes for "
282 "file descriptor from socket.");
284 memcpy(&FD
, CMSG_DATA(ControlMessage
), sizeof(FD
));
290 runParentProcess(pid_t ChildPID
, int WriteFD
, StringRef CounterName
,
291 SmallVectorImpl
<int64_t> &CounterValues
,
292 ArrayRef
<const char *> ValidationCounters
,
293 SmallVectorImpl
<int64_t> &ValidationCounterValues
) const {
294 auto WriteFDClose
= make_scope_exit([WriteFD
]() { close(WriteFD
); });
295 const ExegesisTarget
&ET
= State
.getExegesisTarget();
296 auto CounterOrError
=
297 ET
.createCounter(CounterName
, State
, ValidationCounters
, ChildPID
);
300 return CounterOrError
.takeError();
302 pfm::CounterGroup
*Counter
= CounterOrError
.get().get();
304 // Make sure to attach to the process (and wait for the sigstop to be
305 // delivered and for the process to continue) before we write to the counter
306 // file descriptor. Attaching to the process before writing to the socket
307 // ensures that the subprocess at most has blocked on the read call. If we
308 // attach afterwards, the subprocess might exit before we get to the attach
309 // call due to effects like scheduler contention, introducing transient
311 if (ptrace(PTRACE_ATTACH
, ChildPID
, NULL
, NULL
) != 0)
312 return make_error
<Failure
>("Failed to attach to the child process: " +
313 Twine(strerror(errno
)));
315 if (waitpid(ChildPID
, NULL
, 0) == -1) {
316 return make_error
<Failure
>(
317 "Failed to wait for child process to stop after attaching: " +
318 Twine(strerror(errno
)));
321 if (ptrace(PTRACE_CONT
, ChildPID
, NULL
, NULL
) != 0)
322 return make_error
<Failure
>(
323 "Failed to continue execution of the child process: " +
324 Twine(strerror(errno
)));
326 int CounterFileDescriptor
= Counter
->getFileDescriptor();
328 sendFileDescriptorThroughSocket(WriteFD
, CounterFileDescriptor
);
334 if (waitpid(ChildPID
, &ChildStatus
, 0) == -1) {
335 return make_error
<Failure
>(
336 "Waiting for the child process to complete failed: " +
337 Twine(strerror(errno
)));
340 if (WIFEXITED(ChildStatus
)) {
341 int ChildExitCode
= WEXITSTATUS(ChildStatus
);
342 if (ChildExitCode
== 0) {
343 // The child exited succesfully, read counter values and return
345 auto CounterValueOrErr
= Counter
->readOrError();
346 if (!CounterValueOrErr
)
347 return CounterValueOrErr
.takeError();
348 CounterValues
= std::move(*CounterValueOrErr
);
350 auto ValidationValuesOrErr
= Counter
->readValidationCountersOrError();
351 if (!ValidationValuesOrErr
)
352 return ValidationValuesOrErr
.takeError();
354 ArrayRef RealValidationValues
= *ValidationValuesOrErr
;
355 for (size_t I
= 0; I
< RealValidationValues
.size(); ++I
)
356 ValidationCounterValues
[I
] = RealValidationValues
[I
];
358 return Error::success();
360 // The child exited, but not successfully.
361 return make_error
<Failure
>(
362 "Child benchmarking process exited with non-zero exit code: " +
363 childProcessExitCodeToString(ChildExitCode
));
366 // An error was encountered running the snippet, process it
367 siginfo_t ChildSignalInfo
;
368 if (ptrace(PTRACE_GETSIGINFO
, ChildPID
, NULL
, &ChildSignalInfo
) == -1) {
369 return make_error
<Failure
>("Getting signal info from the child failed: " +
370 Twine(strerror(errno
)));
373 // Send SIGKILL rather than SIGTERM as the child process has no SIGTERM
374 // handlers to run, and calling SIGTERM would mean that ptrace will force
375 // it to block in the signal-delivery-stop for the SIGSEGV/other signals,
377 if (kill(ChildPID
, SIGKILL
) == -1)
378 return make_error
<Failure
>("Failed to kill child benchmarking proces: " +
379 Twine(strerror(errno
)));
381 // Wait for the process to exit so that there are no zombie processes left
383 if (waitpid(ChildPID
, NULL
, 0) == -1)
384 return make_error
<Failure
>("Failed to wait for process to die: " +
385 Twine(strerror(errno
)));
387 if (ChildSignalInfo
.si_signo
== SIGSEGV
)
388 return make_error
<SnippetSegmentationFault
>(
389 reinterpret_cast<uintptr_t>(ChildSignalInfo
.si_addr
));
391 return make_error
<SnippetSignal
>(ChildSignalInfo
.si_signo
);
394 static void setCPUAffinityIfRequested(int CPUToUse
) {
395 // Special case this function for x86_64 for now as certain more esoteric
396 // platforms have different definitions for some of the libc functions that
397 // cause buildtime failures. Additionally, the subprocess executor mode (the
398 // sole mode where this is supported) currently only supports x86_64.
400 // Also check that we have the SYS_getcpu macro defined, meaning the syscall
401 // actually exists within the build environment. We manually use the syscall
402 // rather than the libc wrapper given the wrapper for getcpu is only available
403 // in glibc 2.29 and later.
404 #if defined(__x86_64__) && defined(SYS_getcpu)
405 // Set the CPU affinity for the child process, so that we ensure that if
406 // the user specified a CPU the process should run on, the benchmarking
407 // process is running on that CPU.
410 CPU_SET(CPUToUse
, &CPUMask
);
411 // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they
413 int SetAffinityReturn
= sched_setaffinity(0, sizeof(CPUMask
), &CPUMask
);
414 if (SetAffinityReturn
== -1) {
415 exit(ChildProcessExitCodeE::SetCPUAffinityFailed
);
418 // Check (if assertions are enabled) that we are actually running on the
419 // CPU that was specified by the user.
420 [[maybe_unused
]] unsigned int CurrentCPU
;
421 assert(syscall(SYS_getcpu
, &CurrentCPU
, nullptr) == 0 &&
422 "Expected getcpu call to succeed.");
423 assert(static_cast<int>(CurrentCPU
) == CPUToUse
&&
424 "Expected current CPU to equal the CPU requested by the user");
425 #endif // defined(__x86_64__) && defined(SYS_getcpu)
426 exit(ChildProcessExitCodeE::SetCPUAffinityFailed
);
429 Error
createSubProcessAndRunBenchmark(
430 StringRef CounterName
, SmallVectorImpl
<int64_t> &CounterValues
,
431 ArrayRef
<const char *> ValidationCounters
,
432 SmallVectorImpl
<int64_t> &ValidationCounterValues
) const {
434 int PipeSuccessOrErr
= socketpair(AF_UNIX
, SOCK_DGRAM
, 0, PipeFiles
);
435 if (PipeSuccessOrErr
!= 0) {
436 return make_error
<Failure
>(
437 "Failed to create a pipe for interprocess communication between "
438 "llvm-exegesis and the benchmarking subprocess: " +
439 Twine(strerror(errno
)));
442 SubprocessMemory SPMemory
;
443 Error MemoryInitError
= SPMemory
.initializeSubprocessMemory(getpid());
445 return MemoryInitError
;
447 Error AddMemDefError
=
448 SPMemory
.addMemoryDefinition(Key
.MemoryValues
, getpid());
450 return AddMemDefError
;
452 long ParentTID
= SubprocessMemory::getCurrentTID();
453 pid_t ParentOrChildPID
= fork();
455 if (ParentOrChildPID
== -1) {
456 return make_error
<Failure
>("Failed to create child process: " +
457 Twine(strerror(errno
)));
460 if (ParentOrChildPID
== 0) {
461 if (BenchmarkProcessCPU
.has_value()) {
462 setCPUAffinityIfRequested(*BenchmarkProcessCPU
);
465 // We are in the child process, close the write end of the pipe.
467 // Unregister handlers, signal handling is now handled through ptrace in
469 sys::unregisterHandlers();
470 runChildSubprocess(PipeFiles
[0], Key
, ParentTID
);
471 // The child process terminates in the above function, so we should never
472 // get to this point.
473 llvm_unreachable("Child process didn't exit when expected.");
476 // Close the read end of the pipe as we only need to write to the subprocess
477 // from the parent process.
479 return runParentProcess(ParentOrChildPID
, PipeFiles
[1], CounterName
,
480 CounterValues
, ValidationCounters
,
481 ValidationCounterValues
);
484 void disableCoreDumps() const {
488 setrlimit(RLIMIT_CORE
, &rlim
);
491 [[noreturn
]] void runChildSubprocess(int Pipe
, const BenchmarkKey
&Key
,
492 long ParentTID
) const {
493 // Disable core dumps in the child process as otherwise everytime we
494 // encounter an execution failure like a segmentation fault, we will create
495 // a core dump. We report the information directly rather than require the
496 // user inspect a core dump.
499 // The following occurs within the benchmarking subprocess.
500 pid_t ParentPID
= getppid();
502 Expected
<int> CounterFileDescriptorOrError
=
503 getFileDescriptorFromSocket(Pipe
);
505 if (!CounterFileDescriptorOrError
)
506 exit(ChildProcessExitCodeE::CounterFDReadFailed
);
508 int CounterFileDescriptor
= *CounterFileDescriptorOrError
;
510 // Glibc versions greater than 2.35 automatically call rseq during
511 // initialization. Unmapping the region that glibc sets up for this causes
512 // segfaults in the program. Unregister the rseq region so that we can safely
514 #ifdef GLIBC_INITS_RSEQ
515 unsigned int RseqStructSize
= __rseq_size
;
517 // Glibc v2.40 (the change is also expected to be backported to v2.35)
518 // changes the definition of __rseq_size to be the usable area of the struct
519 // rather than the actual size of the struct. v2.35 uses only 20 bytes of
520 // the 32 byte struct. For now, it should be safe to assume that if the
521 // usable size is less than 32, the actual size of the struct will be 32
522 // bytes given alignment requirements.
523 if (__rseq_size
< 32)
526 long RseqDisableOutput
= syscall(
528 reinterpret_cast<uintptr_t>(__builtin_thread_pointer()) + __rseq_offset
,
529 RseqStructSize
, RSEQ_FLAG_UNREGISTER
, RSEQ_SIG
);
530 if (RseqDisableOutput
!= 0)
531 exit(ChildProcessExitCodeE::RSeqDisableFailed
);
532 #endif // GLIBC_INITS_RSEQ
534 // The frontend that generates the memory annotation structures should
535 // validate that the address to map the snippet in at is a multiple of
536 // the page size. Assert that this is true here.
537 assert(Key
.SnippetAddress
% getpagesize() == 0 &&
538 "The snippet address needs to be aligned to a page boundary.");
540 size_t FunctionDataCopySize
= this->Function
.FunctionBytes
.size();
541 void *MapAddress
= NULL
;
542 int MapFlags
= MAP_PRIVATE
| MAP_ANONYMOUS
;
544 if (Key
.SnippetAddress
!= 0) {
545 MapAddress
= reinterpret_cast<void *>(Key
.SnippetAddress
);
546 MapFlags
|= MAP_FIXED_NOREPLACE
;
549 char *FunctionDataCopy
=
550 (char *)mmap(MapAddress
, FunctionDataCopySize
, PROT_READ
| PROT_WRITE
,
552 if (reinterpret_cast<intptr_t>(FunctionDataCopy
) == -1)
553 exit(ChildProcessExitCodeE::FunctionDataMappingFailed
);
555 memcpy(FunctionDataCopy
, this->Function
.FunctionBytes
.data(),
556 this->Function
.FunctionBytes
.size());
557 mprotect(FunctionDataCopy
, FunctionDataCopySize
, PROT_READ
| PROT_EXEC
);
559 Expected
<int> AuxMemFDOrError
=
560 SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
561 Key
.MemoryValues
, ParentPID
, ParentTID
, CounterFileDescriptor
);
562 if (!AuxMemFDOrError
)
563 exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed
);
565 ((void (*)(size_t, int))(uintptr_t)FunctionDataCopy
)(FunctionDataCopySize
,
571 Expected
<SmallVector
<int64_t, 4>> runWithCounter(
572 StringRef CounterName
, ArrayRef
<const char *> ValidationCounters
,
573 SmallVectorImpl
<int64_t> &ValidationCounterValues
) const override
{
574 SmallVector
<int64_t, 4> Value(1, 0);
575 Error PossibleBenchmarkError
= createSubProcessAndRunBenchmark(
576 CounterName
, Value
, ValidationCounters
, ValidationCounterValues
);
578 if (PossibleBenchmarkError
)
579 return std::move(PossibleBenchmarkError
);
584 const LLVMState
&State
;
585 const ExecutableFunction Function
;
586 const BenchmarkKey
&Key
;
587 const std::optional
<int> BenchmarkProcessCPU
;
592 Expected
<SmallString
<0>> BenchmarkRunner::assembleSnippet(
593 const BenchmarkCode
&BC
, const SnippetRepetitor
&Repetitor
,
594 unsigned MinInstructions
, unsigned LoopBodySize
,
595 bool GenerateMemoryInstructions
) const {
596 const std::vector
<MCInst
> &Instructions
= BC
.Key
.Instructions
;
597 SmallString
<0> Buffer
;
598 raw_svector_ostream
OS(Buffer
);
599 if (Error E
= assembleToStream(
600 State
.getExegesisTarget(), State
.createTargetMachine(), BC
.LiveIns
,
601 Repetitor
.Repeat(Instructions
, MinInstructions
, LoopBodySize
,
602 GenerateMemoryInstructions
),
603 OS
, BC
.Key
, GenerateMemoryInstructions
)) {
609 Expected
<BenchmarkRunner::RunnableConfiguration
>
610 BenchmarkRunner::getRunnableConfiguration(
611 const BenchmarkCode
&BC
, unsigned MinInstructions
, unsigned LoopBodySize
,
612 const SnippetRepetitor
&Repetitor
) const {
613 RunnableConfiguration RC
;
615 Benchmark
&BenchmarkResult
= RC
.BenchmarkResult
;
616 BenchmarkResult
.Mode
= Mode
;
617 BenchmarkResult
.CpuName
=
618 std::string(State
.getTargetMachine().getTargetCPU());
619 BenchmarkResult
.LLVMTriple
=
620 State
.getTargetMachine().getTargetTriple().normalize();
621 BenchmarkResult
.MinInstructions
= MinInstructions
;
622 BenchmarkResult
.Info
= BC
.Info
;
624 const std::vector
<MCInst
> &Instructions
= BC
.Key
.Instructions
;
626 bool GenerateMemoryInstructions
= ExecutionMode
== ExecutionModeE::SubProcess
;
628 BenchmarkResult
.Key
= BC
.Key
;
630 // Assemble at least kMinInstructionsForSnippet instructions by repeating
631 // the snippet for debug/analysis. This is so that the user clearly
632 // understands that the inside instructions are repeated.
633 if (BenchmarkPhaseSelector
> BenchmarkPhaseSelectorE::PrepareSnippet
) {
634 const int MinInstructionsForSnippet
= 4 * Instructions
.size();
635 const int LoopBodySizeForSnippet
= 2 * Instructions
.size();
637 assembleSnippet(BC
, Repetitor
, MinInstructionsForSnippet
,
638 LoopBodySizeForSnippet
, GenerateMemoryInstructions
);
639 if (Error E
= Snippet
.takeError())
642 if (auto Err
= getBenchmarkFunctionBytes(*Snippet
,
643 BenchmarkResult
.AssembledSnippet
))
644 return std::move(Err
);
647 // Assemble enough repetitions of the snippet so we have at least
648 // MinInstructions instructions.
649 if (BenchmarkPhaseSelector
>
650 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet
) {
652 assembleSnippet(BC
, Repetitor
, BenchmarkResult
.MinInstructions
,
653 LoopBodySize
, GenerateMemoryInstructions
);
654 if (Error E
= Snippet
.takeError())
656 RC
.ObjectFile
= getObjectFromBuffer(*Snippet
);
659 return std::move(RC
);
662 Expected
<std::unique_ptr
<BenchmarkRunner::FunctionExecutor
>>
663 BenchmarkRunner::createFunctionExecutor(
664 object::OwningBinary
<object::ObjectFile
> ObjectFile
,
665 const BenchmarkKey
&Key
, std::optional
<int> BenchmarkProcessCPU
) const {
666 switch (ExecutionMode
) {
667 case ExecutionModeE::InProcess
: {
668 if (BenchmarkProcessCPU
.has_value())
669 return make_error
<Failure
>("The inprocess execution mode does not "
670 "support benchmark core pinning.");
672 auto InProcessExecutorOrErr
= InProcessFunctionExecutorImpl::create(
673 State
, std::move(ObjectFile
), Scratch
.get(), BenchmarkProcessCPU
);
674 if (!InProcessExecutorOrErr
)
675 return InProcessExecutorOrErr
.takeError();
677 return std::move(*InProcessExecutorOrErr
);
679 case ExecutionModeE::SubProcess
: {
681 auto SubProcessExecutorOrErr
= SubProcessFunctionExecutorImpl::create(
682 State
, std::move(ObjectFile
), Key
, BenchmarkProcessCPU
);
683 if (!SubProcessExecutorOrErr
)
684 return SubProcessExecutorOrErr
.takeError();
686 return std::move(*SubProcessExecutorOrErr
);
688 return make_error
<Failure
>(
689 "The subprocess execution mode is only supported on Linux");
693 llvm_unreachable("ExecutionMode is outside expected range");
696 std::pair
<Error
, Benchmark
> BenchmarkRunner::runConfiguration(
697 RunnableConfiguration
&&RC
, const std::optional
<StringRef
> &DumpFile
,
698 std::optional
<int> BenchmarkProcessCPU
) const {
699 Benchmark
&BenchmarkResult
= RC
.BenchmarkResult
;
700 object::OwningBinary
<object::ObjectFile
> &ObjectFile
= RC
.ObjectFile
;
702 if (DumpFile
&& BenchmarkPhaseSelector
>
703 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet
) {
704 auto ObjectFilePath
=
705 writeObjectFile(ObjectFile
.getBinary()->getData(), *DumpFile
);
706 if (Error E
= ObjectFilePath
.takeError()) {
707 return {std::move(E
), std::move(BenchmarkResult
)};
709 outs() << "Check generated assembly with: /usr/bin/objdump -d "
710 << *ObjectFilePath
<< "\n";
713 if (BenchmarkPhaseSelector
< BenchmarkPhaseSelectorE::Measure
) {
714 BenchmarkResult
.Error
= "actual measurements skipped.";
715 return {Error::success(), std::move(BenchmarkResult
)};
718 Expected
<std::unique_ptr
<BenchmarkRunner::FunctionExecutor
>> Executor
=
719 createFunctionExecutor(std::move(ObjectFile
), RC
.BenchmarkResult
.Key
,
720 BenchmarkProcessCPU
);
722 return {Executor
.takeError(), std::move(BenchmarkResult
)};
723 auto NewMeasurements
= runMeasurements(**Executor
);
725 if (Error E
= NewMeasurements
.takeError()) {
726 return {std::move(E
), std::move(BenchmarkResult
)};
728 assert(BenchmarkResult
.MinInstructions
> 0 && "invalid MinInstructions");
729 for (BenchmarkMeasure
&BM
: *NewMeasurements
) {
730 // Scale the measurements by the number of instructions.
731 BM
.PerInstructionValue
/= BenchmarkResult
.MinInstructions
;
732 // Scale the measurements by the number of times the entire snippet is
734 BM
.PerSnippetValue
/=
735 std::ceil(BenchmarkResult
.MinInstructions
/
736 static_cast<double>(BenchmarkResult
.Key
.Instructions
.size()));
738 BenchmarkResult
.Measurements
= std::move(*NewMeasurements
);
740 return {Error::success(), std::move(BenchmarkResult
)};
743 Expected
<std::string
>
744 BenchmarkRunner::writeObjectFile(StringRef Buffer
, StringRef FileName
) const {
746 SmallString
<256> ResultPath
= FileName
;
747 if (Error E
= errorCodeToError(
748 FileName
.empty() ? sys::fs::createTemporaryFile("snippet", "o",
749 ResultFD
, ResultPath
)
750 : sys::fs::openFileForReadWrite(
751 FileName
, ResultFD
, sys::fs::CD_CreateAlways
,
754 raw_fd_ostream
OFS(ResultFD
, true /*ShouldClose*/);
755 OFS
.write(Buffer
.data(), Buffer
.size());
757 return std::string(ResultPath
);
760 static bool EventLessThan(const std::pair
<ValidationEvent
, const char *> LHS
,
761 const ValidationEvent RHS
) {
762 return static_cast<int>(LHS
.first
) < static_cast<int>(RHS
);
765 Error
BenchmarkRunner::getValidationCountersToRun(
766 SmallVector
<const char *> &ValCountersToRun
) const {
767 const PfmCountersInfo
&PCI
= State
.getPfmCounters();
768 ValCountersToRun
.reserve(ValidationCounters
.size());
770 ValCountersToRun
.reserve(ValidationCounters
.size());
771 ArrayRef
TargetValidationEvents(PCI
.ValidationEvents
,
772 PCI
.NumValidationEvents
);
773 for (const ValidationEvent RequestedValEvent
: ValidationCounters
) {
775 lower_bound(TargetValidationEvents
, RequestedValEvent
, EventLessThan
);
776 if (ValCounterIt
== TargetValidationEvents
.end() ||
777 ValCounterIt
->first
!= RequestedValEvent
)
778 return make_error
<Failure
>("Cannot create validation counter");
780 assert(ValCounterIt
->first
== RequestedValEvent
&&
781 "The array of validation events from the target should be sorted");
782 ValCountersToRun
.push_back(ValCounterIt
->second
);
785 return Error::success();
788 BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
790 } // namespace exegesis