1 // SPDX-License-Identifier: GPL-2.0
/*
 * Benchmark synthesis of perf events such as at the start of a 'perf
 * record'. Synthesis is done on the current process and the 'dummy' event
 * handlers are invoked that support dump_trace but otherwise do nothing.
 *
 * Copyright 2019 Google LLC.
 */
11 #include "../util/debug.h"
12 #include "../util/session.h"
13 #include "../util/stat.h"
14 #include "../util/synthetic-events.h"
15 #include "../util/target.h"
16 #include "../util/thread_map.h"
17 #include "../util/tool.h"
18 #include "../util/util.h"
19 #include <linux/atomic.h>
20 #include <linux/err.h>
21 #include <linux/time64.h>
22 #include <subcmd/parse-options.h>
/*
 * Benchmark tunables. Each variable is the storage behind one of the
 * command-line options listed in options[].
 */

/* Lowest synthesis thread count tried by the multi-threaded run (-m). */
static unsigned int min_threads = 1;

/*
 * Highest synthesis thread count tried by the multi-threaded run (-M).
 * UINT_MAX acts as the "not set" marker: run_multi_threaded() replaces it
 * with sysconf(_SC_NPROCESSORS_ONLN).
 */
static unsigned int max_threads = UINT_MAX;

/* Number of iterations averaged by the single-threaded benchmark (-i). */
static unsigned int single_iterations = 10000;

/* Number of iterations averaged by the multi-threaded benchmark (-I). */
static unsigned int multi_iterations = 10;
/*
 * Command-line options accepted by the synthesize benchmark.
 *
 * -s/--st and -t/--mt are boolean switches whose values are stored through
 * &run_st and &run_mt. -m/-M store the thread-count range and -i/-I store
 * the iteration counts, each into the unsigned int named in its entry.
 *
 * NOTE(review): neither a terminating entry nor the closing brace of this
 * table is visible in this listing -- confirm against the complete file.
 */
31 static const struct option options
[] = {
32 OPT_BOOLEAN('s', "st", &run_st
, "Run single threaded benchmark"),
33 OPT_BOOLEAN('t', "mt", &run_mt
, "Run multi-threaded benchmark"),
34 OPT_UINTEGER('m', "min-threads", &min_threads
,
35 "Minimum number of threads in multithreaded bench"),
36 OPT_UINTEGER('M', "max-threads", &max_threads
,
37 "Maximum number of threads in multithreaded bench"),
38 OPT_UINTEGER('i', "single-iterations", &single_iterations
,
39 "Number of iterations used to compute single-threaded average"),
40 OPT_UINTEGER('I', "multi-iterations", &multi_iterations
,
41 "Number of iterations used to compute multi-threaded average"),
/*
 * Usage text for this benchmark; handed to parse_options() and
 * usage_with_options() in bench_synthesize().
 *
 * NOTE(review): the end of this array is not visible in this listing.
 */
45 static const char *const bench_usage
[] = {
46 "perf bench internals synthesize <options>",
50 static atomic_t event_count
;
/*
 * Event callback passed to __machine__synthesize_threads() by both
 * benchmark loops. Every parameter is marked __maybe_unused; the only
 * visible work is an atomic increment of event_count, which the loops
 * later read to report how many events one iteration produced.
 *
 * NOTE(review): the function's braces and its return statement are not
 * visible in this listing, so the returned value cannot be confirmed here.
 */
52 static int process_synthesized_event(struct perf_tool
*tool __maybe_unused
,
53 union perf_event
*event __maybe_unused
,
54 struct perf_sample
*sample __maybe_unused
,
55 struct machine
*machine __maybe_unused
)
57 atomic_inc(&event_count
);
/*
 * Time single_iterations rounds of event synthesis and print the averages.
 *
 * @session:   session whose machines.host is handed to
 *             __machine__synthesize_threads()
 * @threads:   thread map parameter (its use is not visible in this listing)
 * @target:    target parameter (its use is not visible in this listing)
 * @data_mmap: when true the first report line reads "data synthesis"
 *             instead of "synthesis"
 *
 * Each iteration resets event_count, brackets the synthesis call with
 * gettimeofday(), and feeds the elapsed microseconds and the resulting
 * event count into two running statistics. After the loop the average and
 * standard deviation of both are printed, followed by the average time per
 * event (time average divided by event average).
 *
 * NOTE(review): the declarations of i, err and runtime_us, any check of
 * err after the synthesis call, and the return statement are not visible
 * in this listing.
 */
61 static int do_run_single_threaded(struct perf_session
*session
,
62 struct perf_thread_map
*threads
,
63 struct target
*target
, bool data_mmap
)
/* The single-threaded benchmark always asks for one synthesis thread. */
65 const unsigned int nr_threads_synthesize
= 1;
66 struct timeval start
, end
, diff
;
69 double time_average
, time_stddev
, event_average
, event_stddev
;
71 struct stats time_stats
, event_stats
;
73 init_stats(&time_stats
);
74 init_stats(&event_stats
);
76 for (i
= 0; i
< single_iterations
; i
++) {
/* Start every iteration from a zero event count. */
77 atomic_set(&event_count
, 0);
78 gettimeofday(&start
, NULL
);
/*
 * NOTE(review): some arguments of this call appear to be missing from this
 * listing -- confirm the full argument list against the complete file.
 */
79 err
= __machine__synthesize_threads(&session
->machines
.host
,
82 process_synthesized_event
,
84 nr_threads_synthesize
);
88 gettimeofday(&end
, NULL
);
89 timersub(&end
, &start
, &diff
);
/* Flatten the elapsed timeval into microseconds. */
90 runtime_us
= diff
.tv_sec
* USEC_PER_SEC
+ diff
.tv_usec
;
91 update_stats(&time_stats
, runtime_us
);
92 update_stats(&event_stats
, atomic_read(&event_count
));
95 time_average
= avg_stats(&time_stats
);
96 time_stddev
= stddev_stats(&time_stats
);
97 printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
98 data_mmap
? "data " : "", time_average
, time_stddev
);
100 event_average
= avg_stats(&event_stats
);
101 event_stddev
= stddev_stats(&event_stats
);
102 printf(" Average num. events: %.3f (+- %.3f)\n",
103 event_average
, event_stddev
);
105 printf(" Average time per event %.3f usec\n",
106 time_average
/ event_average
);
/*
 * Run the single-threaded synthesis benchmark on the perf process itself.
 *
 * Calls perf_set_singlethreaded(), creates a session with
 * perf_session__new() and a thread map for the current pid (getpid()),
 * then invokes do_run_single_threaded() twice: first with data_mmap false,
 * then with data_mmap true. The thread map and the session are released
 * with perf_thread_map__put() and perf_session__delete() afterwards.
 *
 * Visible error handling: if perf_session__new() yields an error pointer,
 * a message is logged and PTR_ERR(session) is returned. A "Thread map
 * creation failed." message follows the thread_map__new_by_pid() call; its
 * guarding condition is not visible.
 *
 * NOTE(review): the contents of the target initializer, the call that
 * prints the banner string, the declaration of and checks on err, and the
 * final return are not visible in this listing.
 */
110 static int run_single_threaded(void)
112 struct perf_session
*session
;
113 struct target target
= {
116 struct perf_thread_map
*threads
;
119 perf_set_singlethreaded();
120 session
= perf_session__new(NULL
, false, NULL
);
121 if (IS_ERR(session
)) {
122 pr_err("Session creation failed.\n");
123 return PTR_ERR(session
);
/* Restrict the thread map to this process. */
125 threads
= thread_map__new_by_pid(getpid());
127 pr_err("Thread map creation failed.\n");
133 "Computing performance of single threaded perf event synthesis by\n"
134 "synthesizing events on the perf process itself:");
/* First pass: data_mmap == false. */
136 err
= do_run_single_threaded(session
, threads
, &target
, false);
/* Second pass: data_mmap == true. */
140 err
= do_run_single_threaded(session
, threads
, &target
, true);
144 perf_thread_map__put(threads
);
146 perf_session__delete(session
);
/*
 * Time multi_iterations rounds of event synthesis using
 * nr_threads_synthesize synthesis threads and print the averages.
 *
 * @target:                target parameter (its use is not visible in this
 *                         listing)
 * @nr_threads_synthesize: thread count passed as the last argument of
 *                         __machine__synthesize_threads()
 *
 * Each iteration creates its own session, resets event_count, brackets the
 * synthesis call with gettimeofday(), records the elapsed microseconds and
 * the event count in two running statistics, and deletes the session.
 * Session creation happens before the start timestamp and the final delete
 * after the end timestamp, so neither is part of the measured interval.
 * After the loop the average and standard deviation of both statistics and
 * the average time per event are printed.
 *
 * NOTE(review): the declarations of i, err and runtime_us, the conditions
 * guarding the early return and the first perf_session__delete() call, and
 * the final return are not visible in this listing.
 */
150 static int do_run_multi_threaded(struct target
*target
,
151 unsigned int nr_threads_synthesize
)
153 struct timeval start
, end
, diff
;
156 double time_average
, time_stddev
, event_average
, event_stddev
;
158 struct stats time_stats
, event_stats
;
159 struct perf_session
*session
;
161 init_stats(&time_stats
);
162 init_stats(&event_stats
);
163 for (i
= 0; i
< multi_iterations
; i
++) {
/* A fresh session per iteration, created before the timer starts. */
164 session
= perf_session__new(NULL
, false, NULL
);
166 return PTR_ERR(session
);
168 atomic_set(&event_count
, 0);
169 gettimeofday(&start
, NULL
);
/*
 * NOTE(review): some arguments of this call appear to be missing from this
 * listing -- confirm the full argument list against the complete file.
 */
170 err
= __machine__synthesize_threads(&session
->machines
.host
,
173 process_synthesized_event
,
175 nr_threads_synthesize
);
/*
 * NOTE(review): presumably the failure path of the synthesis call; the
 * guarding condition is not visible in this listing.
 */
177 perf_session__delete(session
);
181 gettimeofday(&end
, NULL
);
182 timersub(&end
, &start
, &diff
);
/* Flatten the elapsed timeval into microseconds. */
183 runtime_us
= diff
.tv_sec
* USEC_PER_SEC
+ diff
.tv_usec
;
184 update_stats(&time_stats
, runtime_us
);
185 update_stats(&event_stats
, atomic_read(&event_count
));
/* Release the per-iteration session once the sample is recorded. */
186 perf_session__delete(session
);
189 time_average
= avg_stats(&time_stats
);
190 time_stddev
= stddev_stats(&time_stats
);
191 printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n",
192 time_average
, time_stddev
);
194 event_average
= avg_stats(&event_stats
);
195 event_stddev
= stddev_stats(&event_stats
);
196 printf(" Average num. events: %.3f (+- %.3f)\n",
197 event_average
, event_stddev
);
199 printf(" Average time per event %.3f usec\n",
200 time_average
/ event_average
);
/*
 * Run the multi-threaded synthesis benchmark once for every thread count
 * from min_threads up to and including max_threads.
 *
 * If max_threads still holds UINT_MAX it is replaced by
 * sysconf(_SC_NPROCESSORS_ONLN). For each thread count the function calls
 * perf_set_singlethreaded() when the count is 1 (a perf_set_multithreaded()
 * call appears on the following line; the else that presumably selects it
 * is not visible), prints the count, and runs do_run_multi_threaded().
 * perf_set_singlethreaded() is called again once the loop is done.
 *
 * NOTE(review): the contents of the target initializer, the call that
 * prints the banner string, the declaration of and check on err, and the
 * return statements are not visible in this listing.
 */
204 static int run_multi_threaded(void)
206 struct target target
= {
209 unsigned int nr_threads_synthesize
;
/*
 * UINT_MAX marks "no upper bound requested".
 * NOTE(review): sysconf() returns a long and -1 on failure; stored into an
 * unsigned int that would leave max_threads at UINT_MAX -- confirm whether
 * that case needs handling.
 */
212 if (max_threads
== UINT_MAX
)
213 max_threads
= sysconf(_SC_NPROCESSORS_ONLN
);
216 "Computing performance of multi threaded perf event synthesis by\n"
217 "synthesizing events on CPU 0:");
219 for (nr_threads_synthesize
= min_threads
;
220 nr_threads_synthesize
<= max_threads
;
221 nr_threads_synthesize
++) {
222 if (nr_threads_synthesize
== 1)
223 perf_set_singlethreaded();
225 perf_set_multithreaded();
227 printf(" Number of synthesis threads: %u\n",
228 nr_threads_synthesize
);
230 err
= do_run_multi_threaded(&target
, nr_threads_synthesize
);
234 perf_set_singlethreaded();
238 int bench_synthesize(int argc
, const char **argv
)
242 argc
= parse_options(argc
, argv
, options
, bench_usage
, 0);
244 usage_with_options(bench_usage
, options
);
/*
 * If neither single threaded nor multi-threaded is specified, default
 * to running just single threaded.
 */
252 if (!run_st
&& !run_mt
)
256 err
= run_single_threaded();
259 err
= run_multi_threaded();