1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
5 #include <linux/compiler.h>
10 #include <sys/sysinfo.h>
11 #include <sys/resource.h>
14 #include "testing_helpers.h"
/*
 * Print callback installed through libbpf_set_print(): forwards a libbpf
 * message to stderr with vfprintf() and returns vfprintf()'s result.
 *
 * The level/env.verbose test singles out LIBBPF_DEBUG messages while
 * verbose mode is off.
 * NOTE(review): the statement guarded by that test is not visible in this
 * listing -- presumably it drops the message; confirm against the full file.
 */
24 static int libbpf_print_fn(enum libbpf_print_level level
,
25 const char *format
, va_list args
)
27 if (level
== LIBBPF_DEBUG
&& !env
.verbose
)
29 return vfprintf(stderr
, format
, args
);
/*
 * Ask for an unlimited RLIMIT_MEMLOCK: both the soft (rlim_cur) and the
 * hard (rlim_max) limit are set to RLIM_INFINITY.
 *
 * Returns whatever setrlimit() returns; the caller decides how to report
 * a failure.
 */
32 static int bump_memlock_rlimit(void)
34 struct rlimit rlim_new
= {
35 .rlim_cur
= RLIM_INFINITY
,
36 .rlim_max
= RLIM_INFINITY
,
39 return setrlimit(RLIMIT_MEMLOCK
, &rlim_new
);
/*
 * libbpf setup statements: register libbpf_print_fn as libbpf's print
 * callback, then try to lift the memlock limit and report a failure on
 * stderr.
 * NOTE(review): the header of the enclosing function, the declaration of
 * err and the condition guarding the fprintf() are not visible in this
 * listing.
 * NOTE(review): this message has no trailing "\n", unlike the other stderr
 * diagnostics in this file (e.g. "Invalid duration: %s\n").
 */
46 libbpf_set_print(libbpf_print_fn
);
48 err
= bump_memlock_rlimit();
50 fprintf(stderr
, "failed to increase RLIMIT_MEMLOCK: %d", err
);
/*
 * Print one progress line for a single measurement interval.
 *
 * @iter:     iteration number, printed as-is.
 * @res:      sample to report; only ->hits and ->drops are read.
 * @delta_ns: length of the interval in nanoseconds.
 *
 * Rates are reported in millions per second: each counter is divided by
 * 1000000.0 and by the interval length in seconds (delta_ns / 1e9).
 * hits_per_prod is the hit rate divided by env.producer_cnt.
 * NOTE(review): the declaration of hits_per_prod is not visible in this
 * listing.
 */
53 void hits_drops_report_progress(int iter
, struct bench_res
*res
, long delta_ns
)
55 double hits_per_sec
, drops_per_sec
;
58 hits_per_sec
= res
->hits
/ 1000000.0 / (delta_ns
/ 1000000000.0);
59 hits_per_prod
= hits_per_sec
/ env
.producer_cnt
;
60 drops_per_sec
= res
->drops
/ 1000000.0 / (delta_ns
/ 1000000000.0);
/* deviation of this interval from exactly one second, in microseconds */
62 printf("Iter %3d (%7.3lfus): ",
63 iter
, (delta_ns
- 1000000000) / 1000.0);
65 printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n",
66 hits_per_sec
, hits_per_prod
, drops_per_sec
);
/*
 * Print a one-line summary over a series of samples.
 *
 * @res:     array of samples; only .hits and .drops are read.
 * @res_cnt: number of entries in @res.
 *
 * All values are scaled to millions (divided by 1000000.0). The summary
 * shows "mean ± spread" for hits and drops, plus the mean hits divided by
 * env.producer_cnt.
 * NOTE(review): the declaration of i, any guard around the second loop and
 * the divisor following the trailing '/' in both squared-deviation
 * expressions are not visible in this listing, so the exact estimator
 * (sample vs. population) cannot be confirmed from here.
 */
69 void hits_drops_report_final(struct bench_res res
[], int res_cnt
)
72 double hits_mean
= 0.0, drops_mean
= 0.0;
73 double hits_stddev
= 0.0, drops_stddev
= 0.0;
/* means: every sample contributes value / res_cnt (0.0 + res_cnt is the count as a double) */
75 for (i
= 0; i
< res_cnt
; i
++) {
76 hits_mean
+= res
[i
].hits
/ 1000000.0 / (0.0 + res_cnt
);
77 drops_mean
+= res
[i
].drops
/ 1000000.0 / (0.0 + res_cnt
);
/* accumulate squared deviations from the means computed above */
81 for (i
= 0; i
< res_cnt
; i
++) {
82 hits_stddev
+= (hits_mean
- res
[i
].hits
/ 1000000.0) *
83 (hits_mean
- res
[i
].hits
/ 1000000.0) /
85 drops_stddev
+= (drops_mean
- res
[i
].drops
/ 1000000.0) *
86 (drops_mean
- res
[i
].drops
/ 1000000.0) /
/* the accumulators hold variances up to here; take the square root in place */
89 hits_stddev
= sqrt(hits_stddev
);
90 drops_stddev
= sqrt(drops_stddev
);
92 printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
93 hits_mean
, hits_stddev
, hits_mean
/ env
.producer_cnt
);
94 printf("drops %8.3lf \u00B1 %5.3lfM/s\n",
95 drops_mean
, drops_stddev
);
/*
 * Program identification strings for argp. argp_program_version and
 * argp_program_bug_address are the global names glibc's argp reads for its
 * version and bug-report output; argp_program_doc is the help text that
 * parse_cmdline_args() passes through the .doc member of its struct argp.
 * NOTE(review): some lines of the help text are not visible in this
 * listing.
 */
98 const char *argp_program_version
= "benchmark";
99 const char *argp_program_bug_address
= "<bpf@vger.kernel.org>";
100 const char argp_program_doc
[] =
101 "benchmark Generic benchmarking framework.\n"
103 "This tool runs benchmarks.\n"
105 "USAGE: benchmark <bench-name>\n"
108 " # run 'count-local' benchmark with 1 producer and 1 consumer\n"
109 " benchmark count-local\n"
110 " # run 'count-local' with 16 producer and 8 consumer thread, pinned to CPUs\n"
111 " benchmark -p16 -c8 -a count-local\n";
/*
 * Option keys used by the "prod-affinity" and "cons-affinity" entries of
 * opts[] and matched by the corresponding case labels in parse_arg().
 * NOTE(review): the line opening the enclosing enum is not visible in this
 * listing.
 */
114 ARG_PROD_AFFINITY_SET
= 1000,
115 ARG_CONS_AFFINITY_SET
= 1001,
/*
 * Command-line option table.
 * Each entry is { long name, key, argument placeholder, flags, help text };
 * a NULL placeholder is used for the options that take no argument.
 * The two CPU-set options use the ARG_*_AFFINITY_SET keys instead of a
 * character key.
 * NOTE(review): the end of the table (terminator entry and closing brace)
 * is not visible in this listing.
 */
118 static const struct argp_option opts
[] = {
119 { "list", 'l', NULL
, 0, "List available benchmarks"},
120 { "duration", 'd', "SEC", 0, "Duration of benchmark, seconds"},
121 { "warmup", 'w', "SEC", 0, "Warm-up period, seconds"},
122 { "producers", 'p', "NUM", 0, "Number of producer threads"},
123 { "consumers", 'c', "NUM", 0, "Number of consumer threads"},
124 { "verbose", 'v', NULL
, 0, "Verbose debug output"},
125 { "affinity", 'a', NULL
, 0, "Set consumer/producer thread affinity"},
126 { "prod-affinity", ARG_PROD_AFFINITY_SET
, "CPUSET", 0,
127 "Set of CPUs for producer threads; implies --affinity"},
128 { "cons-affinity", ARG_CONS_AFFINITY_SET
, "CPUSET", 0,
129 "Set of CPUs for consumer threads; implies --affinity"},
/*
 * Child argp parsers. bench_ringbufs_argp is only declared here (extern);
 * its definition is not in this listing. bench_parsers is handed to argp
 * through the .children member in parse_cmdline_args(), with
 * "Ring buffers benchmark" as the header string of that child.
 * NOTE(review): the end of the array is not visible in this listing.
 */
133 extern struct argp bench_ringbufs_argp
;
135 static const struct argp_child bench_parsers
[] = {
136 { &bench_ringbufs_argp
, 0, "Ring buffers benchmark", 0 },
/*
 * argp parser callback.
 *
 * @key:   option key being processed.
 * @arg:   argument text of the option (or a positional argument).
 * @state: argp parser state.
 *
 * Stores the parsed settings in the global env. The last visible statement
 * returns ARGP_ERR_UNKNOWN.
 * NOTE(review): the switch statement, most case labels, the statements
 * that follow each error message and the other return paths are not
 * visible in this listing.
 */
140 static error_t
parse_arg(int key
, char *arg
, struct argp_state
*state
)
/*
 * Numeric options (duration, warm-up, producer and consumer counts) are
 * parsed as base-10 and rejected when the result is <= 0.
 * NOTE(review): strtol() is called with a NULL end pointer, so trailing
 * non-digit characters in arg go undetected.
 */
152 env
.duration_sec
= strtol(arg
, NULL
, 10);
153 if (env
.duration_sec
<= 0) {
154 fprintf(stderr
, "Invalid duration: %s\n", arg
);
159 env
.warmup_sec
= strtol(arg
, NULL
, 10);
160 if (env
.warmup_sec
<= 0) {
161 fprintf(stderr
, "Invalid warm-up duration: %s\n", arg
);
166 env
.producer_cnt
= strtol(arg
, NULL
, 10);
167 if (env
.producer_cnt
<= 0) {
168 fprintf(stderr
, "Invalid producer count: %s\n", arg
);
173 env
.consumer_cnt
= strtol(arg
, NULL
, 10);
174 if (env
.consumer_cnt
<= 0) {
175 fprintf(stderr
, "Invalid consumer count: %s\n", arg
);
/*
 * CPU-set options: parse_num_list() fills the cpus / cpus_len pair of the
 * matching set; a nonzero return is reported as a format error.
 * NOTE(review): these two messages have no trailing "\n", unlike the other
 * diagnostics in this function.
 */
182 case ARG_PROD_AFFINITY_SET
:
184 if (parse_num_list(arg
, &env
.prod_cpus
.cpus
,
185 &env
.prod_cpus
.cpus_len
)) {
186 fprintf(stderr
, "Invalid format of CPU set for producers.");
190 case ARG_CONS_AFFINITY_SET
:
192 if (parse_num_list(arg
, &env
.cons_cpus
.cpus
,
193 &env
.cons_cpus
.cpus_len
)) {
194 fprintf(stderr
, "Invalid format of CPU set for consumers.");
201 "Unrecognized positional argument: %s\n", arg
);
/*
 * Keep a heap copy of the argument as the benchmark name.
 * NOTE(review): no check of strdup()'s result is visible in this listing.
 */
204 env
.bench_name
= strdup(arg
);
207 return ARGP_ERR_UNKNOWN
;
/*
 * Parse the command line with argp.
 *
 * The parser description uses argp_program_doc as its help text and
 * bench_parsers as child parsers. After parsing, if neither env.list nor
 * env.bench_name is set, the ARGP_HELP_DOC help is printed to stderr under
 * the program name "bench".
 * NOTE(review): the remaining members of the struct argp initializer and
 * the statements executed when argp_parse() fails or after argp_help() are
 * not visible in this listing.
 */
212 static void parse_cmdline_args(int argc
, char **argv
)
214 static const struct argp argp
= {
217 .doc
= argp_program_doc
,
218 .children
= bench_parsers
,
220 if (argp_parse(&argp
, argc
, argv
, 0, NULL
, NULL
))
222 if (!env
.list
&& !env
.bench_name
) {
223 argp_help(&argp
, stderr
, ARGP_HELP_DOC
, "bench");
/* forward declaration: the signal handler below calls it before its definition */
228 static void collect_measurements(long delta_ns
);
/* timestamp of the previous tick; seeded in setup_timer(), updated by the handler */
230 static __u64 last_time_ns
;
/*
 * SIGALRM handler: computes the time elapsed since the previous tick,
 * hands it to collect_measurements() and then stores the new timestamp.
 * NOTE(review): collect_measurements() locks a pthread mutex and calls a
 * benchmark-supplied report_progress callback from signal context; neither
 * is on POSIX's async-signal-safe list -- confirm this is acceptable for
 * this tool.
 */
231 static void sigalarm_handler(int signo
)
233 long new_time_ns
= get_time_ns();
234 long delta_ns
= new_time_ns
- last_time_ns
;
236 collect_measurements(delta_ns
);
238 last_time_ns
= new_time_ns
;
/*
 * Installs sigalarm_handler for SIGALRM and arms ITIMER_REAL with both the
 * initial expiry (it_value) and the period (it_interval) set to 1 second.
 * last_time_ns is seeded before the timer is armed, so the first tick's
 * delta is measured from this point.
 * NOTE(review): the declaration of err, the conditions guarding the two
 * fprintf() calls and what follows them are not visible in this listing.
 * NOTE(review): the empty parameter list "()" is not a prototype before
 * C23; "(void)" would be the strict spelling.
 */
241 /* set up periodic 1-second timer */
242 static void setup_timer()
244 static struct sigaction sigalarm_action
= {
245 .sa_handler
= sigalarm_handler
,
247 struct itimerval timer_settings
= {};
250 last_time_ns
= get_time_ns();
251 err
= sigaction(SIGALRM
, &sigalarm_action
, NULL
);
253 fprintf(stderr
, "failed to install SIGALRM handler: %d\n", -errno
);
256 timer_settings
.it_interval
.tv_sec
= 1;
257 timer_settings
.it_value
.tv_sec
= 1;
258 err
= setitimer(ITIMER_REAL
, &timer_settings
, NULL
);
260 fprintf(stderr
, "failed to arm interval timer: %d\n", -errno
);
/*
 * Restrict @thread to run on CPU @cpu.
 *
 * @cpu is added to cpuset with CPU_SET() and the set is applied with
 * pthread_setaffinity_np(); a nonzero return is reported on stderr.
 * NOTE(review): the declaration and initialisation of cpuset, the
 * arguments of the fprintf() and the rest of the error path are not
 * visible in this listing.
 */
265 static void set_thread_affinity(pthread_t thread
, int cpu
)
270 CPU_SET(cpu
, &cpuset
);
271 if (pthread_setaffinity_np(thread
, sizeof(cpuset
), &cpuset
)) {
272 fprintf(stderr
, "setting affinity to CPU #%d failed: %d\n",
/*
 * Hand out the next CPU number from @cpu_set and advance its cursor.
 *
 * The visible loop scans cpu_set->cpus[] from cpu_set->next_cpu up to
 * cpus_len for a nonzero entry and, on a hit, moves the cursor to i + 1.
 * The final statement returns the current cursor value and post-increments
 * it.
 * NOTE(review): the declaration of i, the condition choosing between the
 * scan and the final return, the value returned on a hit and what follows
 * the "Not enough CPUs" message are not visible in this listing.
 */
278 static int next_cpu(struct cpu_set
*cpu_set
)
283 /* find next available CPU */
284 for (i
= cpu_set
->next_cpu
; i
< cpu_set
->cpus_len
; i
++) {
285 if (cpu_set
->cpus
[i
]) {
286 cpu_set
->next_cpu
= i
+ 1;
290 fprintf(stderr
, "Not enough CPUs specified, need CPU #%d or higher.\n", i
);
294 return cpu_set
->next_cpu
++;
/*
 * Run-time bookkeeping of the benchmark run:
 *   results   - array of samples, indexed by iteration in
 *               collect_measurements();
 *   consumers - thread handles filled by pthread_create() in
 *               setup_benchmark();
 *   producers - same, for producer threads.
 * NOTE(review): other members (the code elsewhere uses state.res_cnt) and
 * the declarator naming the instance are not visible in this listing.
 */
297 static struct bench_state
{
299 struct bench_res
*results
;
300 pthread_t
*consumers
;
301 pthread_t
*producers
;
/* benchmark descriptor dereferenced by setup_benchmark(), collect_measurements() and main(); starts out NULL */
304 const struct bench
*bench
= NULL
;
/*
 * Benchmark descriptors. These are declarations only (extern); the
 * definitions are not part of this listing. Pointers to them are collected
 * in the benchs[] table.
 */
306 extern const struct bench bench_count_global
;
307 extern const struct bench bench_count_local
;
308 extern const struct bench bench_rename_base
;
309 extern const struct bench bench_rename_kprobe
;
310 extern const struct bench bench_rename_kretprobe
;
311 extern const struct bench bench_rename_rawtp
;
312 extern const struct bench bench_rename_fentry
;
313 extern const struct bench bench_rename_fexit
;
314 extern const struct bench bench_trig_base
;
315 extern const struct bench bench_trig_tp
;
316 extern const struct bench bench_trig_rawtp
;
317 extern const struct bench bench_trig_kprobe
;
318 extern const struct bench bench_trig_fentry
;
319 extern const struct bench bench_trig_fentry_sleep
;
320 extern const struct bench bench_trig_fmodret
;
321 extern const struct bench bench_rb_libbpf
;
322 extern const struct bench bench_rb_custom
;
323 extern const struct bench bench_pb_libbpf
;
324 extern const struct bench bench_pb_custom
;
/*
 * Table of available benchmarks: searched by name in setup_benchmark() and
 * printed entry by entry in main().
 * NOTE(review): only some of the entries and none of the closing lines are
 * visible in this listing.
 */
326 static const struct bench
*benchs
[] = {
330 &bench_rename_kprobe
,
331 &bench_rename_kretprobe
,
333 &bench_rename_fentry
,
340 &bench_trig_fentry_sleep
,
/*
 * Look up the requested benchmark, allocate bookkeeping and start the
 * worker threads. Steps visible in this listing:
 *   1. complain if env.bench_name is not set;
 *   2. compare env.bench_name against the name of every benchs[] entry
 *      with strcmp(), and complain if the benchmark is not found;
 *   3. calloc() the producer and consumer thread-handle arrays (sized by
 *      env.producer_cnt / env.consumer_cnt) and a results array of
 *      env.duration_sec + env.warmup_sec + 2 entries;
 *   4. create one thread per consumer running bench->consumer_thread with
 *      the thread index as its argument, and pass the CPU returned by
 *      next_cpu(&env.cons_cpus) to set_thread_affinity();
 *   5. if no producer CPU list was given (env.prod_cpus.cpus is NULL),
 *      start the producer CPU cursor where the consumer cursor stopped;
 *   6. create the producer threads the same way, using
 *      bench->producer_thread and env.prod_cpus.
 * NOTE(review): many lines are not visible here -- the local declarations,
 * what happens on a name match, the statements after each error message or
 * failed allocation, the conditions around the set_thread_affinity() calls
 * and anything between the allocation check and the first thread loop.
 * NOTE(review): the empty parameter list "()" is not a prototype before
 * C23; "(void)" would be the strict spelling.
 */
348 static void setup_benchmark()
352 if (!env
.bench_name
) {
353 fprintf(stderr
, "benchmark name is not specified\n");
357 for (i
= 0; i
< ARRAY_SIZE(benchs
); i
++) {
358 if (strcmp(benchs
[i
]->name
, env
.bench_name
) == 0) {
364 fprintf(stderr
, "benchmark '%s' not found\n", env
.bench_name
);
368 printf("Setting up benchmark '%s'...\n", bench
->name
);
370 state
.producers
= calloc(env
.producer_cnt
, sizeof(*state
.producers
));
371 state
.consumers
= calloc(env
.consumer_cnt
, sizeof(*state
.consumers
));
372 state
.results
= calloc(env
.duration_sec
+ env
.warmup_sec
+ 2,
373 sizeof(*state
.results
));
374 if (!state
.producers
|| !state
.consumers
|| !state
.results
)
382 for (i
= 0; i
< env
.consumer_cnt
; i
++) {
383 err
= pthread_create(&state
.consumers
[i
], NULL
,
384 bench
->consumer_thread
, (void *)(long)i
);
386 fprintf(stderr
, "failed to create consumer thread #%d: %d\n",
391 set_thread_affinity(state
.consumers
[i
],
392 next_cpu(&env
.cons_cpus
));
395 /* unless explicit producer CPU list is specified, continue after
398 if (!env
.prod_cpus
.cpus
)
399 env
.prod_cpus
.next_cpu
= env
.cons_cpus
.next_cpu
;
401 for (i
= 0; i
< env
.producer_cnt
; i
++) {
402 err
= pthread_create(&state
.producers
[i
], NULL
,
403 bench
->producer_thread
, (void *)(long)i
);
405 fprintf(stderr
, "failed to create producer thread #%d: %d\n",
410 set_thread_affinity(state
.producers
[i
],
411 next_cpu(&env
.prod_cpus
));
414 printf("Benchmark '%s' started.\n", bench
->name
);
/*
 * Completion notification: collect_measurements() signals bench_done while
 * holding bench_done_mtx, and main() waits on the same pair.
 */
417 static pthread_mutex_t bench_done_mtx
= PTHREAD_MUTEX_INITIALIZER
;
418 static pthread_cond_t bench_done
= PTHREAD_COND_INITIALIZER
;
/*
 * Record one sample; called from sigalarm_handler() with the time elapsed
 * since the previous tick.
 *
 * @delta_ns: elapsed time in nanoseconds, forwarded to report_progress.
 *
 * Takes the next slot of state.results (state.res_cnt is post-incremented),
 * calls bench->report_progress if it is set, and signals bench_done when
 * the iteration number equals env.duration_sec + env.warmup_sec.
 * NOTE(review): the statement that fills *res is not visible in this
 * listing.
 * NOTE(review): state.results holds duration_sec + warmup_sec + 2 entries
 * (see setup_benchmark()); nothing visible here stops res_cnt from moving
 * past that if ticks keep arriving after the signal.
 */
420 static void collect_measurements(long delta_ns
) {
421 int iter
= state
.res_cnt
++;
422 struct bench_res
*res
= &state
.results
[iter
];
426 if (bench
->report_progress
)
427 bench
->report_progress(iter
, res
, delta_ns
);
/* last planned iteration reached: wake up the waiter in main() */
429 if (iter
== env
.duration_sec
+ env
.warmup_sec
) {
430 pthread_mutex_lock(&bench_done_mtx
);
431 pthread_cond_signal(&bench_done
);
432 pthread_mutex_unlock(&bench_done_mtx
);
/*
 * Program entry point. Steps visible in this listing:
 *   - parse the command line into env;
 *   - print the name of every benchs[] entry under "Available benchmarks:";
 *   - block on the bench_done condition variable until
 *     collect_measurements() signals it;
 *   - if bench->report_final is set, call it on the results array advanced
 *     by env.warmup_sec entries, with the count reduced by the same amount.
 * NOTE(review): the condition selecting the listing mode, the calls that
 * set up the benchmark and the timer, and the end of the function are not
 * visible in this listing.
 * NOTE(review): pthread_cond_wait() is called once with no predicate loop
 * visible; POSIX allows spurious wakeups, so a "done" flag checked in a
 * loop would be the robust form.
 * NOTE(review): the inline comment says "first sample", while the code
 * skips env.warmup_sec samples.
 */
436 int main(int argc
, char **argv
)
438 parse_cmdline_args(argc
, argv
);
443 printf("Available benchmarks:\n");
444 for (i
= 0; i
< ARRAY_SIZE(benchs
); i
++) {
445 printf("- %s\n", benchs
[i
]->name
);
454 pthread_mutex_lock(&bench_done_mtx
);
455 pthread_cond_wait(&bench_done
, &bench_done_mtx
);
456 pthread_mutex_unlock(&bench_done_mtx
);
458 if (bench
->report_final
)
459 /* skip first sample */
460 bench
->report_final(state
.results
+ env
.warmup_sec
,
461 state
.res_cnt
- env
.warmup_sec
);