// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Davidlohr Bueso.
 *
 * Block a bunch of threads and let parallel waker threads wake up an
 * equal number of them. The program output reflects the avg latency
 * for each individual thread to service its share of work. Ultimately
 * it can be used to measure futex_wake() changes.
 */
11 #include <linux/compiler.h>
12 #include "../util/debug.h"
13 #include "../util/mutex.h"
15 #ifndef HAVE_PTHREAD_BARRIER
16 int bench_futex_wake_parallel(int argc __maybe_unused
, const char **argv __maybe_unused
)
18 pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__
);
21 #else /* HAVE_PTHREAD_BARRIER */
/* For the CPU_*() macros */
#include <sched.h>
#include <errno.h>
#include <unistd.h>
#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include <linux/kernel.h>
#include <linux/time64.h>
#include <perf/cpumap.h>
/* Per-waker bookkeeping; one instance per waking thread. */
struct thread_data {
	pthread_t worker;	/* thread handle filled in by pthread_create() */
	unsigned int nwoken;	/* value returned by this waker's futex_wake() */
	struct timeval runtime;	/* elapsed time measured around futex_wake() */
};
46 static unsigned int nwakes
= 1;
48 /* all threads will block on the same futex -- hash bucket chaos ;) */
49 static u_int32_t futex
= 0;
51 static pthread_t
*blocked_worker
;
52 static bool done
= false;
53 static struct mutex thread_lock
;
54 static struct cond thread_parent
, thread_worker
;
55 static pthread_barrier_t barrier
;
56 static struct stats waketime_stats
, wakeup_stats
;
57 static unsigned int threads_starting
;
58 static int futex_flag
= 0;
60 static struct bench_futex_parameters params
;
62 static const struct option options
[] = {
63 OPT_UINTEGER('t', "threads", ¶ms
.nthreads
, "Specify amount of threads"),
64 OPT_UINTEGER('w', "nwakers", ¶ms
.nwakes
, "Specify amount of waking threads"),
65 OPT_BOOLEAN( 's', "silent", ¶ms
.silent
, "Silent mode: do not display data/details"),
66 OPT_BOOLEAN( 'S', "shared", ¶ms
.fshared
, "Use shared futexes instead of private ones"),
67 OPT_BOOLEAN( 'm', "mlockall", ¶ms
.mlockall
, "Lock all current and future memory"),
/* Usage lines handed to the option parser; the list is NULL-terminated. */
static const char * const bench_futex_wake_parallel_usage[] = {
	"perf bench futex wake-parallel <options>",
	NULL
};
77 static void *waking_workerfn(void *arg
)
79 struct thread_data
*waker
= (struct thread_data
*) arg
;
80 struct timeval start
, end
;
82 pthread_barrier_wait(&barrier
);
84 gettimeofday(&start
, NULL
);
86 waker
->nwoken
= futex_wake(&futex
, nwakes
, futex_flag
);
87 if (waker
->nwoken
!= nwakes
)
88 warnx("couldn't wakeup all tasks (%d/%d)",
89 waker
->nwoken
, nwakes
);
91 gettimeofday(&end
, NULL
);
92 timersub(&end
, &start
, &waker
->runtime
);
98 static void wakeup_threads(struct thread_data
*td
)
101 pthread_attr_t thread_attr
;
103 pthread_attr_init(&thread_attr
);
104 pthread_attr_setdetachstate(&thread_attr
, PTHREAD_CREATE_JOINABLE
);
106 pthread_barrier_init(&barrier
, NULL
, params
.nwakes
+ 1);
108 /* create and block all threads */
109 for (i
= 0; i
< params
.nwakes
; i
++) {
111 * Thread creation order will impact per-thread latency
112 * as it will affect the order to acquire the hb spinlock.
113 * For now let the scheduler decide.
115 if (pthread_create(&td
[i
].worker
, &thread_attr
,
116 waking_workerfn
, (void *)&td
[i
]))
117 err(EXIT_FAILURE
, "pthread_create");
120 pthread_barrier_wait(&barrier
);
122 for (i
= 0; i
< params
.nwakes
; i
++)
123 if (pthread_join(td
[i
].worker
, NULL
))
124 err(EXIT_FAILURE
, "pthread_join");
126 pthread_barrier_destroy(&barrier
);
127 pthread_attr_destroy(&thread_attr
);
130 static void *blocked_workerfn(void *arg __maybe_unused
)
132 mutex_lock(&thread_lock
);
134 if (!threads_starting
)
135 cond_signal(&thread_parent
);
136 cond_wait(&thread_worker
, &thread_lock
);
137 mutex_unlock(&thread_lock
);
139 while (1) { /* handle spurious wakeups */
140 if (futex_wait(&futex
, 0, NULL
, futex_flag
) != EINTR
)
148 static void block_threads(pthread_t
*w
, struct perf_cpu_map
*cpu
)
152 int nrcpus
= cpu__max_cpu().cpu
;
155 threads_starting
= params
.nthreads
;
157 cpuset
= CPU_ALLOC(nrcpus
);
159 size
= CPU_ALLOC_SIZE(nrcpus
);
161 /* create and block all threads */
162 for (i
= 0; i
< params
.nthreads
; i
++) {
163 pthread_attr_t thread_attr
;
165 pthread_attr_init(&thread_attr
);
166 CPU_ZERO_S(size
, cpuset
);
167 CPU_SET_S(perf_cpu_map__cpu(cpu
, i
% perf_cpu_map__nr(cpu
)).cpu
, size
, cpuset
);
169 if (pthread_attr_setaffinity_np(&thread_attr
, size
, cpuset
)) {
171 err(EXIT_FAILURE
, "pthread_attr_setaffinity_np");
174 if (pthread_create(&w
[i
], &thread_attr
, blocked_workerfn
, NULL
)) {
176 err(EXIT_FAILURE
, "pthread_create");
178 pthread_attr_destroy(&thread_attr
);
183 static void print_run(struct thread_data
*waking_worker
, unsigned int run_num
)
185 unsigned int i
, wakeup_avg
;
186 double waketime_avg
, waketime_stddev
;
187 struct stats __waketime_stats
, __wakeup_stats
;
189 init_stats(&__wakeup_stats
);
190 init_stats(&__waketime_stats
);
192 for (i
= 0; i
< params
.nwakes
; i
++) {
193 update_stats(&__waketime_stats
, waking_worker
[i
].runtime
.tv_usec
);
194 update_stats(&__wakeup_stats
, waking_worker
[i
].nwoken
);
197 waketime_avg
= avg_stats(&__waketime_stats
);
198 waketime_stddev
= stddev_stats(&__waketime_stats
);
199 wakeup_avg
= avg_stats(&__wakeup_stats
);
201 printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
202 "in %.4f ms (+-%.2f%%)\n", run_num
+ 1, wakeup_avg
,
203 params
.nthreads
, waketime_avg
/ USEC_PER_MSEC
,
204 rel_stddev_stats(waketime_stddev
, waketime_avg
));
207 static void print_summary(void)
209 unsigned int wakeup_avg
;
210 double waketime_avg
, waketime_stddev
;
212 waketime_avg
= avg_stats(&waketime_stats
);
213 waketime_stddev
= stddev_stats(&waketime_stats
);
214 wakeup_avg
= avg_stats(&wakeup_stats
);
216 printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
219 waketime_avg
/ USEC_PER_MSEC
,
220 rel_stddev_stats(waketime_stddev
, waketime_avg
));
224 static void do_run_stats(struct thread_data
*waking_worker
)
228 for (i
= 0; i
< params
.nwakes
; i
++) {
229 update_stats(&waketime_stats
, waking_worker
[i
].runtime
.tv_usec
);
230 update_stats(&wakeup_stats
, waking_worker
[i
].nwoken
);
235 static void toggle_done(int sig __maybe_unused
,
236 siginfo_t
*info __maybe_unused
,
237 void *uc __maybe_unused
)
242 int bench_futex_wake_parallel(int argc
, const char **argv
)
246 struct sigaction act
;
247 struct thread_data
*waking_worker
;
248 struct perf_cpu_map
*cpu
;
250 argc
= parse_options(argc
, argv
, options
,
251 bench_futex_wake_parallel_usage
, 0);
253 usage_with_options(bench_futex_wake_parallel_usage
, options
);
257 memset(&act
, 0, sizeof(act
));
258 sigfillset(&act
.sa_mask
);
259 act
.sa_sigaction
= toggle_done
;
260 sigaction(SIGINT
, &act
, NULL
);
262 if (params
.mlockall
) {
263 if (mlockall(MCL_CURRENT
| MCL_FUTURE
))
264 err(EXIT_FAILURE
, "mlockall");
267 cpu
= perf_cpu_map__new_online_cpus();
269 err(EXIT_FAILURE
, "calloc");
271 if (!params
.nthreads
)
272 params
.nthreads
= perf_cpu_map__nr(cpu
);
274 /* some sanity checks */
275 if (params
.nwakes
> params
.nthreads
||
277 params
.nwakes
= params
.nthreads
;
279 if (params
.nthreads
% params
.nwakes
)
280 errx(EXIT_FAILURE
, "Must be perfectly divisible");
282 * Each thread will wakeup nwakes tasks in
283 * a single futex_wait call.
285 nwakes
= params
.nthreads
/params
.nwakes
;
287 blocked_worker
= calloc(params
.nthreads
, sizeof(*blocked_worker
));
289 err(EXIT_FAILURE
, "calloc");
292 futex_flag
= FUTEX_PRIVATE_FLAG
;
294 printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
295 "futex %p), %d threads waking up %d at a time.\n\n",
296 getpid(), params
.nthreads
, params
.fshared
? "shared":"private",
297 &futex
, params
.nwakes
, nwakes
);
299 init_stats(&wakeup_stats
);
300 init_stats(&waketime_stats
);
302 mutex_init(&thread_lock
);
303 cond_init(&thread_parent
);
304 cond_init(&thread_worker
);
306 for (j
= 0; j
< bench_repeat
&& !done
; j
++) {
307 waking_worker
= calloc(params
.nwakes
, sizeof(*waking_worker
));
309 err(EXIT_FAILURE
, "calloc");
311 /* create, launch & block all threads */
312 block_threads(blocked_worker
, cpu
);
314 /* make sure all threads are already blocked */
315 mutex_lock(&thread_lock
);
316 while (threads_starting
)
317 cond_wait(&thread_parent
, &thread_lock
);
318 cond_broadcast(&thread_worker
);
319 mutex_unlock(&thread_lock
);
323 /* Ok, all threads are patiently blocked, start waking folks up */
324 wakeup_threads(waking_worker
);
326 for (i
= 0; i
< params
.nthreads
; i
++) {
327 ret
= pthread_join(blocked_worker
[i
], NULL
);
329 err(EXIT_FAILURE
, "pthread_join");
332 do_run_stats(waking_worker
);
334 print_run(waking_worker
, j
);
339 /* cleanup & report results */
340 cond_destroy(&thread_parent
);
341 cond_destroy(&thread_worker
);
342 mutex_destroy(&thread_lock
);
346 free(blocked_worker
);
347 perf_cpu_map__put(cpu
);
350 #endif /* HAVE_PTHREAD_BARRIER */