1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
5 * futex-requeue: Block a bunch of threads on futex1 and requeue them
6 * on futex2, N at a time.
8 * This program is particularly useful to measure the latency of nthread
9 * requeues without waking up any tasks (in the non-pi case) -- thus
10 * mimicking a regular futex_wait.
13 /* For the CLR_() macros */
18 #include "../util/mutex.h"
19 #include "../util/stat.h"
20 #include <subcmd/parse-options.h>
21 #include <linux/compiler.h>
22 #include <linux/kernel.h>
23 #include <linux/time64.h>
25 #include <perf/cpumap.h>
/*
 * File-scope state shared between the main thread and the workers.
 */
/* futex1 is the word the workers wait on; futex2 is the requeue target. */
34 static u_int32_t futex1
= 0, futex2
= 0;
/* Worker thread handles; the array is allocated in bench_futex_requeue(). */
36 static pthread_t
*worker
;
/* Tested by the run loop in bench_futex_requeue() to stop repeating. */
37 static bool done
= false;
/* Held around the waits/signals on the two condition variables below. */
38 static struct mutex thread_lock
;
/*
 * thread_parent: the main thread waits here, a worker signals it.
 * thread_worker: the workers wait here, the main thread broadcasts it.
 */
39 static struct cond thread_parent
, thread_worker
;
/* Per-run samples are added to these with update_stats(). */
40 static struct stats requeuetime_stats
, requeued_stats
;
/* Set to params.nthreads by block_threads(); the main thread waits for 0. */
41 static unsigned int threads_starting
;
/* Flag argument handed to the futex wait/wake/unlock helpers. */
42 static int futex_flag
= 0;
/*
 * Benchmark parameters. Fields read elsewhere in this file: nthreads,
 * nrequeue, silent, fshared, mlockall, broadcast and pi.
 *
 * NOTE(review): the initializer is only partially visible in this view (the
 * member the comment below describes and the closing brace are not shown).
 */
44 static struct bench_futex_parameters params
= {
46 * How many tasks to requeue at a time.
47 * Default to 1 in order to make the kernel work more.
/*
 * Command-line options of the requeue benchmark, handed to parse_options()
 * and usage_with_options(). Each entry stores its value into the field of
 * 'params' named in its third argument:
 *
 *   -t/--threads    params.nthreads
 *   -q/--nrequeue   params.nrequeue
 *   -s/--silent     params.silent
 *   -S/--shared     params.fshared
 *   -m/--mlockall   params.mlockall
 *   -B/--broadcast  params.broadcast
 *   -p/--pi         params.pi
 *
 * NOTE(review): the end of the table (terminator entry and closing brace) is
 * not visible in this view.
 */
52 static const struct option options
[] = {
53 OPT_UINTEGER('t', "threads", &params
.nthreads
, "Specify amount of threads"),
54 OPT_UINTEGER('q', "nrequeue", &params
.nrequeue
, "Specify amount of threads to requeue at once"),
55 OPT_BOOLEAN( 's', "silent", &params
.silent
, "Silent mode: do not display data/details"),
56 OPT_BOOLEAN( 'S', "shared", &params
.fshared
, "Use shared futexes instead of private ones"),
57 OPT_BOOLEAN( 'm', "mlockall", &params
.mlockall
, "Lock all current and future memory"),
58 OPT_BOOLEAN( 'B', "broadcast", &params
.broadcast
, "Requeue all threads at once"),
59 OPT_BOOLEAN( 'p', "pi", &params
.pi
, "Use PI-aware variants of FUTEX_CMP_REQUEUE"),
/*
 * Usage strings for this sub-command; passed to parse_options() and to
 * usage_with_options() in bench_futex_requeue().
 *
 * NOTE(review): the end of the array is not visible in this view.
 */
64 static const char * const bench_futex_requeue_usage
[] = {
65 "perf bench futex requeue <options>",
/*
 * print_summary - print the aggregate result line for all runs.
 *
 * Reads the mean and standard deviation of requeuetime_stats and the mean of
 * requeued_stats, then prints one line. The mean time is divided by
 * USEC_PER_MSEC for the "ms" figure and the spread is printed as the value
 * returned by rel_stddev_stats().
 *
 * NOTE(review): the format string has four conversions but only the last two
 * arguments are visible in this view; the first two are presumably on lines
 * that are not shown -- confirm against the full file.
 */
69 static void print_summary(void)
71 double requeuetime_avg
= avg_stats(&requeuetime_stats
);
72 double requeuetime_stddev
= stddev_stats(&requeuetime_stats
);
/* The averaged count is stored in an unsigned int, not a double. */
73 unsigned int requeued_avg
= avg_stats(&requeued_stats
);
75 printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
78 requeuetime_avg
/ USEC_PER_MSEC
,
79 rel_stddev_stats(requeuetime_stddev
, requeuetime_avg
));
/*
 * workerfn - thread function of every benchmark worker (started through
 * pthread_create() in block_threads()).
 *
 * @arg: not used.
 *
 * Start-up handshake, done under thread_lock: when threads_starting reads
 * zero the worker signals thread_parent, then it waits on thread_worker.
 *
 * After the handshake the worker blocks on futex1, either with futex_wait()
 * or with futex_wait_requeue_pi() (which names futex2 as the second futex).
 * In the latter case the lock obtained at futex2 is released again with
 * futex_unlock_pi(). A non-zero result with errno other than EAGAIN is
 * handled specially in both cases.
 *
 * NOTE(review): braces, the local declaration of 'ret', the loop/branch
 * structure, the place where threads_starting is modified and the return
 * statement are not visible in this view.
 */
82 static void *workerfn(void *arg __maybe_unused
)
86 mutex_lock(&thread_lock
);
/* Tell the main thread that the start-up count has reached zero. */
88 if (!threads_starting
)
89 cond_signal(&thread_parent
);
/* Wait for the main thread's broadcast before touching the futex. */
90 cond_wait(&thread_worker
, &thread_lock
);
91 mutex_unlock(&thread_lock
);
/* Block on futex1 as long as it still holds the expected value 0. */
95 ret
= futex_wait(&futex1
, 0, NULL
, futex_flag
);
/* A failure other than EAGAIN is treated differently from EAGAIN. */
99 if (ret
&& errno
!= EAGAIN
) {
/* Wait on futex1, to be requeued onto futex2. */
105 ret
= futex_wait_requeue_pi(&futex1
, 0, &futex2
,
108 /* got the lock at futex2 */
109 futex_unlock_pi(&futex2
, futex_flag
);
113 if (ret
&& errno
!= EAGAIN
) {
115 warnx("futex_wait_requeue_pi");
/*
 * block_threads - create params.nthreads workers, each with a CPU affinity
 * attribute selecting a single CPU.
 *
 * @w:   array that receives the created thread handles, one slot per thread.
 * @cpu: CPU map the CPUs are taken from; thread i uses the entry at index
 *       i % perf_cpu_map__nr(cpu), so the map is reused cyclically when
 *       there are more threads than entries.
 *
 * Also sets threads_starting to params.nthreads before any thread is
 * created. Exits through err(EXIT_FAILURE, ...) when setting the affinity
 * attribute or creating a thread fails.
 *
 * NOTE(review): braces, the declarations of 'cpuset', 'i' and 'size', and
 * any release of 'cpuset' are not visible in this view.
 */
124 static void block_threads(pthread_t
*w
, struct perf_cpu_map
*cpu
)
/* Used below to size the dynamically allocated CPU set. */
128 int nrcpus
= cpu__max_cpu().cpu
;
/* bench_futex_requeue() waits until this count reads zero. */
131 threads_starting
= params
.nthreads
;
133 cpuset
= CPU_ALLOC(nrcpus
);
135 size
= CPU_ALLOC_SIZE(nrcpus
);
137 /* create and block all threads */
138 for (i
= 0; i
< params
.nthreads
; i
++) {
139 pthread_attr_t thread_attr
;
141 pthread_attr_init(&thread_attr
);
/* Build a mask with exactly one CPU set for this thread. */
142 CPU_ZERO_S(size
, cpuset
);
143 CPU_SET_S(perf_cpu_map__cpu(cpu
, i
% perf_cpu_map__nr(cpu
)).cpu
, size
, cpuset
);
145 if (pthread_attr_setaffinity_np(&thread_attr
, size
, cpuset
)) {
147 err(EXIT_FAILURE
, "pthread_attr_setaffinity_np");
150 if (pthread_create(&w
[i
], &thread_attr
, workerfn
, NULL
)) {
152 err(EXIT_FAILURE
, "pthread_create");
/* The attribute object is only needed for the pthread_create() call. */
154 pthread_attr_destroy(&thread_attr
);
/*
 * toggle_done - signal handler; bench_futex_requeue() installs it as
 * act.sa_sigaction for SIGINT.
 *
 * @sig:  not used.
 * @info: not used.
 * @uc:   not used.
 *
 * NOTE(review): the body is not visible in this view; going by the name it
 * presumably sets the file-scope 'done' flag -- confirm against the full
 * file.
 */
159 static void toggle_done(int sig __maybe_unused
,
160 siginfo_t
*info __maybe_unused
,
161 void *uc __maybe_unused
)
/*
 * bench_futex_requeue - entry point of the futex requeue benchmark.
 *
 * @argc: argument count, handed to parse_options().
 * @argv: argument vector, handed to parse_options().
 *
 * Set-up: parse the options, obtain the online CPU map, install the SIGINT
 * handler, optionally mlockall(), default params.nthreads to the number of
 * CPUs in the map, allocate the worker array, limit params.nrequeue to
 * params.nthreads and print the run summary line.
 *
 * Then, for up to bench_repeat runs or until 'done' is set:
 *   - create the workers and wait until they have all checked in,
 *   - time requeueing from futex1 to futex2 until params.nthreads waiters
 *     have been accounted for,
 *   - record the count and the elapsed time, and print them unless
 *     params.silent is set,
 *   - wake the waiters on futex2 and join every worker.
 *
 * Finally the condition variables and the mutex are destroyed and the CPU
 * map is released. Fatal errors exit through err(EXIT_FAILURE, ...).
 *
 * NOTE(review): many lines of this function are not visible in this view
 * (braces, several declarations, the conditions guarding some err() calls
 * and branches, trailing call arguments, the return path). The comments
 * below describe only what is shown.
 */
166 int bench_futex_requeue(int argc
, const char **argv
)
170 struct sigaction act
;
171 struct perf_cpu_map
*cpu
;
/* Parse the command line against the options table. */
173 argc
= parse_options(argc
, argv
, options
, bench_futex_requeue_usage
, 0);
/* Online CPUs: used for thread placement and the default thread count. */
177 cpu
= perf_cpu_map__new_online_cpus();
179 err(EXIT_FAILURE
, "cpu_map__new");
/* Install toggle_done for SIGINT, with a full signal mask in sa_mask. */
181 memset(&act
, 0, sizeof(act
));
182 sigfillset(&act
.sa_mask
);
183 act
.sa_sigaction
= toggle_done
;
184 sigaction(SIGINT
, &act
, NULL
);
/* Optionally lock current and future mappings into memory. */
186 if (params
.mlockall
) {
187 if (mlockall(MCL_CURRENT
| MCL_FUTURE
))
188 err(EXIT_FAILURE
, "mlockall");
/* No thread count given: use the number of CPUs in the map. */
191 if (!params
.nthreads
)
192 params
.nthreads
= perf_cpu_map__nr(cpu
);
/* One zero-initialised pthread_t slot per worker. */
194 worker
= calloc(params
.nthreads
, sizeof(*worker
));
196 err(EXIT_FAILURE
, "calloc");
199 futex_flag
= FUTEX_PRIVATE_FLAG
;
/* The batch size can never exceed the number of threads. */
201 if (params
.nrequeue
> params
.nthreads
)
202 params
.nrequeue
= params
.nthreads
;
/* Broadcast mode: the batch size equals the number of threads. */
204 if (params
.broadcast
)
205 params
.nrequeue
= params
.nthreads
;
207 printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
208 "%d at a time.\n\n", getpid(), params
.nthreads
,
209 params
.fshared
? "shared":"private", &futex1
,
210 params
.pi
? "PI ": "", &futex2
, params
.nrequeue
);
/* Reset the accumulators and set up the start-up handshake primitives. */
212 init_stats(&requeued_stats
);
213 init_stats(&requeuetime_stats
);
214 mutex_init(&thread_lock
);
215 cond_init(&thread_parent
);
216 cond_init(&thread_worker
);
/* One iteration per run; stops early once 'done' is set. */
218 for (j
= 0; j
< bench_repeat
&& !done
; j
++) {
219 unsigned int nrequeued
= 0, wakeups
= 0;
220 struct timeval start
, end
, runtime
;
222 /* create, launch & block all threads */
223 block_threads(worker
, cpu
);
225 /* make sure all threads are already blocked */
226 mutex_lock(&thread_lock
);
227 while (threads_starting
)
228 cond_wait(&thread_parent
, &thread_lock
);
229 cond_broadcast(&thread_worker
);
230 mutex_unlock(&thread_lock
);
234 /* Ok, all threads are patiently blocked, start requeueing */
235 gettimeofday(&start
, NULL
);
/* Keep requeueing until every thread has been accounted for. */
236 while (nrequeued
< params
.nthreads
) {
240 * For the regular non-pi case, do not wakeup any tasks
241 * blocked on futex1, allowing us to really measure
242 * futex_wait functionality. For the PI case the first
243 * waiter is always awoken.
246 r
= futex_cmp_requeue(&futex1
, 0, &futex2
, 0,
250 r
= futex_cmp_requeue_pi(&futex1
, 0, &futex2
,
253 wakeups
++; /* assume no error */
257 err(EXIT_FAILURE
, "couldn't requeue from %p to %p",
263 gettimeofday(&end
, NULL
);
264 timersub(&end
, &start
, &runtime
);
/*
 * Record this run's count and elapsed time.
 * NOTE(review): only runtime.tv_usec is recorded and printed; tv_sec is not
 * used in the visible code, so a run lasting a second or more would
 * presumably be under-reported -- confirm.
 */
266 update_stats(&requeued_stats
, nrequeued
);
267 update_stats(&requeuetime_stats
, runtime
.tv_usec
);
269 if (!params
.silent
) {
271 printf("[Run %d]: Requeued %d of %d threads in "
272 "%.4f ms\n", j
+ 1, nrequeued
,
274 runtime
.tv_usec
/ (double)USEC_PER_MSEC
);
/* Report woken and requeued waiters as two separate figures. */
276 nrequeued
-= wakeups
;
277 printf("[Run %d]: Awoke and Requeued (%d+%d) of "
278 "%d threads in %.4f ms\n",
279 j
+ 1, wakeups
, nrequeued
,
281 runtime
.tv_usec
/ (double)USEC_PER_MSEC
);
287 /* everybody should be blocked on futex2, wake'em up */
288 nrequeued
= futex_wake(&futex2
, nrequeued
, futex_flag
);
289 if (params
.nthreads
!= nrequeued
)
290 warnx("couldn't wakeup all tasks (%d/%d)",
291 nrequeued
, params
.nthreads
);
/* Join every worker before the next run creates new ones. */
294 for (i
= 0; i
< params
.nthreads
; i
++) {
295 ret
= pthread_join(worker
[i
], NULL
);
297 err(EXIT_FAILURE
, "pthread_join");
301 /* cleanup & report results */
302 cond_destroy(&thread_parent
);
303 cond_destroy(&thread_worker
);
304 mutex_destroy(&thread_lock
);
/* Release the CPU map obtained during set-up. */
309 perf_cpu_map__put(cpu
);
/* Print the usage text; how this line is reached is not visible here. */
312 usage_with_options(bench_futex_requeue_usage
, options
);