// SPDX-License-Identifier: GPL-2.0+
//
// Scalability test comparing RCU vs other mechanisms
// for acquiring references on objects.
//
// Copyright (C) Google, 2020.
//
// Author: Joel Fernandes <joel@joelfernandes.org>
#define pr_fmt(fmt) fmt
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/stat.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/torture.h>
#include <linux/types.h>
#define SCALE_FLAG "-ref-scale: "

#define SCALEOUT(s, x...) \
	pr_alert("%s" SCALE_FLAG s, scale_type, ## x)

#define VERBOSE_SCALEOUT(s, x...) \
	do { if (verbose) pr_alert("%s" SCALE_FLAG s, scale_type, ## x); } while (0)

#define VERBOSE_SCALEOUT_ERRSTRING(s, x...) \
	do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! " s, scale_type, ## x); } while (0)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");
static char *scale_type = "rcu";
module_param(scale_type, charp, 0444);
MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock).");
torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
// Wait until there are multiple CPUs before starting test.
torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
	      "Holdoff time before test start (s)");
// Number of loops per experiment, all readers execute operations concurrently.
torture_param(long, loops, 10000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.
torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
torture_param(int, nruns, 30, "Number of experiments to run.");
// Reader delay in nanoseconds, 0 for no delay.
torture_param(int, readdelay, 0, "Read-side delay in nanoseconds.");
#ifdef MODULE
# define REFSCALE_SHUTDOWN 0
#else
# define REFSCALE_SHUTDOWN 1
#endif

torture_param(bool, shutdown, REFSCALE_SHUTDOWN,
	      "Shutdown at end of scalability tests.");
// Per-reader task state.
struct reader_task {
	struct task_struct *task;
	int start_reader;
	wait_queue_head_t wq;
	u64 last_duration_ns;
};
static struct task_struct *shutdown_task;
static wait_queue_head_t shutdown_wq;
static struct task_struct *main_task;
static wait_queue_head_t main_wq;
static int shutdown_start;
static struct reader_task *reader_tasks;
// Number of readers that are part of the current experiment.
static atomic_t nreaders_exp;
// Used to wait for all threads to start.
static atomic_t n_init;
static atomic_t n_started;
static atomic_t n_warmedup;
static atomic_t n_cooleddown;
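// n_started, n_warmedup, and n_cooleddown are each set to the number of
// readers at the start of an experiment; every reader atomically
// decrements the counter for the phase it has reached, so a value of
// zero means that all readers have checked in for that phase.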
// Track which experiment is currently running.
static int exp_idx;
// Operations vector for selecting different types of tests.
struct ref_scale_ops {
	void (*init)(void);
	void (*cleanup)(void);
	void (*readsection)(const int nloops);
	void (*delaysection)(const int nloops, const int udl, const int ndl);
	const char *name;
};
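// A new reference-acquisition mechanism can be benchmarked by supplying
// another ops vector and listing it in the scale_ops[] array in
// ref_scale_init().  The sketch below is purely illustrative (the "lock"
// name, test_lock, and ref_lock_section() are hypothetical, not part of
// this file):
//
//	static DEFINE_SPINLOCK(test_lock);
//
//	static void ref_lock_section(const int nloops)
//	{
//		int i;
//
//		for (i = nloops; i >= 0; i--) {
//			spin_lock(&test_lock);
//			spin_unlock(&test_lock);
//		}
//	}
//
//	static struct ref_scale_ops lock_ops = {
//		.readsection	= ref_lock_section,
//		.name		= "lock"
//	};
//
// A matching .delaysection would also be needed if readdelay is to be used.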
static struct ref_scale_ops *cur_ops;
static void un_delay(const int udl, const int ndl)
{
	if (udl)
		udelay(udl);
	if (ndl)
		ndelay(ndl);
}
static void ref_rcu_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock();
		rcu_read_unlock();
	}
}
static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock();
		un_delay(udl, ndl);
		rcu_read_unlock();
	}
}
static void rcu_sync_scale_init(void)
{
}
static struct ref_scale_ops rcu_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= ref_rcu_read_section,
	.delaysection	= ref_rcu_delay_section,
	.name		= "rcu"
};
// Definitions for SRCU ref scale testing.
DEFINE_STATIC_SRCU(srcu_refctl_scale);
static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale;
static void srcu_ref_scale_read_section(const int nloops)
{
	int i;
	int idx;

	for (i = nloops; i >= 0; i--) {
		idx = srcu_read_lock(srcu_ctlp);
		srcu_read_unlock(srcu_ctlp, idx);
	}
}
static void srcu_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;
	int idx;

	for (i = nloops; i >= 0; i--) {
		idx = srcu_read_lock(srcu_ctlp);
		un_delay(udl, ndl);
		srcu_read_unlock(srcu_ctlp, idx);
	}
}
static struct ref_scale_ops srcu_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= srcu_ref_scale_read_section,
	.delaysection	= srcu_ref_scale_delay_section,
	.name		= "srcu"
};
// Definitions for RCU Tasks ref scale testing: Empty read markers.
// These definitions also work for RCU Rude readers.
static void rcu_tasks_ref_scale_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--)
		continue;
}
static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--)
		un_delay(udl, ndl);
}
static struct ref_scale_ops rcu_tasks_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= rcu_tasks_ref_scale_read_section,
	.delaysection	= rcu_tasks_ref_scale_delay_section,
	.name		= "rcu-tasks"
};
// Definitions for RCU Tasks Trace ref scale testing.
static void rcu_trace_ref_scale_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock_trace();
		rcu_read_unlock_trace();
	}
}
static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock_trace();
		un_delay(udl, ndl);
		rcu_read_unlock_trace();
	}
}
static struct ref_scale_ops rcu_trace_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= rcu_trace_ref_scale_read_section,
	.delaysection	= rcu_trace_ref_scale_delay_section,
	.name		= "rcu-trace"
};
// Definitions for reference count.
static atomic_t refcnt;

static void ref_refcnt_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		atomic_inc(&refcnt);
		atomic_dec(&refcnt);
	}
}
static void ref_refcnt_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		atomic_inc(&refcnt);
		un_delay(udl, ndl);
		atomic_dec(&refcnt);
	}
}
static struct ref_scale_ops refcnt_ops = {
	.init		= rcu_sync_scale_init,
	.readsection	= ref_refcnt_section,
	.delaysection	= ref_refcnt_delay_section,
	.name		= "refcnt"
};
// Definitions for rwlock.
static rwlock_t test_rwlock;

static void ref_rwlock_init(void)
{
	rwlock_init(&test_rwlock);
}
static void ref_rwlock_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		read_lock(&test_rwlock);
		read_unlock(&test_rwlock);
	}
}
static void ref_rwlock_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		read_lock(&test_rwlock);
		un_delay(udl, ndl);
		read_unlock(&test_rwlock);
	}
}
static struct ref_scale_ops rwlock_ops = {
	.init		= ref_rwlock_init,
	.readsection	= ref_rwlock_section,
	.delaysection	= ref_rwlock_delay_section,
	.name		= "rwlock"
};
// Definitions for rwsem.
static struct rw_semaphore test_rwsem;

static void ref_rwsem_init(void)
{
	init_rwsem(&test_rwsem);
}
static void ref_rwsem_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		down_read(&test_rwsem);
		up_read(&test_rwsem);
	}
}
static void ref_rwsem_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		down_read(&test_rwsem);
		un_delay(udl, ndl);
		up_read(&test_rwsem);
	}
}
static struct ref_scale_ops rwsem_ops = {
	.init		= ref_rwsem_init,
	.readsection	= ref_rwsem_section,
	.delaysection	= ref_rwsem_delay_section,
	.name		= "rwsem"
};
static void rcu_scale_one_reader(void)
{
	if (readdelay <= 0)
		cur_ops->readsection(loops);
	else
		cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000);
}
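// Note that readdelay is given in nanoseconds, so a hypothetical
// readdelay=1500 invokes cur_ops->delaysection() with udl=1 and ndl=500,
// that is, roughly 1.5 microseconds of delay inside each critical section.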
// Reader kthread.  Repeatedly does empty RCU read-side critical
// sections, minimizing update-side interference.
static int
ref_scale_reader(void *arg)
{
	unsigned long flags;
	long me = (long)arg;
	struct reader_task *rt = &(reader_tasks[me]);
	u64 start;
	s64 duration;
	VERBOSE_SCALEOUT("ref_scale_reader %ld: task started", me);
	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);
	atomic_inc(&n_init);
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);
repeat:
	VERBOSE_SCALEOUT("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, smp_processor_id());

	// Wait for signal that this reader can start.
	wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) ||
		   torture_must_stop());
	if (torture_must_stop())
		goto end;

	// Make sure that the CPU is affinitized appropriately during testing.
	WARN_ON_ONCE(smp_processor_id() != me);

	WRITE_ONCE(rt->start_reader, 0);
	if (!atomic_dec_return(&n_started))
		while (atomic_read_acquire(&n_started))
			cpu_relax();

	VERBOSE_SCALEOUT("ref_scale_reader %ld: experiment %d started", me, exp_idx);
	// To reduce noise, do an initial cache-warming invocation, check
	// in, and then keep warming until everyone has checked in.
	rcu_scale_one_reader();
	if (!atomic_dec_return(&n_warmedup))
		while (atomic_read_acquire(&n_warmedup))
			rcu_scale_one_reader();
	// Keep interrupts disabled during the measurement.  This also has
	// the effect of preventing entry into the rcu_read_unlock() slow path.
	local_irq_save(flags);
	start = ktime_get_mono_fast_ns();

	rcu_scale_one_reader();

	duration = ktime_get_mono_fast_ns() - start;
	local_irq_restore(flags);

	rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;
	// To reduce runtime-skew noise, do maintain-load invocations until
	// everyone is done.
	if (!atomic_dec_return(&n_cooleddown))
		while (atomic_read_acquire(&n_cooleddown))
			rcu_scale_one_reader();
	if (atomic_dec_and_test(&nreaders_exp))
		wake_up(&main_wq);

	VERBOSE_SCALEOUT("ref_scale_reader %ld: experiment %d ended, (readers remaining=%d)",
			 me, exp_idx, atomic_read(&nreaders_exp));
	if (!torture_must_stop())
		goto repeat;

end:
	torture_kthread_stopping("ref_scale_reader");
	return 0;
}
static void reset_readers(void)
{
	int i;
	struct reader_task *rt;

	for (i = 0; i < nreaders; i++) {
		rt = &(reader_tasks[i]);

		rt->last_duration_ns = 0;
	}
}
// Print the results of each reader and return the sum of all their durations.
static u64
process_durations(int n)
{
	int i;
	struct reader_task *rt;
	char buf1[64];
	char *buf;
	u64 sum = 0;

	buf = kmalloc(128 + nreaders * 32, GFP_KERNEL);
	if (!buf)
		return 0;
	buf[0] = 0;
	sprintf(buf, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)",
		exp_idx);

	for (i = 0; i < n && !torture_must_stop(); i++) {
		rt = &(reader_tasks[i]);
		sprintf(buf1, "%d: %llu\t", i, rt->last_duration_ns);
		strcat(buf, buf1);

		sum += rt->last_duration_ns;
	}

	SCALEOUT("%s\n", buf);

	kfree(buf);
	return sum;
}
// The main_func is the main orchestrator: it performs a series of
// experiments.  For every experiment, it orders all the readers
// involved to start and waits for them to finish the experiment.  It
// then reads out their timings and starts the next experiment.  Once
// all experiments have completed, the per-experiment averages are
// printed.
static int main_func(void *arg)
{
	bool errexit = false;
	int exp;
	int r;
	char buf1[64];
	char *buf;
	u64 *result_avg;
	u64 avg;
	u32 rem;

	set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);
	VERBOSE_SCALEOUT("main_func task started");
	result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL);
	buf = kzalloc(64 + nruns * 32, GFP_KERNEL);
	if (!result_avg || !buf) {
		VERBOSE_SCALEOUT_ERRSTRING("out of memory");
		errexit = true;
	}
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);
	// Wait for all threads to start.
	atomic_inc(&n_init);
	while (atomic_read(&n_init) < nreaders + 1)
		schedule_timeout_uninterruptible(1);
	// Start exp readers up per experiment
	for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
		if (errexit)
			break;
		if (torture_must_stop())
			goto end;

		reset_readers();
		atomic_set(&nreaders_exp, nreaders);
		atomic_set(&n_started, nreaders);
		atomic_set(&n_warmedup, nreaders);
		atomic_set(&n_cooleddown, nreaders);

		exp_idx = exp;
		for (r = 0; r < nreaders; r++) {
			smp_store_release(&reader_tasks[r].start_reader, 1);
			wake_up(&reader_tasks[r].wq);
		}

		VERBOSE_SCALEOUT("main_func: experiment started, waiting for %d readers",
				 nreaders);

		wait_event(main_wq,
			   !atomic_read(&nreaders_exp) || torture_must_stop());
		VERBOSE_SCALEOUT("main_func: experiment ended");

		if (torture_must_stop())
			goto end;

		result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops);
	}
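	// Each result_avg[] entry is in thousandths of a nanosecond per loop
	// iteration.  For example, if one run's per-reader durations sum to
	// 2,000,000 ns with nreaders=2 and loops=10000, the stored value is
	// 1000 * 2000000 / 20000 = 100000, which the formatting loop below
	// prints as "100.000".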
	// Print the average of all experiments
	SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");

	if (!errexit) {
		buf[0] = 0;
		strcat(buf, "\n");
		strcat(buf, "Runs\tTime(ns)\n");
	}
	for (exp = 0; exp < nruns; exp++) {
		if (errexit)
			break;
		avg = div_u64_rem(result_avg[exp], 1000, &rem);
		sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem);
		strcat(buf, buf1);
	}

	if (!errexit)
		SCALEOUT("%s", buf);
	// This will shutdown everything including us.
	if (shutdown) {
		shutdown_start = 1;
		wake_up(&shutdown_wq);
	}
	// Wait for torture to stop us
	while (!torture_must_stop())
		schedule_timeout_uninterruptible(1);

end:
	torture_kthread_stopping("main_func");
	kfree(result_avg);
	kfree(buf);
	return 0;
}
static void
ref_scale_print_module_parms(struct ref_scale_ops *cur_ops, const char *tag)
{
	pr_alert("%s" SCALE_FLAG
		 "--- %s: verbose=%d shutdown=%d holdoff=%d loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
		 verbose, shutdown, holdoff, loops, nreaders, nruns, readdelay);
}
static void
ref_scale_cleanup(void)
{
	int i;

	if (torture_cleanup_begin())
		return;

	if (!cur_ops) {
		torture_cleanup_end();
		return;
	}
	if (reader_tasks) {
		for (i = 0; i < nreaders; i++)
			torture_stop_kthread("ref_scale_reader",
					     reader_tasks[i].task);
	}
	kfree(reader_tasks);

	torture_stop_kthread("main_task", main_task);
	// Do scale-type-specific cleanup operations.
	if (cur_ops->cleanup != NULL)
		cur_ops->cleanup();

	torture_cleanup_end();
}
// Shutdown kthread.  Just waits to be awakened, then shuts down system.
static int
ref_scale_shutdown(void *arg)
{
	wait_event(shutdown_wq, shutdown_start);

	smp_mb(); // Wake before output.
	ref_scale_cleanup();
	kernel_power_off();

	return -EINVAL;
}
static int __init
ref_scale_init(void)
{
	long i;
	int firsterr = 0;
	static struct ref_scale_ops *scale_ops[] = {
		&rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops,
		&refcnt_ops, &rwlock_ops, &rwsem_ops,
	};
	if (!torture_init_begin(scale_type, verbose))
		return -EBUSY;
	for (i = 0; i < ARRAY_SIZE(scale_ops); i++) {
		cur_ops = scale_ops[i];
		if (strcmp(scale_type, cur_ops->name) == 0)
			break;
	}
	if (i == ARRAY_SIZE(scale_ops)) {
		pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type);
		pr_alert("rcu-scale types:");
		for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
			pr_cont(" %s", scale_ops[i]->name);
		pr_cont("\n");
		firsterr = -EINVAL;
		cur_ops = NULL;
		goto unwind;
	}
	if (cur_ops->init)
		cur_ops->init();
	ref_scale_print_module_parms(cur_ops, "Start of test");

	// Shutdown task
	if (shutdown) {
		init_waitqueue_head(&shutdown_wq);
		firsterr = torture_create_kthread(ref_scale_shutdown, NULL,
						  shutdown_task);
		if (firsterr)
			goto unwind;
		schedule_timeout_uninterruptible(1);
	}
	// Reader tasks (default to ~75% of online CPUs).
	if (nreaders < 0)
		nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2);
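	// For example, on a hypothetical 8-CPU system this yields
	// (8 >> 1) + (8 >> 2) = 4 + 2 = 6 readers, or 75% of the CPUs.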
	if (WARN_ONCE(loops <= 0, "%s: loops = %ld, adjusted to 1\n", __func__, loops))
		loops = 1;
	if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders))
		nreaders = 1;
	if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns))
		nruns = 1;
	reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
			       GFP_KERNEL);
	if (!reader_tasks) {
		VERBOSE_SCALEOUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}

	VERBOSE_SCALEOUT("Starting %d reader threads\n", nreaders);
	for (i = 0; i < nreaders; i++) {
		firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
						  reader_tasks[i].task);
		if (firsterr)
			goto unwind;

		init_waitqueue_head(&(reader_tasks[i].wq));
	}
	init_waitqueue_head(&main_wq);
	firsterr = torture_create_kthread(main_func, NULL, main_task);
	if (firsterr)
		goto unwind;

	torture_init_end();
	return 0;
unwind:
	torture_init_end();
	ref_scale_cleanup();
	if (shutdown) {
		WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST));
		kernel_power_off();
	}
	return firsterr;
}
module_init(ref_scale_init);
module_exit(ref_scale_cleanup);