// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>

static inline pid_t gettid(void)
{
	return syscall(__NR_gettid);
}
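/*
 * Note: gettid() is wrapped via syscall(2) above because older glibc
 * versions (before 2.30) do not provide a gettid() wrapper.
 */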
#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
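/* The option variables above are set from the command line; see show_usage(). */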
#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;
#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;
#define printf_verbose(fmt, ...)		\
	do {					\
		if (verbose)			\
			printf(fmt, ## __VA_ARGS__); \
	} while (0)
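/*
 * Each architecture block below provides INJECT_ASM_REG (a scratch
 * register), RSEQ_INJECT_CLOBBER and RSEQ_INJECT_ASM(n): the latter
 * loads loop_cnt[n] into the scratch register and busy-loops it down
 * to zero, injecting a tunable delay at point n of the rseq critical
 * sections.
 */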
#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"
#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"
#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"
#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"
#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	"	cbz	" INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	"	cbnz	" INJECT_ASM_REG ", 222b\n" \
	"333:\n"
#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"
#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"
#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"
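/*
 * RSEQ_INJECT_C(n) is the C-level counterpart of RSEQ_INJECT_ASM(n):
 * it spins for loop_cnt[n] iterations, and a count of -1 combined with
 * the -m option enables the modulo-based yield/sleep/kill disturbance
 * driven by yield_mod_cnt.
 */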
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};
struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));
struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};
struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};
struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};
#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};
#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};
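/*
 * Each per-cpu data structure above keeps one entry per possible CPU
 * (CPU_SETSIZE entries), with every entry aligned to 128 bytes so that
 * entries for different CPUs do not share cache lines.
 */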
/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}
static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
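/*
 * Illustrative sketch (not part of the original test): how a caller
 * pairs rseq_this_cpu_lock() with rseq_percpu_unlock() around a
 * per-cpu critical section. This mirrors the inner loop of
 * test_percpu_spinlock_thread() below; the helper itself is
 * hypothetical.
 */
static __attribute__((unused))
void example_percpu_locked_increment(struct spinlock_test_data *data)
{
	/* The lock is taken on, and returns, the current CPU. */
	int cpu = rseq_this_cpu_lock(&data->lock);

	data->c[cpu].count++;
	rseq_percpu_unlock(&data->lock, cpu);
}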
void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_cpu_start();

		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);

		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}
/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum = 0;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}
void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = rseq_cpu_start();
			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));

		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}
void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum = 0;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}
/*
 * Unlike a traditional lock-less linked list; the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}
/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}
void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}
/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
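/*
 * Illustrative sketch (not part of the original test): a single-item
 * pop/push round-trip on the per-cpu list, mirroring the inner loop of
 * test_percpu_list_thread(). Passing NULL as the cpu output argument
 * matches how the test threads call these helpers; the helper itself
 * is hypothetical.
 */
static __attribute__((unused))
void example_list_roundtrip(struct percpu_list *list)
{
	struct percpu_list_node *node;

	node = this_cpu_list_pop(list, NULL);
	if (node)
		this_cpu_list_push(list, node, NULL);
}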
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trystorev_storev_release(
				targetptr_final, offset, targetptr_spec,
				newval_spec, newval_final, cpu);
		else
			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
				offset, targetptr_spec, newval_spec,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}
/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}
void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worse-case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
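/*
 * Illustrative sketch (not part of the original test): a single-item
 * pop/push round-trip on the per-cpu buffer, mirroring the inner loop
 * of test_percpu_buffer_thread(). The helper itself is hypothetical.
 */
static __attribute__((unused))
void example_buffer_roundtrip(struct percpu_buffer *buffer)
{
	struct percpu_buffer_node *node;

	node = this_cpu_buffer_pop(buffer, NULL);
	if (node && !this_cpu_buffer_push(buffer, node, NULL))
		abort();	/* Buffer full: a real user would grow it. */
}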
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
				newval_final, cpu);
		else
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}
void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worse-case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}
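/*
 * The SIGUSR1 handler above only counts deliveries; actually sending
 * the signal is driven by the -k option (opt_signal), see show_usage().
 */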
static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
		argv[0]);
	printf("OPTIONS:\n");
	printf(" [-1 loops] Number of loops for delay injection 1\n");
	printf(" [-2 loops] Number of loops for delay injection 2\n");
	printf(" [-3 loops] Number of loops for delay injection 3\n");
	printf(" [-4 loops] Number of loops for delay injection 4\n");
	printf(" [-5 loops] Number of loops for delay injection 5\n");
	printf(" [-6 loops] Number of loops for delay injection 6\n");
	printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf(" [-y] Yield\n");
	printf(" [-k] Kill thread with signal\n");
	printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf(" [-t N] Number of threads (default 200)\n");
	printf(" [-r N] Number of repetitions per thread (default 5000)\n");
	printf(" [-d] Disable rseq system call (no initialization)\n");
	printf(" [-D M] Disable rseq for each M threads\n");
	printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
	printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf(" [-v] Verbose output.\n");
	printf(" [-h] Show this help.\n");
	printf("\n");
}
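/*
 * Example invocation (illustrative; assumes the built binary is named
 * param_test): run the linked-list test with 8 threads, 10000
 * repetitions per thread and verbose output:
 *
 *	./param_test -T l -t 8 -r 10000 -v
 */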
int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mb = 1;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}
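	/*
	 * Copy the command-line loop counts into the asm-visible globals
	 * (asm_loop_cnt_1..asm_loop_cnt_6) referenced by RSEQ_INJECT_ASM().
	 */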
	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];
	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}