// SPDX-License-Identifier: LGPL-2.1
#include <linux/membarrier.h>
#include <sys/types.h>
static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}
static int loop_cnt[NR_INJECT + 1];
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;
#define printf_verbose(fmt, ...) \
	do { if (verbose) printf(fmt, ## __VA_ARGS__); } while (0)
#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	" ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	" cbz " INJECT_ASM_REG ", 333f\n" \
	" sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	" cbnz " INJECT_ASM_REG ", 222b\n" \
#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \

#else
#error unsupported target
#endif
#define RSEQ_INJECT_FAILED \
	nr_abort++;
#define RSEQ_INJECT_C(n) \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			poll(NULL, 0, opt_sleep); \
#else /* BENCHMARK */

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */
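
/*
 * How the delay-injection knobs above fit together (summary, not upstream
 * comment text): the -1 through -9 options fill loop_cnt[], and main()
 * copies entries 1-6 into the asm_loop_cnt_N globals so the per-arch
 * RSEQ_INJECT_ASM(n) snippets used by the rseq.h inline assembly can
 * busy-wait that many iterations at injection point n, widening race
 * windows. RSEQ_INJECT_C(n) adds C-level disturbance instead: every
 * opt_modulo-th hit it sleeps (-s), yields (-y) or raises SIGUSR1 (-k),
 * and RSEQ_INJECT_FAILED counts aborted critical sections in nr_abort.
 */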
static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
#define TEST_MEMBARRIER
#endif

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID

int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}

bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}

bool rseq_use_cpu_index(void)
{
	return false;	/* Use mm_cid */
}

# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else /* BUILDOPT_RSEQ_PERCPU_MM_CID */
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID

int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}

bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}

bool rseq_use_cpu_index(void)
{
	return true;	/* Use cpu_id as index. */
}

# ifdef TEST_MEMBARRIER
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif /* BUILDOPT_RSEQ_PERCPU_MM_CID */
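
/*
 * Note on the two build variants above: with BUILDOPT_RSEQ_PERCPU_MM_CID
 * the tests index per-cpu data by the concurrency id (mm_cid) and
 * rseq_use_cpu_index() returns false, so the CPU_ISSET()-based affinity
 * filtering in the test setup loops below is skipped; otherwise the raw
 * cpu_id is used directly as an array index, bounded by CPU_SETSIZE.
 */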
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};
struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};
struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};
struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};
struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};
#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};
#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};
/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
	cpu = get_current_cpu_id();
	fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
		getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
	ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
	if (rseq_likely(!ret))
	/* Retry if comparison fails or rseq aborts. */
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
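
/*
 * Typical usage of the per-cpu lock, mirroring test_percpu_spinlock_thread()
 * below (sketch):
 *
 *	int cpu = rseq_this_cpu_lock(&data->lock);
 *	data->c[cpu].count++;
 *	rseq_percpu_unlock(&data->lock, cpu);
 *
 * Only threads that the kernel maps to the same cpu (or mm_cid) index
 * contend on a given lock word; the acquire/release pairing documented
 * above keeps the protected data coherent across migrations.
 */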
void *test_percpu_spinlock_thread(void *arg)
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
	const int num_threads = opt_threads;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
			perror("pthread_create");
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
			perror("pthread_join");
	}

	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
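	/*
	 * The assertion above holds because every thread performs opt_reps
	 * lock-protected increments, so the per-cpu counters must add up to
	 * opt_reps * num_threads; any mismatch means an increment was lost
	 * despite the rseq-based lock.
	 */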
void *test_percpu_inc_thread(void *arg)
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		do {
			cpu = get_current_cpu_id();
			ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
void test_percpu_inc(void)
	const int num_threads = opt_threads;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
			perror("pthread_create");
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
			perror("pthread_join");
	}

	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
		intptr_t *targetptr, newval, expect;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
		/* Retry if comparison fails or rseq aborts. */
/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
	struct percpu_list_node *node = NULL;
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;

		cpu = get_current_cpu_id();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 targetptr, expectnot,
		if (rseq_likely(!ret)) {
		/* Retry if rseq aborts. */
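		/*
		 * How the pop above works (as used here):
		 * rseq_cmpnev_storeoffp_load() fails if the per-cpu head
		 * (*targetptr) equals expectnot (NULL), otherwise it copies
		 * the old head into *load and replaces the head with the
		 * pointer found "offset" bytes into it, i.e. head->next.
		 * The whole sequence completes on one cpu or is aborted and
		 * retried, so no ABA-prone compare-and-swap is needed.
		 */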
/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}
void *test_percpu_list_thread(void *arg)
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		sched_yield();	/* encourage shuffling */
		this_cpu_list_push(list, node, NULL);
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
	const int num_threads = opt_threads;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
			perror("pthread_create");
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
			perror("pthread_join");
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
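	/*
	 * Rationale for the check above: every pop performed by a worker
	 * thread is paired with a push, so the multiset of list nodes is
	 * conserved; draining the lists with __percpu_list_pop() must
	 * therefore reproduce the sum recorded while populating them,
	 * barring the affinity caveat noted in the comment.
	 */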
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;

		cpu = get_current_cpu_id();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
						   targetptr_final, offset, targetptr_spec,
						   newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
		/* Retry if comparison fails or rseq aborts. */
struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
	struct percpu_buffer_node *head;
		intptr_t *targetptr, newval;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						(intptr_t *)&buffer->c[cpu].array[offset - 1],
						(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
		/* Retry if comparison fails or rseq aborts. */
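		/*
		 * The double compare above only commits the decremented
		 * offset if both the offset word and the popped slot,
		 * array[offset - 1], still hold the values loaded earlier,
		 * so a racing pop/push on the same per-cpu index cannot
		 * hand back a stale head pointer.
		 */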
/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
	struct percpu_buffer_node *head;

	offset = buffer->c[cpu].offset;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
void *test_percpu_buffer_thread(void *arg)
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		sched_yield();	/* encourage shuffling */
		if (!this_cpu_buffer_push(buffer, node, NULL)) {
			/* Should increase buffer size. */
			abort();
		}
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
	const int num_threads = opt_threads;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst-case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
			perror("pthread_create");
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
			perror("pthread_join");
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(
			opt_mo, RSEQ_PERCPU,
			targetptr_final, offset,
			destptr, srcptr, copylen,
		if (rseq_likely(!ret)) {
		/* Retry if comparison fails or rseq aborts. */
bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
	bool result = false;
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						   targetptr_final, offset, destptr, srcptr, copylen,
		if (rseq_likely(!ret)) {
		/* Retry if comparison fails or rseq aborts. */
/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
	offset = buffer->c[cpu].offset;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
void *test_percpu_memcpy_buffer_thread(void *arg)
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		sched_yield();	/* encourage shuffling */
		if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
			/* Should increase buffer size. */
			abort();
		}
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
	const int num_threads = opt_threads;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst-case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
			perror("pthread_create");
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
			perror("pthread_join");
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}
static int set_signal_handler(void)
	struct sigaction sa;

	ret = sigemptyset(&sigset);
		perror("sigemptyset");
	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	ret = sigaction(SIGUSR1, &sa, NULL);
		perror("sigaction");
	printf_verbose("Signal handler set for SIGUSR1\n");
/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
#ifdef TEST_MEMBARRIER
struct test_membarrier_thread_args {
	intptr_t percpu_list_ptr;
};
/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
	}

	/* Wait for initialization. */
	while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {}

	for (i = 0; i < iters; ++i) {
		do {
			int cpu = get_current_cpu_id();

			ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
				&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}
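	/*
	 * The retry loop above leans on rseq_offset_deref_addv() doing
	 * everything within one rseq critical section: it reads the
	 * currently active list from percpu_list_ptr, offsets to this
	 * cpu's entry, follows the head pointer stored there and
	 * increments that node's data field. This is what lets the
	 * manager thread swap percpu_list_ptr and then rely on expedited
	 * membarrier to flush any worker still inside a critical section
	 * that saw the old pointer.
	 */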

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
void test_membarrier_init_percpu_list(struct percpu_list *list)
	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = malloc(sizeof(*node));
		list->c[i].head = node;
	}
void test_membarrier_free_percpu_list(struct percpu_list *list)
	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
void *test_membarrier_manager_thread(void *arg)
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));

	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);

	while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
			fprintf(stderr, "Membarrier test failed\n");

		/* Make list_b "active". */
		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE);
		if (rseq_membarrier_expedited(cpu_a) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");

		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
			fprintf(stderr, "Membarrier test failed\n");

		/* Make list_a "active". */
		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
		if (rseq_membarrier_expedited(cpu_b) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");

		/* Remember a value from list_b. */
		expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
void test_membarrier(void)
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			     test_membarrier_manager_thread, &thread_args);
		perror("pthread_create");

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				     test_membarrier_worker_thread, &thread_args);
			perror("pthread_create");
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
			perror("pthread_join");
	}

	__atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
	ret = pthread_join(manager_thread, NULL);
		perror("pthread_join");
#else /* TEST_MEMBARRIER */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
		"Skipping membarrier test.\n");
}
#endif /* TEST_MEMBARRIER */
static void show_usage(int argc, char **argv)
	printf("Usage : %s <OPTIONS>\n",
	       argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
int main(int argc, char **argv)
	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
			show_usage(argc, argv);
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			show_usage(argc, argv);
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
			show_usage(argc, argv);
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
			opt_disable_rseq = 1;
			show_usage(argc, argv);
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
			show_usage(argc, argv);
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
			show_usage(argc, argv);
			opt_reps = atoll(argv[i + 1]);
			show_usage(argc, argv);
			show_usage(argc, argv);
			opt_test = *argv[i + 1];
			show_usage(argc, argv);
			opt_mo = RSEQ_MO_RELEASE;
			show_usage(argc, argv);
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
	if (!opt_disable_rseq && rseq_register_current_thread())
	if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
		fprintf(stderr, "Error: cpu id getter unavailable\n");

	switch (opt_test) {
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		printf_verbose("linked list\n");
		printf_verbose("buffer\n");
		test_percpu_buffer();
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		printf_verbose("counter increment\n");
		printf_verbose("membarrier\n");
	}

	if (!opt_disable_rseq && rseq_unregister_current_thread())