// SPDX-License-Identifier: LGPL-2.1
#include <sys/types.h>
static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;
static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)	\
			printf(fmt, ## __VA_ARGS__);	\
#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	" ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	" cbz " INJECT_ASM_REG ", 333f\n" \
	" sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	" cbnz " INJECT_ASM_REG ", 222b\n" \
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
#else
#error unsupported target
#endif
#define RSEQ_INJECT_FAILED \

#define RSEQ_INJECT_C(n) \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
				poll(NULL, 0, opt_sleep); \
#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */
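
/*
 * Illustrative sketch (not part of the original test): at C level, each
 * RSEQ_INJECT_ASM(n) delay site above amounts to the busy-wait below,
 * counting down from loop_cnt[n] (mirrored into the asm_loop_cnt_<n>
 * symbols so the per-architecture inline assembly can reach it). The
 * helper name is hypothetical and only documents the intent.
 */
static inline void rseq_inject_delay_sketch(int n)
{
	int remaining = loop_cnt[n];

	while (remaining > 0)	/* spin to widen the race window */
		remaining--;
}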
struct percpu_lock_entry {
} __attribute__((aligned(128)));

	struct percpu_lock_entry c[CPU_SETSIZE];

struct test_data_entry {
} __attribute__((aligned(128)));
struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];

struct inc_thread_test_data {
	struct inc_test_data *data;

struct percpu_list_node {
	struct percpu_list_node *next;

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

	struct percpu_list_entry c[CPU_SETSIZE];

#define BUFFER_ITEM_PER_CPU 100

struct percpu_buffer_node {

struct percpu_buffer_entry {
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];

#define MEMCPY_BUFFER_ITEM_PER_CPU 100

struct percpu_memcpy_buffer_node {

struct percpu_memcpy_buffer_entry {
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
		if (rseq_likely(!ret))
		/* Retry if comparison fails or rseq aborts. */
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
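
/*
 * Illustrative usage sketch (hypothetical helper, not part of the original
 * test): the intended pattern mirrors test_percpu_spinlock_thread() below.
 * rseq_this_cpu_lock() returns the CPU index on which the lock was actually
 * taken, and that same index must be passed back to rseq_percpu_unlock().
 */
static inline void percpu_lock_usage_sketch(struct spinlock_test_data *data)
{
	int cpu = rseq_this_cpu_lock(&data->lock);

	data->c[cpu].count++;	/* per-CPU critical section */
	rseq_percpu_unlock(&data->lock, cpu);
}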
void *test_percpu_spinlock_thread(void *arg)
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_cpu_start();

		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);

		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);

	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
	const int num_threads = opt_threads;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
			perror("pthread_create");

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
			perror("pthread_join");

	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
void *test_percpu_inc_thread(void *arg)
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
			cpu = rseq_cpu_start();
			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));

		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);

	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
void test_percpu_inc(void)
	const int num_threads = opt_threads;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
			perror("pthread_create");

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
			perror("pthread_join");

	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
		intptr_t *targetptr, newval, expect;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
		/* Retry if comparison fails or rseq aborts. */
/*
 * Unlike a traditional lock-less linked list, the availability of an rseq
 * primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
	struct percpu_list_node *node = NULL;

		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
		if (rseq_likely(!ret)) {
		/* Retry if rseq aborts. */
/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	list->c[cpu].head = node->next;
void *test_percpu_list_thread(void *arg)
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())

	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
			sched_yield();	/* encourage shuffling */
			this_cpu_list_push(list, node, NULL);

	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
	const int num_threads = opt_threads;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			node = malloc(sizeof(*node));
			node->next = list.c[i].head;
			list.c[i].head = node;

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
			perror("pthread_create");

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
			perror("pthread_join");

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))

		while ((node = __percpu_list_pop(&list, i))) {

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;

		cpu = rseq_cpu_start();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
			ret = rseq_cmpeqv_trystorev_storev_release(
				targetptr_final, offset, targetptr_spec,
				newval_spec, newval_final, cpu);
			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
				offset, targetptr_spec, newval_spec,
		if (rseq_likely(!ret)) {
		/* Retry if comparison fails or rseq aborts. */
*this_cpu_buffer_pop(struct percpu_buffer
*buffer
,
729 struct percpu_buffer_node
*head
;
733 intptr_t *targetptr
, newval
;
737 cpu
= rseq_cpu_start();
738 /* Load offset with single-copy atomicity. */
739 offset
= RSEQ_READ_ONCE(buffer
->c
[cpu
].offset
);
744 head
= RSEQ_READ_ONCE(buffer
->c
[cpu
].array
[offset
- 1]);
746 targetptr
= (intptr_t *)&buffer
->c
[cpu
].offset
;
747 ret
= rseq_cmpeqv_cmpeqv_storev(targetptr
, offset
,
748 (intptr_t *)&buffer
->c
[cpu
].array
[offset
- 1],
749 (intptr_t)head
, newval
, cpu
);
750 if (rseq_likely(!ret
))
752 /* Retry if comparison fails or rseq aborts. */
/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
	struct percpu_buffer_node *head;

	offset = buffer->c[cpu].offset;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
void *test_percpu_buffer_thread(void *arg)
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())

	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
			sched_yield();	/* encourage shuffling */
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */

	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
	const int num_threads = opt_threads;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
		/* Worst case is every item in the same CPU. */
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
			perror("pthread_create");

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
			perror("pthread_join");

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))

		while ((node = __percpu_buffer_pop(&buffer, i))) {
		free(buffer.c[i].array);

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
		if (rseq_likely(!ret)) {
		/* Retry if comparison fails or rseq aborts. */
bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
		if (rseq_likely(!ret)) {
		/* Retry if comparison fails or rseq aborts. */
/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
	offset = buffer->c[cpu].offset;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
void *test_percpu_memcpy_buffer_thread(void *arg)
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())

	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
			sched_yield();	/* encourage shuffling */
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */

	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
	const int num_threads = opt_threads;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
		/* Worst case is every item in the same CPU. */
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
			perror("pthread_create");

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
			perror("pthread_join");

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
		free(buffer.c[i].array);

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}
static int set_signal_handler(void)
	struct sigaction sa;

	ret = sigemptyset(&sigset);
		perror("sigemptyset");

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;

	ret = sigaction(SIGUSR1, &sa, NULL);
		perror("sigaction");

	printf_verbose("Signal handler set for SIGUSR1\n");
static void show_usage(int argc, char **argv)
	printf("Usage: %s <OPTIONS>\n",
	printf("OPTIONS:\n");
	printf(" [-1 loops] Number of loops for delay injection 1\n");
	printf(" [-2 loops] Number of loops for delay injection 2\n");
	printf(" [-3 loops] Number of loops for delay injection 3\n");
	printf(" [-4 loops] Number of loops for delay injection 4\n");
	printf(" [-5 loops] Number of loops for delay injection 5\n");
	printf(" [-6 loops] Number of loops for delay injection 6\n");
	printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf(" [-y] Yield\n");
	printf(" [-k] Kill thread with signal\n");
	printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf(" [-t N] Number of threads (default 200)\n");
	printf(" [-r N] Number of repetitions per thread (default 5000)\n");
	printf(" [-d] Disable rseq system call (no initialization)\n");
	printf(" [-D M] Disable rseq for each M threads\n");
	printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
	printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf(" [-v] Verbose output.\n");
	printf(" [-h] Show this help.\n");
}
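
/*
 * Example invocation (illustrative; assumes the binary built from this file
 * is named param_test): run the per-cpu linked list test with 8 threads,
 * 100000 repetitions per thread, yield injection and verbose output:
 *
 *	./param_test -T l -t 8 -r 100000 -y -v
 */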
int main(int argc, char **argv)
	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
		switch (argv[i][1]) {
				show_usage(argc, argv);
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
				show_usage(argc, argv);
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				show_usage(argc, argv);
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
			opt_disable_rseq = 1;
				show_usage(argc, argv);
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				show_usage(argc, argv);
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				show_usage(argc, argv);
			opt_reps = atoll(argv[i + 1]);
				show_usage(argc, argv);
			show_usage(argc, argv);
				show_usage(argc, argv);
			opt_test = *argv[i + 1];
				show_usage(argc, argv);
		show_usage(argc, argv);
	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())

	if (!opt_disable_rseq && rseq_register_current_thread())

		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		printf_verbose("linked list\n");
		printf_verbose("buffer\n");
		test_percpu_buffer();
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		printf_verbose("counter increment\n");
	if (!opt_disable_rseq && rseq_unregister_current_thread())