// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stress userfaultfd syscall.
 *
 * Copyright (C) 2015  Red Hat, Inc.
 *
 * This test allocates two virtual areas and bounces the physical
 * memory across the two virtual areas (from area_src to area_dst)
 * using userfaultfd.
 *
 * There are three threads running per CPU:
 *
 * 1) one per-CPU thread takes a per-page pthread_mutex in a random
 *    page of the area_dst (while the physical page may still be in
 *    area_src), and increments a per-page counter in the same page,
 *    and checks its value against a verification region.
 *
 * 2) another per-CPU thread handles the userfaults generated by
 *    thread 1 above. userfaultfd blocking reads or poll() modes are
 *    exercised interleaved.
 *
 * 3) one last per-CPU thread transfers the memory in the background
 *    at maximum bandwidth (if not already transferred by thread
 *    2). Each CPU thread takes care of transferring a portion of the
 *    area.
 *
 * When all threads of type 3 have completed the transfer, one bounce
 * is complete. area_src and area_dst are then swapped. All threads
 * are respawned and so the bounce is immediately restarted in the
 * opposite direction.
 *
 * The per-CPU threads of type 1, by triggering userfaults inside
 * pthread_mutex_lock, will also verify the atomicity of the memory
 * transfer (UFFDIO_COPY).
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <signal.h>
#include <poll.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
#include <setjmp.h>
#include <stdbool.h>

#include "../kselftest.h"
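/*
 * Illustrative only: a minimal, self-contained sketch of the userfaultfd
 * flow this test stresses (fd creation, UFFDIO_API handshake, registering
 * a missing-page range, then resolving one fault with UFFDIO_COPY). It is
 * not called anywhere; the "example_" name is hypothetical and the block
 * is kept under #if 0 so it cannot affect the build.
 */
#if 0
static int example_uffd_handshake(void *region, size_t len,
				  void *src_page, size_t pgsize)
{
	struct uffdio_api api = { .api = UFFD_API };
	struct uffdio_register reg = {
		.range = { .start = (unsigned long) region, .len = len },
		.mode = UFFDIO_REGISTER_MODE_MISSING,
	};
	struct uffd_msg msg;
	struct uffdio_copy copy;
	int fd = syscall(__NR_userfaultfd, O_CLOEXEC);

	if (fd < 0 || ioctl(fd, UFFDIO_API, &api) ||
	    ioctl(fd, UFFDIO_REGISTER, &reg))
		return -1;

	/* a real monitor would do this blocking read in a dedicated thread */
	if (read(fd, &msg, sizeof(msg)) != sizeof(msg) ||
	    msg.event != UFFD_EVENT_PAGEFAULT)
		return -1;

	/* resolve the fault by copying a prepared page into place */
	copy.dst = msg.arg.pagefault.address & ~((__u64)pgsize - 1);
	copy.src = (unsigned long) src_page;
	copy.len = pgsize;
	copy.mode = 0;
	copy.copy = 0;
	return ioctl(fd, UFFDIO_COPY, &copy) ? -1 : 0;
}
#endif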
#ifdef __NR_userfaultfd

static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;

#define BOUNCE_RANDOM		(1<<0)
#define BOUNCE_RACINGFAULTS	(1<<1)
#define BOUNCE_VERIFY		(1<<2)
#define BOUNCE_POLL		(1<<3)
static int bounces;

#define TEST_ANON	1
#define TEST_HUGETLB	2
#define TEST_SHMEM	3
static int test_type;

/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
#define ALARM_INTERVAL_SECS 10
static volatile bool test_uffdio_copy_eexist = true;
static volatile bool test_uffdio_zeropage_eexist = true;

static bool map_shared;
static int huge_fd;
static char *huge_fd_off0;
static unsigned long long *count_verify;
static int uffd, uffd_flags, finished, *pipefd;
static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
static char *zeropage;
pthread_attr_t attr;
/* pthread_mutex_t starts at page offset 0 */
#define area_mutex(___area, ___nr)					\
	((pthread_mutex_t *) ((___area) + (___nr)*page_size))
/*
 * count is placed in the page after pthread_mutex_t naturally aligned
 * to avoid unaligned access faults on non-x86 archs.
 */
#define area_count(___area, ___nr)					\
	((volatile unsigned long long *) ((unsigned long)		\
				 ((___area) + (___nr)*page_size +	\
				  sizeof(pthread_mutex_t) +		\
				  sizeof(unsigned long long) - 1) &	\
				 ~(unsigned long)(sizeof(unsigned long long) \
						  - 1)))
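/*
 * A worked example of the area_count() arithmetic above, written as an
 * unused helper purely for readability (a sketch, not part of the test):
 * with a typical 64-bit glibc, sizeof(pthread_mutex_t) is 40 and
 * sizeof(unsigned long long) is 8, so the counter lands at byte 40 of the
 * page, already 8-byte aligned; on any ABI the rounding below guarantees
 * natural alignment right after the mutex.
 */
static inline volatile unsigned long long *
area_count_example(char *area, unsigned long nr)
{
	unsigned long addr = (unsigned long) area + nr * page_size +
			     sizeof(pthread_mutex_t);
	unsigned long align = sizeof(unsigned long long);

	/* round up to the next multiple of "align", as area_count() does */
	return (volatile unsigned long long *)((addr + align - 1) &
					       ~(align - 1));
}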
const char *examples =
	"# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
	"./userfaultfd anon 100 99999\n\n"
	"# Run shared memory test on 1GiB region with 99 bounces:\n"
	"./userfaultfd shmem 1000 99\n\n"
	"# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
	"./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
	"# Run the same hugetlb test but using shmem:\n"
	"./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
	"# 10MiB-~6GiB 999 bounces anonymous test, "
	"continue forever unless an error triggers\n"
	"while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
static void usage(void)
{
	fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> "
		"[hugetlbfs_file]\n\n");
	fprintf(stderr, "Supported <test type>: anon, hugetlb, "
		"hugetlb_shared, shmem\n\n");
	fprintf(stderr, "Examples:\n\n");
	fprintf(stderr, "%s", examples);
	exit(1);
}
static int anon_release_pages(char *rel_area)
{
	int ret = 0;

	if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
		perror("madvise");
		ret = 1;
	}

	return ret;
}
static void anon_allocate_area(void **alloc_area)
{
	if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) {
		fprintf(stderr, "out of memory\n");
		*alloc_area = NULL;
	}
}
static void noop_alias_mapping(__u64 *start, size_t len,
			       unsigned long offset)
{
}
static int hugetlb_release_pages(char *rel_area)
{
	int ret = 0;

	if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      rel_area == huge_fd_off0 ? 0 : nr_pages * page_size,
		      nr_pages * page_size)) {
		perror("fallocate");
		ret = 1;
	}

	return ret;
}
static void hugetlb_allocate_area(void **alloc_area)
{
	void *area_alias = NULL;
	char **alloc_area_alias;

	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
			   (map_shared ? MAP_SHARED : MAP_PRIVATE) |
			   MAP_HUGETLB,
			   huge_fd, *alloc_area == area_src ? 0 :
			   nr_pages * page_size);
	if (*alloc_area == MAP_FAILED) {
		fprintf(stderr, "mmap of hugetlbfs file failed\n");
		*alloc_area = NULL;
		return;
	}

	if (map_shared) {
		area_alias = mmap(NULL, nr_pages * page_size,
				  PROT_READ | PROT_WRITE,
				  MAP_SHARED | MAP_HUGETLB,
				  huge_fd, *alloc_area == area_src ? 0 :
				  nr_pages * page_size);
		if (area_alias == MAP_FAILED) {
			if (munmap(*alloc_area, nr_pages * page_size) < 0)
				perror("hugetlb munmap"), exit(1);
			*alloc_area = NULL;
			return;
		}
	}
	if (*alloc_area == area_src) {
		huge_fd_off0 = *alloc_area;
		alloc_area_alias = &area_src_alias;
	} else {
		alloc_area_alias = &area_dst_alias;
	}
	if (area_alias)
		*alloc_area_alias = area_alias;
}
static void hugetlb_alias_mapping(__u64 *start, size_t len,
				  unsigned long offset)
{
	if (!map_shared)
		return;
	/*
	 * We can't zap just the pagetable with hugetlbfs because
	 * MADV_DONTNEED won't work. So exercise -EEXIST on an alias
	 * mapping where the pagetables are not established initially;
	 * this way we'll exercise the -EEXIST at the fs level.
	 */
	*start = (unsigned long) area_dst_alias + offset;
}
static int shmem_release_pages(char *rel_area)
{
	int ret = 0;

	if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
		perror("madvise");
		ret = 1;
	}

	return ret;
}
static void shmem_allocate_area(void **alloc_area)
{
	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
			   MAP_ANONYMOUS | MAP_SHARED, -1, 0);
	if (*alloc_area == MAP_FAILED) {
		fprintf(stderr, "shared memory mmap failed\n");
		*alloc_area = NULL;
	}
}
struct uffd_test_ops {
	unsigned long expected_ioctls;
	void (*allocate_area)(void **alloc_area);
	int (*release_pages)(char *rel_area);
	void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
};

#define ANON_EXPECTED_IOCTLS		((1 << _UFFDIO_WAKE) | \
					 (1 << _UFFDIO_COPY) | \
					 (1 << _UFFDIO_ZEROPAGE))

static struct uffd_test_ops anon_uffd_test_ops = {
	.expected_ioctls = ANON_EXPECTED_IOCTLS,
	.allocate_area	= anon_allocate_area,
	.release_pages	= anon_release_pages,
	.alias_mapping	= noop_alias_mapping,
};

static struct uffd_test_ops shmem_uffd_test_ops = {
	.expected_ioctls = ANON_EXPECTED_IOCTLS,
	.allocate_area	= shmem_allocate_area,
	.release_pages	= shmem_release_pages,
	.alias_mapping	= noop_alias_mapping,
};

static struct uffd_test_ops hugetlb_uffd_test_ops = {
	.expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
	.allocate_area	= hugetlb_allocate_area,
	.release_pages	= hugetlb_release_pages,
	.alias_mapping	= hugetlb_alias_mapping,
};

static struct uffd_test_ops *uffd_test_ops;
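/*
 * A hypothetical sketch of how a further memory backend would plug into
 * the uffd_test_ops table above: supply allocate/release hooks plus an
 * alias_mapping callback, then point uffd_test_ops at it from
 * set_test_type(). The "example_*" callbacks below do not exist in this
 * test, so the block is disabled with #if 0.
 */
#if 0
static struct uffd_test_ops example_uffd_test_ops = {
	.expected_ioctls = ANON_EXPECTED_IOCTLS,
	.allocate_area	= example_allocate_area,
	.release_pages	= example_release_pages,
	.alias_mapping	= noop_alias_mapping,
};
#endif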
static int my_bcmp(char *str1, char *str2, size_t n)
{
	unsigned long i;

	for (i = 0; i < n; i++)
		if (str1[i] != str2[i])
			return 1;
	return 0;
}
static void *locking_thread(void *arg)
{
	unsigned long cpu = (unsigned long) arg;
	struct random_data rand;
	unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
	int32_t rand_nr;
	unsigned long long count;
	char randstate[64];
	unsigned int seed;
	time_t start;

	if (bounces & BOUNCE_RANDOM) {
		seed = (unsigned int) time(NULL) - bounces;
		if (!(bounces & BOUNCE_RACINGFAULTS))
			seed += cpu;
		bzero(&rand, sizeof(rand));
		bzero(&randstate, sizeof(randstate));
		if (initstate_r(seed, randstate, sizeof(randstate), &rand))
			fprintf(stderr, "srandom_r error\n"), exit(1);
	} else {
		page_nr = -bounces;
		if (!(bounces & BOUNCE_RACINGFAULTS))
			page_nr += cpu * nr_pages_per_cpu;
	}

	while (!finished) {
		if (bounces & BOUNCE_RANDOM) {
			if (random_r(&rand, &rand_nr))
				fprintf(stderr, "random_r 1 error\n"), exit(1);
			page_nr = rand_nr;
			if (sizeof(page_nr) > sizeof(rand_nr)) {
				if (random_r(&rand, &rand_nr))
					fprintf(stderr, "random_r 2 error\n"), exit(1);
				page_nr |= (((unsigned long) rand_nr) << 16) <<
					   16;
			}
		} else
			page_nr += 1;
		page_nr %= nr_pages;

		start = time(NULL);
		if (bounces & BOUNCE_VERIFY) {
			count = *area_count(area_dst, page_nr);
			if (!count)
				fprintf(stderr,
					"page_nr %lu wrong count %Lu %Lu\n",
					page_nr, count,
					count_verify[page_nr]), exit(1);

			/*
			 * We can't use bcmp (or memcmp) because that
			 * returns 0 erroneously if the memory is
			 * changing under it (even if the end of the
			 * page is never changing and always
			 * different).
			 */
#if 1
			if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
				     page_size))
				fprintf(stderr,
					"my_bcmp page_nr %lu wrong count %Lu %Lu\n",
					page_nr, count,
					count_verify[page_nr]), exit(1);
#else
			unsigned long loops;

			loops = 0;
			/* uncomment the below line to test with mutex */
			/* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
			while (!bcmp(area_dst + page_nr * page_size, zeropage,
				     page_size)) {
				loops += 1;
				if (loops > 10)
					break;
			}
			/* uncomment the below line to test with mutex */
			/* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
			if (loops) {
				fprintf(stderr,
					"page_nr %lu all zero thread %lu %p %lu\n",
					page_nr, cpu,
					area_dst + page_nr * page_size, loops);
				if (loops > 10)
					exit(1);
			}
#endif
		}

		pthread_mutex_lock(area_mutex(area_dst, page_nr));
		count = *area_count(area_dst, page_nr);
		if (count != count_verify[page_nr]) {
			fprintf(stderr,
				"page_nr %lu memory corruption %Lu %Lu\n",
				page_nr, count,
				count_verify[page_nr]), exit(1);
		}
		count++;
		*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
		pthread_mutex_unlock(area_mutex(area_dst, page_nr));

		if (time(NULL) - start > 1)
			fprintf(stderr,
				"userfault too slow %ld "
				"possible false positive with overcommit\n",
				time(NULL) - start);
	}

	return NULL;
}
static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
			    unsigned long offset)
{
	uffd_test_ops->alias_mapping(&uffdio_copy->dst,
				     uffdio_copy->len,
				     offset);
	if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
		/* real retval in uffdio_copy.copy */
		if (uffdio_copy->copy != -EEXIST)
			fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
				uffdio_copy->copy), exit(1);
	} else {
		fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n",
			uffdio_copy->copy), exit(1);
	}
}
static int __copy_page(int ufd, unsigned long offset, bool retry)
{
	struct uffdio_copy uffdio_copy;

	if (offset >= nr_pages * page_size)
		fprintf(stderr, "unexpected offset %lu\n",
			offset), exit(1);
	uffdio_copy.dst = (unsigned long) area_dst + offset;
	uffdio_copy.src = (unsigned long) area_src + offset;
	uffdio_copy.len = page_size;
	uffdio_copy.mode = 0;
	uffdio_copy.copy = 0;
	if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
		/* real retval in uffdio_copy.copy */
		if (uffdio_copy.copy != -EEXIST)
			fprintf(stderr, "UFFDIO_COPY error %Ld\n",
				uffdio_copy.copy), exit(1);
	} else if (uffdio_copy.copy != page_size) {
		fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
			uffdio_copy.copy), exit(1);
	} else {
		if (test_uffdio_copy_eexist && retry) {
			test_uffdio_copy_eexist = false;
			retry_copy_page(ufd, &uffdio_copy, offset);
		}
		return 1;
	}
	return 0;
}

static int copy_page_retry(int ufd, unsigned long offset)
{
	return __copy_page(ufd, offset, true);
}

static int copy_page(int ufd, unsigned long offset)
{
	return __copy_page(ufd, offset, false);
}
static int uffd_read_msg(int ufd, struct uffd_msg *msg)
{
	int ret = read(uffd, msg, sizeof(*msg));

	if (ret != sizeof(*msg)) {
		if (ret < 0) {
			if (errno == EAGAIN)
				return 1;
			else
				perror("blocking read error"), exit(1);
		} else {
			fprintf(stderr, "short read\n"), exit(1);
		}
	}

	return 0;
}
/* Return 1 if page fault handled by us; otherwise 0 */
static int uffd_handle_page_fault(struct uffd_msg *msg)
{
	unsigned long offset;

	if (msg->event != UFFD_EVENT_PAGEFAULT)
		fprintf(stderr, "unexpected msg event %u\n",
			msg->event), exit(1);

	if (bounces & BOUNCE_VERIFY &&
	    msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
		fprintf(stderr, "unexpected write fault\n"), exit(1);

	offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
	offset &= ~(page_size-1);

	return copy_page(uffd, offset);
}
static void *uffd_poll_thread(void *arg)
{
	unsigned long cpu = (unsigned long) arg;
	struct pollfd pollfd[2];
	struct uffd_msg msg;
	struct uffdio_register uffd_reg;
	int ret;
	char tmp_chr;
	unsigned long userfaults = 0;

	pollfd[0].fd = uffd;
	pollfd[0].events = POLLIN;
	pollfd[1].fd = pipefd[cpu*2];
	pollfd[1].events = POLLIN;

	for (;;) {
		ret = poll(pollfd, 2, -1);
		if (!ret)
			fprintf(stderr, "poll error %d\n", ret), exit(1);
		if (ret < 0)
			perror("poll"), exit(1);
		if (pollfd[1].revents & POLLIN) {
			if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
				fprintf(stderr, "read pipefd error\n"),
					exit(1);
			break;
		}
		if (!(pollfd[0].revents & POLLIN))
			fprintf(stderr, "pollfd[0].revents %d\n",
				pollfd[0].revents), exit(1);
		if (uffd_read_msg(uffd, &msg))
			continue;
		switch (msg.event) {
		default:
			fprintf(stderr, "unexpected msg event %u\n",
				msg.event), exit(1);
			break;
		case UFFD_EVENT_PAGEFAULT:
			userfaults += uffd_handle_page_fault(&msg);
			break;
		case UFFD_EVENT_FORK:
			close(uffd);
			uffd = msg.arg.fork.ufd;
			pollfd[0].fd = uffd;
			break;
		case UFFD_EVENT_REMOVE:
			uffd_reg.range.start = msg.arg.remove.start;
			uffd_reg.range.len = msg.arg.remove.end -
				msg.arg.remove.start;
			if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
				fprintf(stderr, "remove failure\n"), exit(1);
			break;
		case UFFD_EVENT_REMAP:
			area_dst = (char *)(unsigned long)msg.arg.remap.to;
			break;
		}
	}

	return (void *)userfaults;
}
pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;

static void *uffd_read_thread(void *arg)
{
	unsigned long *this_cpu_userfaults;
	struct uffd_msg msg;

	this_cpu_userfaults = (unsigned long *) arg;
	*this_cpu_userfaults = 0;

	pthread_mutex_unlock(&uffd_read_mutex);
	/* from here cancellation is ok */

	for (;;) {
		if (uffd_read_msg(uffd, &msg))
			continue;
		(*this_cpu_userfaults) += uffd_handle_page_fault(&msg);
	}

	return NULL;
}
static void *background_thread(void *arg)
{
	unsigned long cpu = (unsigned long) arg;
	unsigned long page_nr;

	for (page_nr = cpu * nr_pages_per_cpu;
	     page_nr < (cpu+1) * nr_pages_per_cpu;
	     page_nr++)
		copy_page_retry(uffd, page_nr * page_size);

	return NULL;
}
static int stress(unsigned long *userfaults)
{
	unsigned long cpu;
	pthread_t locking_threads[nr_cpus];
	pthread_t uffd_threads[nr_cpus];
	pthread_t background_threads[nr_cpus];
	void **_userfaults = (void **) userfaults;

	finished = 0;
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		if (pthread_create(&locking_threads[cpu], &attr,
				   locking_thread, (void *)cpu))
			return 1;
		if (bounces & BOUNCE_POLL) {
			if (pthread_create(&uffd_threads[cpu], &attr,
					   uffd_poll_thread, (void *)cpu))
				return 1;
		} else {
			if (pthread_create(&uffd_threads[cpu], &attr,
					   uffd_read_thread,
					   &_userfaults[cpu]))
				return 1;
			pthread_mutex_lock(&uffd_read_mutex);
		}
		if (pthread_create(&background_threads[cpu], &attr,
				   background_thread, (void *)cpu))
			return 1;
	}
	for (cpu = 0; cpu < nr_cpus; cpu++)
		if (pthread_join(background_threads[cpu], NULL))
			return 1;

	/*
	 * Be strict and immediately zap area_src, the whole area has
	 * been transferred already by the background threads. The
	 * area_src could then be faulted in in a racy way by still
	 * running uffdio_threads reading zeropages after we zapped
	 * area_src (but they're guaranteed to get -EEXIST from
	 * UFFDIO_COPY without writing zero pages into area_dst
	 * because the background threads already completed).
	 */
	if (uffd_test_ops->release_pages(area_src))
		return 1;

	finished = 1;
	for (cpu = 0; cpu < nr_cpus; cpu++)
		if (pthread_join(locking_threads[cpu], NULL))
			return 1;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		char c;
		if (bounces & BOUNCE_POLL) {
			if (write(pipefd[cpu*2+1], &c, 1) != 1) {
				fprintf(stderr, "pipefd write error\n");
				return 1;
			}
			if (pthread_join(uffd_threads[cpu],
					 &_userfaults[cpu]))
				return 1;
		} else {
			if (pthread_cancel(uffd_threads[cpu]))
				return 1;
			if (pthread_join(uffd_threads[cpu], NULL))
				return 1;
		}
	}

	return 0;
}
static int userfaultfd_open(int features)
{
	struct uffdio_api uffdio_api;

	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0) {
		fprintf(stderr,
			"userfaultfd syscall not available in this kernel\n");
		return 1;
	}
	uffd_flags = fcntl(uffd, F_GETFD, NULL);

	uffdio_api.api = UFFD_API;
	uffdio_api.features = features;
	if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
		fprintf(stderr, "UFFDIO_API\n");
		return 1;
	}
	if (uffdio_api.api != UFFD_API) {
		fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
		return 1;
	}

	return 0;
}
sigjmp_buf jbuf, *sigbuf;

static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
{
	if (sig == SIGBUS) {
		if (sigbuf)
			siglongjmp(*sigbuf, 1);
		abort();
	}
}
/*
 * For the non-cooperative userfaultfd test we fork() a process that will
 * generate pagefaults, will mremap the area monitored by the
 * userfaultfd and at last this process will release the monitored
 * area.
 * For anonymous and shared memory the area is divided into two
 * parts: the first part is accessed before mremap, and the second
 * part is accessed after mremap. Since hugetlbfs does not support
 * mremap, the entire monitored area is accessed in a single pass for
 * hugetlb.
 * The release of the pages currently generates events for shmem and
 * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
 * for hugetlb.
 * For the signal test (UFFD_FEATURE_SIGBUS), signal_test = 1: we register
 * the monitored area, generate pagefaults and test that the signal is
 * delivered. Use UFFDIO_COPY to allocate the missing page and retry. For
 * signal_test = 2 (the robustness use case) we release the monitored area,
 * fork a process that will generate pagefaults and verify that the signal
 * is generated.
 * This also tests the UFFD_FEATURE_EVENT_FORK event along with the signal
 * feature. Using the monitor thread, verify that no userfault events are
 * generated.
 */
static int faulting_process(int signal_test)
{
	unsigned long nr;
	unsigned long long count;
	unsigned long split_nr_pages;
	unsigned long lastnr;
	struct sigaction act;
	unsigned long signalled = 0;

	if (test_type != TEST_HUGETLB)
		split_nr_pages = (nr_pages + 1) / 2;
	else
		split_nr_pages = nr_pages;

	if (signal_test) {
		sigbuf = &jbuf;
		memset(&act, 0, sizeof(act));
		act.sa_sigaction = sighndl;
		act.sa_flags = SA_SIGINFO;
		if (sigaction(SIGBUS, &act, 0)) {
			perror("sigaction");
			return 1;
		}
		lastnr = (unsigned long)-1;
	}

	for (nr = 0; nr < split_nr_pages; nr++) {
		if (signal_test) {
			if (sigsetjmp(*sigbuf, 1) != 0) {
				if (nr == lastnr) {
					fprintf(stderr, "Signal repeated\n");
					return 1;
				}

				lastnr = nr;
				if (signal_test == 1) {
					if (copy_page(uffd, nr * page_size))
						signalled++;
				} else {
					signalled++;
					continue;
				}
			}
		}

		count = *area_count(area_dst, nr);
		if (count != count_verify[nr]) {
			fprintf(stderr,
				"nr %lu memory corruption %Lu %Lu\n",
				nr, count,
				count_verify[nr]), exit(1);
		}
	}

	if (signal_test)
		return signalled != split_nr_pages;

	if (test_type == TEST_HUGETLB)
		return 0;

	area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
	if (area_dst == MAP_FAILED)
		perror("mremap"), exit(1);

	for (; nr < nr_pages; nr++) {
		count = *area_count(area_dst, nr);
		if (count != count_verify[nr]) {
			fprintf(stderr,
				"nr %lu memory corruption %Lu %Lu\n",
				nr, count,
				count_verify[nr]), exit(1);
		}
	}

	if (uffd_test_ops->release_pages(area_dst))
		return 1;

	for (nr = 0; nr < nr_pages; nr++) {
		if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
			fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
	}

	return 0;
}
static void retry_uffdio_zeropage(int ufd,
				  struct uffdio_zeropage *uffdio_zeropage,
				  unsigned long offset)
{
	uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
				     uffdio_zeropage->range.len,
				     offset);
	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
		if (uffdio_zeropage->zeropage != -EEXIST)
			fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
				uffdio_zeropage->zeropage), exit(1);
	} else {
		fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
			uffdio_zeropage->zeropage), exit(1);
	}
}
static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
{
	struct uffdio_zeropage uffdio_zeropage;
	int ret;
	unsigned long has_zeropage;

	has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);

	if (offset >= nr_pages * page_size)
		fprintf(stderr, "unexpected offset %lu\n",
			offset), exit(1);
	uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
	uffdio_zeropage.range.len = page_size;
	uffdio_zeropage.mode = 0;
	ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
	if (ret) {
		/* real retval in uffdio_zeropage.zeropage */
		if (has_zeropage) {
			if (uffdio_zeropage.zeropage == -EEXIST)
				fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
					exit(1);
			else
				fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
					uffdio_zeropage.zeropage), exit(1);
		} else {
			if (uffdio_zeropage.zeropage != -EINVAL)
				fprintf(stderr,
					"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
					uffdio_zeropage.zeropage), exit(1);
		}
	} else if (has_zeropage) {
		if (uffdio_zeropage.zeropage != page_size) {
			fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
				uffdio_zeropage.zeropage), exit(1);
		} else {
			if (test_uffdio_zeropage_eexist && retry) {
				test_uffdio_zeropage_eexist = false;
				retry_uffdio_zeropage(ufd, &uffdio_zeropage,
						      offset);
			}
			return 1;
		}
	} else {
		fprintf(stderr,
			"UFFDIO_ZEROPAGE succeeded %Ld\n",
			uffdio_zeropage.zeropage), exit(1);
	}

	return 0;
}

static int uffdio_zeropage(int ufd, unsigned long offset)
{
	return __uffdio_zeropage(ufd, offset, false);
}
/* exercise UFFDIO_ZEROPAGE */
static int userfaultfd_zeropage_test(void)
{
	struct uffdio_register uffdio_register;
	unsigned long expected_ioctls;

	printf("testing UFFDIO_ZEROPAGE: ");
	fflush(stdout);

	if (uffd_test_ops->release_pages(area_dst))
		return 1;

	if (userfaultfd_open(0) < 0)
		return 1;
	uffdio_register.range.start = (unsigned long) area_dst;
	uffdio_register.range.len = nr_pages * page_size;
	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
		fprintf(stderr, "register failure\n"), exit(1);

	expected_ioctls = uffd_test_ops->expected_ioctls;
	if ((uffdio_register.ioctls & expected_ioctls) !=
	    expected_ioctls)
		fprintf(stderr,
			"unexpected missing ioctl for anon memory\n"),
			exit(1);

	if (uffdio_zeropage(uffd, 0)) {
		if (my_bcmp(area_dst, zeropage, page_size))
			fprintf(stderr, "zeropage is not zero\n"), exit(1);
	}

	close(uffd);
	printf("done.\n");
	return 0;
}
static int userfaultfd_events_test(void)
{
	struct uffdio_register uffdio_register;
	unsigned long expected_ioctls;
	unsigned long userfaults;
	pthread_t uffd_mon;
	int err, features;
	pid_t pid;
	char c;

	printf("testing events (fork, remap, remove): ");
	fflush(stdout);

	if (uffd_test_ops->release_pages(area_dst))
		return 1;

	features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
		UFFD_FEATURE_EVENT_REMOVE;
	if (userfaultfd_open(features) < 0)
		return 1;
	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);

	uffdio_register.range.start = (unsigned long) area_dst;
	uffdio_register.range.len = nr_pages * page_size;
	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
		fprintf(stderr, "register failure\n"), exit(1);

	expected_ioctls = uffd_test_ops->expected_ioctls;
	if ((uffdio_register.ioctls & expected_ioctls) !=
	    expected_ioctls)
		fprintf(stderr,
			"unexpected missing ioctl for anon memory\n"),
			exit(1);

	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
		perror("uffd_poll_thread create"), exit(1);

	pid = fork();
	if (pid < 0)
		perror("fork"), exit(1);

	if (!pid)
		return faulting_process(0);

	waitpid(pid, &err, 0);
	if (err)
		fprintf(stderr, "faulting process failed\n"), exit(1);

	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
		perror("pipe write"), exit(1);
	if (pthread_join(uffd_mon, (void **)&userfaults))
		return 1;

	close(uffd);
	printf("userfaults: %ld\n", userfaults);

	return userfaults != nr_pages;
}
static int userfaultfd_sig_test(void)
{
	struct uffdio_register uffdio_register;
	unsigned long expected_ioctls;
	unsigned long userfaults;
	pthread_t uffd_mon;
	int err, features;
	pid_t pid;
	char c;

	printf("testing signal delivery: ");
	fflush(stdout);

	if (uffd_test_ops->release_pages(area_dst))
		return 1;

	features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
	if (userfaultfd_open(features) < 0)
		return 1;
	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);

	uffdio_register.range.start = (unsigned long) area_dst;
	uffdio_register.range.len = nr_pages * page_size;
	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
		fprintf(stderr, "register failure\n"), exit(1);

	expected_ioctls = uffd_test_ops->expected_ioctls;
	if ((uffdio_register.ioctls & expected_ioctls) !=
	    expected_ioctls)
		fprintf(stderr,
			"unexpected missing ioctl for anon memory\n"),
			exit(1);

	if (faulting_process(1))
		fprintf(stderr, "faulting process failed\n"), exit(1);

	if (uffd_test_ops->release_pages(area_dst))
		return 1;

	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
		perror("uffd_poll_thread create"), exit(1);

	pid = fork();
	if (pid < 0)
		perror("fork"), exit(1);

	if (!pid)
		exit(faulting_process(2));

	waitpid(pid, &err, 0);
	if (err)
		fprintf(stderr, "faulting process failed\n"), exit(1);

	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
		perror("pipe write"), exit(1);
	if (pthread_join(uffd_mon, (void **)&userfaults))
		return 1;

	printf("done.\n");
	if (userfaults)
		fprintf(stderr, "Signal test failed, userfaults: %ld\n",
			userfaults);
	close(uffd);
	return userfaults != 0;
}
static int userfaultfd_stress(void)
{
	void *area;
	char *tmp_area;
	unsigned long nr;
	struct uffdio_register uffdio_register;
	unsigned long cpu;
	int err;
	unsigned long userfaults[nr_cpus];

	uffd_test_ops->allocate_area((void **)&area_src);
	if (!area_src)
		return 1;
	uffd_test_ops->allocate_area((void **)&area_dst);
	if (!area_dst)
		return 1;

	if (userfaultfd_open(0) < 0)
		return 1;

	count_verify = malloc(nr_pages * sizeof(unsigned long long));
	if (!count_verify) {
		perror("count_verify");
		return 1;
	}

	for (nr = 0; nr < nr_pages; nr++) {
		*area_mutex(area_src, nr) = (pthread_mutex_t)
			PTHREAD_MUTEX_INITIALIZER;
		count_verify[nr] = *area_count(area_src, nr) = 1;
		/*
		 * In the transition between 255 to 256, powerpc will
		 * read out of order in my_bcmp and see both bytes as
		 * zero, so leave a placeholder below always non-zero
		 * after the count, to avoid my_bcmp triggering false
		 * positives.
		 */
		*(area_count(area_src, nr) + 1) = 1;
	}

	pipefd = malloc(sizeof(int) * nr_cpus * 2);
	if (!pipefd) {
		perror("pipefd");
		return 1;
	}
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
			perror("pipe");
			return 1;
		}
	}

	if (posix_memalign(&area, page_size, page_size)) {
		fprintf(stderr, "out of memory\n");
		return 1;
	}
	zeropage = area;
	bzero(zeropage, page_size);

	pthread_mutex_lock(&uffd_read_mutex);

	pthread_attr_init(&attr);
	pthread_attr_setstacksize(&attr, 16*1024*1024);

	err = 0;
	while (bounces--) {
		unsigned long expected_ioctls;

		printf("bounces: %d, mode:", bounces);
		if (bounces & BOUNCE_RANDOM)
			printf(" rnd");
		if (bounces & BOUNCE_RACINGFAULTS)
			printf(" racing");
		if (bounces & BOUNCE_VERIFY)
			printf(" ver");
		if (bounces & BOUNCE_POLL)
			printf(" poll");
		printf(", ");
		fflush(stdout);

		if (bounces & BOUNCE_POLL)
			fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
		else
			fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);

		/* register */
		uffdio_register.range.start = (unsigned long) area_dst;
		uffdio_register.range.len = nr_pages * page_size;
		uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
		if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
			fprintf(stderr, "register failure\n");
			return 1;
		}
		expected_ioctls = uffd_test_ops->expected_ioctls;
		if ((uffdio_register.ioctls & expected_ioctls) !=
		    expected_ioctls) {
			fprintf(stderr,
				"unexpected missing ioctl for anon memory\n");
			return 1;
		}

		if (area_dst_alias) {
			uffdio_register.range.start = (unsigned long)
				area_dst_alias;
			if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
				fprintf(stderr, "register failure alias\n");
				return 1;
			}
		}

		/*
		 * The madvise done previously isn't enough: some
		 * uffd_thread could have read userfaults (one of
		 * those already resolved by the background thread)
		 * and it may be in the process of calling
		 * UFFDIO_COPY. UFFDIO_COPY will read the zapped
		 * area_src and it would map a zero page in it (of
		 * course such a UFFDIO_COPY is perfectly safe as it'd
		 * return -EEXIST). The problem comes at the next
		 * bounce though: that racing UFFDIO_COPY would
		 * generate zeropages in the area_src, invalidating
		 * the previous MADV_DONTNEED. Without this additional
		 * MADV_DONTNEED those zeropage leftovers in the
		 * area_src would lead to -EEXIST failures during the
		 * next bounce, effectively leaving a zeropage in the
		 * area_dst.
		 *
		 * Try commenting out this madvise to see the memory
		 * corruption being caught pretty quickly.
		 *
		 * khugepaged is prevented from collapsing THP after
		 * MADV_DONTNEED only once the UFFDIO_REGISTER is done,
		 * so the MADV_DONTNEED has to happen here.
		 */
		if (uffd_test_ops->release_pages(area_dst))
			return 1;

		/* bounce pass */
		if (stress(userfaults))
			return 1;

		/* unregister */
		if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
			fprintf(stderr, "unregister failure\n");
			return 1;
		}
		if (area_dst_alias) {
			uffdio_register.range.start = (unsigned long) area_dst;
			if (ioctl(uffd, UFFDIO_UNREGISTER,
				  &uffdio_register.range)) {
				fprintf(stderr, "unregister failure alias\n");
				return 1;
			}
		}

		/* verification */
		if (bounces & BOUNCE_VERIFY) {
			for (nr = 0; nr < nr_pages; nr++) {
				if (*area_count(area_dst, nr) != count_verify[nr]) {
					fprintf(stderr,
						"error area_count %Lu %Lu %lu\n",
						*area_count(area_src, nr),
						count_verify[nr],
						nr);
					err = 1;
					bounces = 0;
				}
			}
		}

		/* prepare next bounce */
		tmp_area = area_src;
		area_src = area_dst;
		area_dst = tmp_area;

		tmp_area = area_src_alias;
		area_src_alias = area_dst_alias;
		area_dst_alias = tmp_area;

		printf("userfaults:");
		for (cpu = 0; cpu < nr_cpus; cpu++)
			printf(" %lu", userfaults[cpu]);
		printf("\n");
	}

	if (err)
		return err;

	close(uffd);
	return userfaultfd_zeropage_test() || userfaultfd_sig_test()
		|| userfaultfd_events_test();
}
/*
 * Copied from mlock2-tests.c
 */
unsigned long default_huge_page_size(void)
{
	unsigned long hps = 0;
	char *line = NULL;
	size_t linelen = 0;
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return 0;
	while (getline(&line, &linelen, f) > 0) {
		if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
			hps <<= 10;
			break;
		}
	}

	free(line);
	fclose(f);
	return hps;
}
static void set_test_type(const char *type)
{
	if (!strcmp(type, "anon")) {
		test_type = TEST_ANON;
		uffd_test_ops = &anon_uffd_test_ops;
	} else if (!strcmp(type, "hugetlb")) {
		test_type = TEST_HUGETLB;
		uffd_test_ops = &hugetlb_uffd_test_ops;
	} else if (!strcmp(type, "hugetlb_shared")) {
		map_shared = true;
		test_type = TEST_HUGETLB;
		uffd_test_ops = &hugetlb_uffd_test_ops;
	} else if (!strcmp(type, "shmem")) {
		map_shared = true;
		test_type = TEST_SHMEM;
		uffd_test_ops = &shmem_uffd_test_ops;
	} else {
		fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
	}

	if (test_type == TEST_HUGETLB)
		page_size = default_huge_page_size();
	else
		page_size = sysconf(_SC_PAGE_SIZE);

	if (!page_size)
		fprintf(stderr, "Unable to determine page size\n"),
				exit(2);
	if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
	    > page_size)
		fprintf(stderr, "Impossible to run this test\n"), exit(2);
}
static void sigalrm(int sig)
{
	if (sig != SIGALRM)
		abort();
	test_uffdio_copy_eexist = true;
	test_uffdio_zeropage_eexist = true;
	alarm(ALARM_INTERVAL_SECS);
}
int main(int argc, char **argv)
{
	if (argc < 4)
		usage();

	if (signal(SIGALRM, sigalrm) == SIG_ERR)
		fprintf(stderr, "failed to arm SIGALRM"), exit(1);
	alarm(ALARM_INTERVAL_SECS);

	set_test_type(argv[1]);

	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
		nr_cpus;
	if (!nr_pages_per_cpu) {
		fprintf(stderr, "invalid MiB\n");
		usage();
	}

	bounces = atoi(argv[3]);
	if (bounces <= 0) {
		fprintf(stderr, "invalid bounces\n");
		usage();
	}
	nr_pages = nr_pages_per_cpu * nr_cpus;

	if (test_type == TEST_HUGETLB) {
		if (argc < 5)
			usage();
		huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
		if (huge_fd < 0) {
			fprintf(stderr, "Open of %s failed", argv[4]);
			perror("open");
			exit(1);
		}
		if (ftruncate(huge_fd, 0)) {
			fprintf(stderr, "ftruncate %s to size 0 failed",
				argv[4]);
			perror("ftruncate");
			exit(1);
		}
	}
	printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
	       nr_pages, nr_pages_per_cpu);
	return userfaultfd_stress();
}
#else /* __NR_userfaultfd */

#warning "missing __NR_userfaultfd definition"

int main(void)
{
	printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
	return KSFT_SKIP;
}

#endif /* __NR_userfaultfd */