1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright 2020, Gustavo Luiz Duarte, IBM Corp.
5 * This test starts a transaction and triggers a signal, forcing a pagefault to
6 * happen when the kernel signal handling code touches the user signal stack.
8 * In order to avoid pre-faulting the signal stack memory and to force the
9 * pagefault to happen precisely in the kernel signal handling code, the
10 * pagefault handling is done in userspace using the userfaultfd facility.
12 * Further pagefaults are triggered by crafting the signal handler's ucontext
13 * to point to additional memory regions managed by the userfaultfd, reusing
14 * the same mechanism that avoids pre-faulting the signal stack memory.
16 * On failure (bug is present) the kernel crashes or never returns control back
17 * to userspace. If the bug is not present, the test completes almost immediately.
23 #include <linux/userfaultfd.h>
26 #include <sys/ioctl.h>
27 #include <sys/syscall.h>
37 #define UF_MEM_SIZE 655360 /* 10 x 64k pages */
39 /* Memory handled by userfaultfd */
41 static size_t uf_mem_offset
= 0;
44 * Data that will be copied into the faulting pages (instead of zero-filled
45 * pages). This is used to make the test more reliable and avoid segfaulting
46 * when we return from the signal handler. Since we are making the signal
47 * handler's ucontext point to newly allocated memory, when that memory is
48 * paged-in it will contain the expected content.
50 static char backing_mem
[UF_MEM_SIZE
];
52 static size_t pagesize
;
55 * Return a chunk of at least 'size' bytes of memory that will be handled by
56 * userfaultfd. If 'backing_data' is not NULL, its content will be saved to
57 * 'backing_mem' and then copied into the faulting pages when the page fault
60 void *get_uf_mem(size_t size
, void *backing_data
)
64 if (uf_mem_offset
+ size
> UF_MEM_SIZE
) {
65 fprintf(stderr
, "Requesting more uf_mem than expected!\n");
69 ret
= &uf_mem
[uf_mem_offset
];
71 /* Save the data that will be copied into the faulting page */
72 if (backing_data
!= NULL
)
73 memcpy(&backing_mem
[uf_mem_offset
], backing_data
, size
);
75 /* Reserve the requested amount of uf_mem */
76 uf_mem_offset
+= size
;
77 /* Keep uf_mem_offset aligned to the page size (round up) */
78 uf_mem_offset
= (uf_mem_offset
+ pagesize
- 1) & ~(pagesize
- 1);
83 void *fault_handler_thread(void *arg
)
85 struct uffd_msg msg
; /* Data read from userfaultfd */
86 long uffd
; /* userfaultfd file descriptor */
87 struct uffdio_copy uffdio_copy
;
89 ssize_t nread
, offset
;
95 pollfd
.events
= POLLIN
;
96 if (poll(&pollfd
, 1, -1) == -1) {
97 perror("poll() failed");
101 nread
= read(uffd
, &msg
, sizeof(msg
));
103 fprintf(stderr
, "read(): EOF on userfaultfd\n");
108 perror("read() failed");
112 /* We expect only one kind of event */
113 if (msg
.event
!= UFFD_EVENT_PAGEFAULT
) {
114 fprintf(stderr
, "Unexpected event on userfaultfd\n");
119 * We need to handle page faults in units of pages(!).
120 * So, round faulting address down to page boundary.
122 uffdio_copy
.dst
= msg
.arg
.pagefault
.address
& ~(pagesize
-1);
124 offset
= (char *) uffdio_copy
.dst
- uf_mem
;
125 uffdio_copy
.src
= (unsigned long) &backing_mem
[offset
];
127 uffdio_copy
.len
= pagesize
;
128 uffdio_copy
.mode
= 0;
129 uffdio_copy
.copy
= 0;
130 if (ioctl(uffd
, UFFDIO_COPY
, &uffdio_copy
) == -1) {
131 perror("ioctl-UFFDIO_COPY failed");
137 void setup_uf_mem(void)
139 long uffd
; /* userfaultfd file descriptor */
141 struct uffdio_api uffdio_api
;
142 struct uffdio_register uffdio_register
;
145 pagesize
= sysconf(_SC_PAGE_SIZE
);
147 /* Create and enable userfaultfd object */
148 uffd
= syscall(__NR_userfaultfd
, O_CLOEXEC
| O_NONBLOCK
);
150 perror("userfaultfd() failed");
153 uffdio_api
.api
= UFFD_API
;
154 uffdio_api
.features
= 0;
155 if (ioctl(uffd
, UFFDIO_API
, &uffdio_api
) == -1) {
156 perror("ioctl-UFFDIO_API failed");
161 * Create a private anonymous mapping. The memory will be demand-zero
162 * paged, that is, not yet allocated. When we actually touch the memory
163 * the related page will be allocated via the userfaultfd mechanism.
165 uf_mem
= mmap(NULL
, UF_MEM_SIZE
, PROT_READ
| PROT_WRITE
,
166 MAP_PRIVATE
| MAP_ANONYMOUS
, -1, 0);
167 if (uf_mem
== MAP_FAILED
) {
168 perror("mmap() failed");
173 * Register the memory range of the mapping we've just mapped to be
174 * handled by the userfaultfd object. In 'mode' we request to track
175 * missing pages (i.e. pages that have not yet been faulted-in).
177 uffdio_register
.range
.start
= (unsigned long) uf_mem
;
178 uffdio_register
.range
.len
= UF_MEM_SIZE
;
179 uffdio_register
.mode
= UFFDIO_REGISTER_MODE_MISSING
;
180 if (ioctl(uffd
, UFFDIO_REGISTER
, &uffdio_register
) == -1) {
181 perror("ioctl-UFFDIO_REGISTER");
185 /* Create a thread that will process the userfaultfd events */
186 ret
= pthread_create(&thr
, NULL
, fault_handler_thread
, (void *) uffd
);
188 fprintf(stderr
, "pthread_create(): Error. Returned %d\n", ret
);
194 * Assumption: the signal was delivered while userspace was in transactional or
195 * suspended state, i.e. uc->uc_link != NULL.
197 void signal_handler(int signo
, siginfo_t
*si
, void *uc
)
199 ucontext_t
*ucp
= uc
;
201 /* Skip 'trap' after returning, otherwise we get a SIGTRAP again */
202 ucp
->uc_link
->uc_mcontext
.regs
->nip
+= 4;
204 ucp
->uc_mcontext
.v_regs
=
205 get_uf_mem(sizeof(elf_vrreg_t
), ucp
->uc_mcontext
.v_regs
);
207 ucp
->uc_link
->uc_mcontext
.v_regs
=
208 get_uf_mem(sizeof(elf_vrreg_t
), ucp
->uc_link
->uc_mcontext
.v_regs
);
210 ucp
->uc_link
= get_uf_mem(sizeof(ucontext_t
), ucp
->uc_link
);
213 bool have_userfaultfd(void)
218 rc
= syscall(__NR_userfaultfd
, -1);
220 return rc
== 0 || errno
!= ENOSYS
;
223 int tm_signal_pagefault(void)
228 SKIP_IF(!have_htm());
229 SKIP_IF(!have_userfaultfd());
234 * Set an alternative stack that will generate a page fault when the
235 * signal is raised. The page fault will be treated via userfaultfd,
236 * i.e. via fault_handler_thread.
238 ss
.ss_sp
= get_uf_mem(SIGSTKSZ
, NULL
);
239 ss
.ss_size
= SIGSTKSZ
;
241 if (sigaltstack(&ss
, NULL
) == -1) {
242 perror("sigaltstack() failed");
246 sa
.sa_flags
= SA_SIGINFO
| SA_ONSTACK
;
247 sa
.sa_sigaction
= signal_handler
;
248 if (sigaction(SIGTRAP
, &sa
, NULL
) == -1) {
249 perror("sigaction() failed");
253 /* Trigger a SIGTRAP in transactional state */
261 /* Trigger a SIGTRAP in suspended state */
274 int main(int argc
, char **argv
)
277 * Depending on kernel config, the TM Bad Thing might not result in a
278 * crash, instead the kernel never returns control back to userspace, so
279 * set a tight timeout. If the test passes it completes almost
282 test_harness_set_timeout(2);
283 return test_harness(tm_signal_pagefault
, "tm_signal_pagefault");