1 // SPDX-License-Identifier: GPL-2.0-or-later
3 #include <linux/syscalls.h>
4 #include <linux/time_namespace.h>
9 * Support for robust futexes: the kernel cleans up held futexes at
12 * Implementation: user-space maintains a per-thread list of locks it
13 * is holding. Upon do_exit(), the kernel carefully walks this list,
14 * and marks all locks that are owned by this thread with the
15 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
16 * always manipulated with the lock held, so the list is private and
17 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
18 * field, to allow the kernel to clean up if the thread dies after
19 * acquiring the lock, but just before it could have added itself to
20 * the list. There can only be one such pending lock.
24 * sys_set_robust_list() - Set the robust-futex list head of a task
25 * @head: pointer to the list-head
26 * @len: length of the list-head, as userspace expects
28 SYSCALL_DEFINE2(set_robust_list
, struct robust_list_head __user
*, head
,
32 * The kernel knows only one size for now:
34 if (unlikely(len
!= sizeof(*head
)))
37 current
->robust_list
= head
;
43 * sys_get_robust_list() - Get the robust-futex list head of a task
44 * @pid: pid of the process [zero for current task]
45 * @head_ptr: pointer to a list-head pointer, the kernel fills it in
46 * @len_ptr: pointer to a length field, the kernel fills in the header size
48 SYSCALL_DEFINE3(get_robust_list
, int, pid
,
49 struct robust_list_head __user
* __user
*, head_ptr
,
50 size_t __user
*, len_ptr
)
52 struct robust_list_head __user
*head
;
54 struct task_struct
*p
;
62 p
= find_task_by_vpid(pid
);
68 if (!ptrace_may_access(p
, PTRACE_MODE_READ_REALCREDS
))
71 head
= p
->robust_list
;
74 if (put_user(sizeof(*head
), len_ptr
))
76 return put_user(head
, head_ptr
);
84 long do_futex(u32 __user
*uaddr
, int op
, u32 val
, ktime_t
*timeout
,
85 u32 __user
*uaddr2
, u32 val2
, u32 val3
)
87 unsigned int flags
= futex_to_flags(op
);
88 int cmd
= op
& FUTEX_CMD_MASK
;
90 if (flags
& FLAGS_CLOCKRT
) {
91 if (cmd
!= FUTEX_WAIT_BITSET
&&
92 cmd
!= FUTEX_WAIT_REQUEUE_PI
&&
93 cmd
!= FUTEX_LOCK_PI2
)
99 val3
= FUTEX_BITSET_MATCH_ANY
;
101 case FUTEX_WAIT_BITSET
:
102 return futex_wait(uaddr
, flags
, val
, timeout
, val3
);
104 val3
= FUTEX_BITSET_MATCH_ANY
;
106 case FUTEX_WAKE_BITSET
:
107 return futex_wake(uaddr
, flags
, val
, val3
);
109 return futex_requeue(uaddr
, flags
, uaddr2
, flags
, val
, val2
, NULL
, 0);
110 case FUTEX_CMP_REQUEUE
:
111 return futex_requeue(uaddr
, flags
, uaddr2
, flags
, val
, val2
, &val3
, 0);
113 return futex_wake_op(uaddr
, flags
, uaddr2
, val
, val2
, val3
);
115 flags
|= FLAGS_CLOCKRT
;
118 return futex_lock_pi(uaddr
, flags
, timeout
, 0);
119 case FUTEX_UNLOCK_PI
:
120 return futex_unlock_pi(uaddr
, flags
);
121 case FUTEX_TRYLOCK_PI
:
122 return futex_lock_pi(uaddr
, flags
, NULL
, 1);
123 case FUTEX_WAIT_REQUEUE_PI
:
124 val3
= FUTEX_BITSET_MATCH_ANY
;
125 return futex_wait_requeue_pi(uaddr
, flags
, val
, timeout
, val3
,
127 case FUTEX_CMP_REQUEUE_PI
:
128 return futex_requeue(uaddr
, flags
, uaddr2
, flags
, val
, val2
, &val3
, 1);
133 static __always_inline
bool futex_cmd_has_timeout(u32 cmd
)
139 case FUTEX_WAIT_BITSET
:
140 case FUTEX_WAIT_REQUEUE_PI
:
146 static __always_inline
int
147 futex_init_timeout(u32 cmd
, u32 op
, struct timespec64
*ts
, ktime_t
*t
)
149 if (!timespec64_valid(ts
))
152 *t
= timespec64_to_ktime(*ts
);
153 if (cmd
== FUTEX_WAIT
)
154 *t
= ktime_add_safe(ktime_get(), *t
);
155 else if (cmd
!= FUTEX_LOCK_PI
&& !(op
& FUTEX_CLOCK_REALTIME
))
156 *t
= timens_ktime_to_host(CLOCK_MONOTONIC
, *t
);
160 SYSCALL_DEFINE6(futex
, u32 __user
*, uaddr
, int, op
, u32
, val
,
161 const struct __kernel_timespec __user
*, utime
,
162 u32 __user
*, uaddr2
, u32
, val3
)
164 int ret
, cmd
= op
& FUTEX_CMD_MASK
;
165 ktime_t t
, *tp
= NULL
;
166 struct timespec64 ts
;
168 if (utime
&& futex_cmd_has_timeout(cmd
)) {
169 if (unlikely(should_fail_futex(!(op
& FUTEX_PRIVATE_FLAG
))))
171 if (get_timespec64(&ts
, utime
))
173 ret
= futex_init_timeout(cmd
, op
, &ts
, &t
);
179 return do_futex(uaddr
, op
, val
, tp
, uaddr2
, (unsigned long)utime
, val3
);
183 * futex_parse_waitv - Parse a waitv array from userspace
184 * @futexv: Kernel side list of waiters to be filled
185 * @uwaitv: Userspace list to be parsed
186 * @nr_futexes: Length of futexv
187 * @wake: Wake to call when futex is woken
188 * @wake_data: Data for the wake handler
190 * Return: Error code on failure, 0 on success
192 int futex_parse_waitv(struct futex_vector
*futexv
,
193 struct futex_waitv __user
*uwaitv
,
194 unsigned int nr_futexes
, futex_wake_fn
*wake
,
197 struct futex_waitv aux
;
200 for (i
= 0; i
< nr_futexes
; i
++) {
203 if (copy_from_user(&aux
, &uwaitv
[i
], sizeof(aux
)))
206 if ((aux
.flags
& ~FUTEX2_VALID_MASK
) || aux
.__reserved
)
209 flags
= futex2_to_flags(aux
.flags
);
210 if (!futex_flags_valid(flags
))
213 if (!futex_validate_input(flags
, aux
.val
))
216 futexv
[i
].w
.flags
= flags
;
217 futexv
[i
].w
.val
= aux
.val
;
218 futexv
[i
].w
.uaddr
= aux
.uaddr
;
219 futexv
[i
].q
= futex_q_init
;
220 futexv
[i
].q
.wake
= wake
;
221 futexv
[i
].q
.wake_data
= wake_data
;
227 static int futex2_setup_timeout(struct __kernel_timespec __user
*timeout
,
228 clockid_t clockid
, struct hrtimer_sleeper
*to
)
230 int flag_clkid
= 0, flag_init
= 0;
231 struct timespec64 ts
;
238 if (clockid
== CLOCK_REALTIME
) {
239 flag_clkid
= FLAGS_CLOCKRT
;
240 flag_init
= FUTEX_CLOCK_REALTIME
;
243 if (clockid
!= CLOCK_REALTIME
&& clockid
!= CLOCK_MONOTONIC
)
246 if (get_timespec64(&ts
, timeout
))
250 * Since there's no opcode for futex_waitv, use
251 * FUTEX_WAIT_BITSET that uses absolute timeout as well
253 ret
= futex_init_timeout(FUTEX_WAIT_BITSET
, flag_init
, &ts
, &time
);
257 futex_setup_timer(&time
, to
, flag_clkid
, 0);
261 static inline void futex2_destroy_timeout(struct hrtimer_sleeper
*to
)
263 hrtimer_cancel(&to
->timer
);
264 destroy_hrtimer_on_stack(&to
->timer
);
268 * sys_futex_waitv - Wait on a list of futexes
269 * @waiters: List of futexes to wait on
270 * @nr_futexes: Length of futexv
271 * @flags: Flag for timeout (monotonic/realtime)
272 * @timeout: Optional absolute timeout.
273 * @clockid: Clock to be used for the timeout, realtime or monotonic.
275 * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
276 * if a futex_wake() is performed at any uaddr. The syscall returns immediately
277 * if any waiter has *uaddr != val. *timeout is an optional timeout value for
278 * the operation. Each waiter has individual flags. The `flags` argument for
279 * the syscall should be used solely for specifying the timeout as realtime, if
280 * needed. Flags for private futexes, sizes, etc. should be used on the
281 * individual flags of each waiter.
283 * Returns the array index of one of the woken futexes. No further information
284 * is provided: any number of other futexes may also have been woken by the
285 * same event, and if more than one futex was woken, the returned index may
286 * refer to any one of them. (It is not necessarily the futex with the
287 * smallest index, nor the one most recently woken, nor...)
290 SYSCALL_DEFINE5(futex_waitv
, struct futex_waitv __user
*, waiters
,
291 unsigned int, nr_futexes
, unsigned int, flags
,
292 struct __kernel_timespec __user
*, timeout
, clockid_t
, clockid
)
294 struct hrtimer_sleeper to
;
295 struct futex_vector
*futexv
;
298 /* This syscall supports no flags for now */
302 if (!nr_futexes
|| nr_futexes
> FUTEX_WAITV_MAX
|| !waiters
)
305 if (timeout
&& (ret
= futex2_setup_timeout(timeout
, clockid
, &to
)))
308 futexv
= kcalloc(nr_futexes
, sizeof(*futexv
), GFP_KERNEL
);
314 ret
= futex_parse_waitv(futexv
, waiters
, nr_futexes
, futex_wake_mark
,
317 ret
= futex_wait_multiple(futexv
, nr_futexes
, timeout
? &to
: NULL
);
323 futex2_destroy_timeout(&to
);
328 * sys_futex_wake - Wake a number of futexes
329 * @uaddr: Address of the futex(es) to wake
331 * @nr: Number of the futexes to wake
332 * @flags: FUTEX2 flags
334 * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the
335 * futex2 family of calls.
338 SYSCALL_DEFINE4(futex_wake
,
339 void __user
*, uaddr
,
344 if (flags
& ~FUTEX2_VALID_MASK
)
347 flags
= futex2_to_flags(flags
);
348 if (!futex_flags_valid(flags
))
351 if (!futex_validate_input(flags
, mask
))
354 return futex_wake(uaddr
, FLAGS_STRICT
| flags
, nr
, mask
);
358 * sys_futex_wait - Wait on a futex
359 * @uaddr: Address of the futex to wait on
360 * @val: Value of @uaddr
362 * @flags: FUTEX2 flags
363 * @timeout: Optional absolute timeout
364 * @clockid: Clock to be used for the timeout, realtime or monotonic
366 * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the
367 * futex2 family of calls.
370 SYSCALL_DEFINE6(futex_wait
,
371 void __user
*, uaddr
,
375 struct __kernel_timespec __user
*, timeout
,
378 struct hrtimer_sleeper to
;
381 if (flags
& ~FUTEX2_VALID_MASK
)
384 flags
= futex2_to_flags(flags
);
385 if (!futex_flags_valid(flags
))
388 if (!futex_validate_input(flags
, val
) ||
389 !futex_validate_input(flags
, mask
))
392 if (timeout
&& (ret
= futex2_setup_timeout(timeout
, clockid
, &to
)))
395 ret
= __futex_wait(uaddr
, flags
, val
, timeout
? &to
: NULL
, mask
);
398 futex2_destroy_timeout(&to
);
404 * sys_futex_requeue - Requeue a waiter from one futex to another
405 * @waiters: array describing the source and destination futex
407 * @nr_wake: number of futexes to wake
408 * @nr_requeue: number of futexes to requeue
410 * Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the
411 * futex2 family of calls.
414 SYSCALL_DEFINE4(futex_requeue
,
415 struct futex_waitv __user
*, waiters
,
420 struct futex_vector futexes
[2];
430 ret
= futex_parse_waitv(futexes
, waiters
, 2, futex_wake_mark
, NULL
);
434 cmpval
= futexes
[0].w
.val
;
436 return futex_requeue(u64_to_user_ptr(futexes
[0].w
.uaddr
), futexes
[0].w
.flags
,
437 u64_to_user_ptr(futexes
[1].w
.uaddr
), futexes
[1].w
.flags
,
438 nr_wake
, nr_requeue
, &cmpval
, 0);
442 COMPAT_SYSCALL_DEFINE2(set_robust_list
,
443 struct compat_robust_list_head __user
*, head
,
446 if (unlikely(len
!= sizeof(*head
)))
449 current
->compat_robust_list
= head
;
454 COMPAT_SYSCALL_DEFINE3(get_robust_list
, int, pid
,
455 compat_uptr_t __user
*, head_ptr
,
456 compat_size_t __user
*, len_ptr
)
458 struct compat_robust_list_head __user
*head
;
460 struct task_struct
*p
;
468 p
= find_task_by_vpid(pid
);
474 if (!ptrace_may_access(p
, PTRACE_MODE_READ_REALCREDS
))
477 head
= p
->compat_robust_list
;
480 if (put_user(sizeof(*head
), len_ptr
))
482 return put_user(ptr_to_compat(head
), head_ptr
);
489 #endif /* CONFIG_COMPAT */
491 #ifdef CONFIG_COMPAT_32BIT_TIME
492 SYSCALL_DEFINE6(futex_time32
, u32 __user
*, uaddr
, int, op
, u32
, val
,
493 const struct old_timespec32 __user
*, utime
, u32 __user
*, uaddr2
,
496 int ret
, cmd
= op
& FUTEX_CMD_MASK
;
497 ktime_t t
, *tp
= NULL
;
498 struct timespec64 ts
;
500 if (utime
&& futex_cmd_has_timeout(cmd
)) {
501 if (get_old_timespec32(&ts
, utime
))
503 ret
= futex_init_timeout(cmd
, op
, &ts
, &t
);
509 return do_futex(uaddr
, op
, val
, tp
, uaddr2
, (unsigned long)utime
, val3
);
511 #endif /* CONFIG_COMPAT_32BIT_TIME */