1 // SPDX-License-Identifier: GPL-2.0
3 * shstk.c - Intel shadow stack support
5 * Copyright (c) 2021, Intel Corporation.
6 * Yu-cheng Yu <yu-cheng.yu@intel.com>
9 #include <linux/sched.h>
10 #include <linux/bitops.h>
11 #include <linux/types.h>
13 #include <linux/mman.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/sched/signal.h>
17 #include <linux/compat.h>
18 #include <linux/sizes.h>
19 #include <linux/user.h>
20 #include <linux/syscalls.h>
22 #include <asm/fpu/xstate.h>
23 #include <asm/fpu/types.h>
24 #include <asm/shstk.h>
25 #include <asm/special_insns.h>
26 #include <asm/fpu/api.h>
27 #include <asm/prctl.h>
/*
 * Size in bytes of one shadow stack slot. The SSP is stepped down by this
 * amount before each 64-bit value is written to the user shadow stack.
 */
#define SS_FRAME_SIZE 8
31 static bool features_enabled(unsigned long features
)
33 return current
->thread
.features
& features
;
36 static void features_set(unsigned long features
)
38 current
->thread
.features
|= features
;
41 static void features_clr(unsigned long features
)
43 current
->thread
.features
&= ~features
;
47 * Create a restore token on the shadow stack. A token is always 8-byte
50 static int create_rstor_token(unsigned long ssp
, unsigned long *token_addr
)
54 /* Token must be aligned */
55 if (!IS_ALIGNED(ssp
, 8))
58 addr
= ssp
- SS_FRAME_SIZE
;
61 * SSP is aligned, so reserved bits and mode bit are zero, just mark
66 if (write_user_shstk_64((u64 __user
*)addr
, (u64
)ssp
))
76 * VM_SHADOW_STACK will have a guard page. This helps userspace protect
77 * itself from attacks. The reasoning is as follows:
79 * The shadow stack pointer (SSP) is moved by CALL, RET, and INCSSPQ. The
80 * INCSSP instruction can increment the shadow stack pointer. It is the
81 * shadow stack analog of an instruction like:
85 * However, there is one important difference between an ADD on %rsp
86 * and INCSSP. In addition to modifying SSP, INCSSP also reads from the
87 * memory of the first and last elements that were "popped". It can be
88 * thought of as acting like this:
90 * READ_ONCE(ssp); // read+discard top element on stack
91 * ssp += nr_to_pop * 8; // move the shadow stack
92 * READ_ONCE(ssp-8); // read+discard last popped stack element
94 * The maximum distance INCSSP can move the SSP is 2040 bytes, before
95 * it would read the memory. Therefore a single page gap will be enough
96 * to prevent any operation from shifting the SSP to an adjacent stack,
97 * since it would have to land in the gap at least once, causing a
100 static unsigned long alloc_shstk(unsigned long addr
, unsigned long size
,
101 unsigned long token_offset
, bool set_res_tok
)
103 int flags
= MAP_ANONYMOUS
| MAP_PRIVATE
| MAP_ABOVE4G
;
104 struct mm_struct
*mm
= current
->mm
;
105 unsigned long mapped_addr
, unused
;
108 flags
|= MAP_FIXED_NOREPLACE
;
111 mapped_addr
= do_mmap(NULL
, addr
, size
, PROT_READ
, flags
,
112 VM_SHADOW_STACK
| VM_WRITE
, 0, &unused
, NULL
);
113 mmap_write_unlock(mm
);
115 if (!set_res_tok
|| IS_ERR_VALUE(mapped_addr
))
118 if (create_rstor_token(mapped_addr
+ token_offset
, NULL
)) {
119 vm_munmap(mapped_addr
, size
);
127 static unsigned long adjust_shstk_size(unsigned long size
)
130 return PAGE_ALIGN(size
);
132 return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK
), SZ_4G
));
135 static void unmap_shadow_stack(u64 base
, u64 size
)
139 r
= vm_munmap(base
, size
);
142 * mmap_write_lock_killable() failed with -EINTR. This means
143 * the process is about to die and have its MM cleaned up.
144 * This task shouldn't ever make it back to userspace. In this
145 * case it is ok to leak a shadow stack, so just exit out.
151 * For all other types of vm_munmap() failure, either the
152 * system is out of memory or there is a bug.
157 static int shstk_setup(void)
159 struct thread_shstk
*shstk
= ¤t
->thread
.shstk
;
160 unsigned long addr
, size
;
162 /* Already enabled */
163 if (features_enabled(ARCH_SHSTK_SHSTK
))
166 /* Also not supported for 32 bit */
167 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK
) || in_ia32_syscall())
170 size
= adjust_shstk_size(0);
171 addr
= alloc_shstk(0, size
, 0, false);
172 if (IS_ERR_VALUE(addr
))
173 return PTR_ERR((void *)addr
);
175 fpregs_lock_and_load();
176 wrmsrl(MSR_IA32_PL3_SSP
, addr
+ size
);
177 wrmsrl(MSR_IA32_U_CET
, CET_SHSTK_EN
);
182 features_set(ARCH_SHSTK_SHSTK
);
187 void reset_thread_features(void)
189 memset(¤t
->thread
.shstk
, 0, sizeof(struct thread_shstk
));
190 current
->thread
.features
= 0;
191 current
->thread
.features_locked
= 0;
194 unsigned long shstk_alloc_thread_stack(struct task_struct
*tsk
, unsigned long clone_flags
,
195 unsigned long stack_size
)
197 struct thread_shstk
*shstk
= &tsk
->thread
.shstk
;
198 unsigned long addr
, size
;
201 * If shadow stack is not enabled on the new thread, skip any
202 * switch to a new shadow stack.
204 if (!features_enabled(ARCH_SHSTK_SHSTK
))
208 * For CLONE_VFORK the child will share the parent's shadow stack.
209 * Make sure to clear the internal tracking of the thread shadow
210 * stack so the freeing logic run for the child knows to leave it alone.
212 if (clone_flags
& CLONE_VFORK
) {
219 * For !CLONE_VM the child will use a copy of the parent's shadow
222 if (!(clone_flags
& CLONE_VM
))
225 size
= adjust_shstk_size(stack_size
);
226 addr
= alloc_shstk(0, size
, 0, false);
227 if (IS_ERR_VALUE(addr
))
236 static unsigned long get_user_shstk_addr(void)
238 unsigned long long ssp
;
240 fpregs_lock_and_load();
242 rdmsrl(MSR_IA32_PL3_SSP
, ssp
);
/*
 * Marker bit (bit 63) OR-ed into values stored by put_shstk_data() and
 * required, then masked off, by get_shstk_data().
 */
#define SHSTK_DATA_BIT BIT(63)
251 static int put_shstk_data(u64 __user
*addr
, u64 data
)
253 if (WARN_ON_ONCE(data
& SHSTK_DATA_BIT
))
257 * Mark the high bit so that the sigframe can't be processed as a
260 if (write_user_shstk_64(addr
, data
| SHSTK_DATA_BIT
))
265 static int get_shstk_data(unsigned long *data
, unsigned long __user
*addr
)
269 if (unlikely(get_user(ldata
, addr
)))
272 if (!(ldata
& SHSTK_DATA_BIT
))
275 *data
= ldata
& ~SHSTK_DATA_BIT
;
280 static int shstk_push_sigframe(unsigned long *ssp
)
282 unsigned long target_ssp
= *ssp
;
284 /* Token must be aligned */
285 if (!IS_ALIGNED(target_ssp
, 8))
288 *ssp
-= SS_FRAME_SIZE
;
289 if (put_shstk_data((void __user
*)*ssp
, target_ssp
))
295 static int shstk_pop_sigframe(unsigned long *ssp
)
297 struct vm_area_struct
*vma
;
298 unsigned long token_addr
;
299 bool need_to_check_vma
;
303 * It is possible for the SSP to be off the end of a shadow stack by 4
304 * or 8 bytes. If the shadow stack is at the start of a page or 4 bytes
305 * before it, it might be this case, so check that the address being
306 * read is actually shadow stack.
308 if (!IS_ALIGNED(*ssp
, 8))
311 need_to_check_vma
= PAGE_ALIGN(*ssp
) == *ssp
;
313 if (need_to_check_vma
)
314 mmap_read_lock_killable(current
->mm
);
316 err
= get_shstk_data(&token_addr
, (unsigned long __user
*)*ssp
);
320 if (need_to_check_vma
) {
321 vma
= find_vma(current
->mm
, *ssp
);
322 if (!vma
|| !(vma
->vm_flags
& VM_SHADOW_STACK
)) {
327 mmap_read_unlock(current
->mm
);
330 /* Restore SSP aligned? */
331 if (unlikely(!IS_ALIGNED(token_addr
, 8)))
334 /* SSP in userspace? */
335 if (unlikely(token_addr
>= TASK_SIZE_MAX
))
342 if (need_to_check_vma
)
343 mmap_read_unlock(current
->mm
);
347 int setup_signal_shadow_stack(struct ksignal
*ksig
)
349 void __user
*restorer
= ksig
->ka
.sa
.sa_restorer
;
353 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK
) ||
354 !features_enabled(ARCH_SHSTK_SHSTK
))
360 ssp
= get_user_shstk_addr();
364 err
= shstk_push_sigframe(&ssp
);
368 /* Push restorer address */
369 ssp
-= SS_FRAME_SIZE
;
370 err
= write_user_shstk_64((u64 __user
*)ssp
, (u64
)restorer
);
374 fpregs_lock_and_load();
375 wrmsrl(MSR_IA32_PL3_SSP
, ssp
);
381 int restore_signal_shadow_stack(void)
386 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK
) ||
387 !features_enabled(ARCH_SHSTK_SHSTK
))
390 ssp
= get_user_shstk_addr();
394 err
= shstk_pop_sigframe(&ssp
);
398 fpregs_lock_and_load();
399 wrmsrl(MSR_IA32_PL3_SSP
, ssp
);
405 void shstk_free(struct task_struct
*tsk
)
407 struct thread_shstk
*shstk
= &tsk
->thread
.shstk
;
409 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK
) ||
410 !features_enabled(ARCH_SHSTK_SHSTK
))
414 * When fork() with CLONE_VM fails, the child (tsk) already has a
415 * shadow stack allocated, and exit_thread() calls this function to
416 * free it. In this case the parent (current) and the child share
417 * the same mm struct.
419 if (!tsk
->mm
|| tsk
->mm
!= current
->mm
)
423 * If shstk->base is NULL, then this task is not managing its
424 * own shadow stack (CLONE_VFORK). So skip freeing it.
430 * shstk->base is NULL for CLONE_VFORK child tasks, and so is
431 * normal. But size = 0 on a shstk->base is not normal and
432 * indicates an attempt to free the thread shadow stack twice.
435 if (WARN_ON(!shstk
->size
))
438 unmap_shadow_stack(shstk
->base
, shstk
->size
);
443 static int wrss_control(bool enable
)
447 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK
))
451 * Only enable WRSS if shadow stack is enabled. If shadow stack is not
452 * enabled, WRSS will already be disabled, so don't bother clearing it
455 if (!features_enabled(ARCH_SHSTK_SHSTK
))
458 /* Already enabled/disabled? */
459 if (features_enabled(ARCH_SHSTK_WRSS
) == enable
)
462 fpregs_lock_and_load();
463 rdmsrl(MSR_IA32_U_CET
, msrval
);
466 features_set(ARCH_SHSTK_WRSS
);
467 msrval
|= CET_WRSS_EN
;
469 features_clr(ARCH_SHSTK_WRSS
);
470 if (!(msrval
& CET_WRSS_EN
))
473 msrval
&= ~CET_WRSS_EN
;
476 wrmsrl(MSR_IA32_U_CET
, msrval
);
484 static int shstk_disable(void)
486 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK
))
489 /* Already disabled? */
490 if (!features_enabled(ARCH_SHSTK_SHSTK
))
493 fpregs_lock_and_load();
494 /* Disable WRSS too when disabling shadow stack */
495 wrmsrl(MSR_IA32_U_CET
, 0);
496 wrmsrl(MSR_IA32_PL3_SSP
, 0);
500 features_clr(ARCH_SHSTK_SHSTK
| ARCH_SHSTK_WRSS
);
505 SYSCALL_DEFINE3(map_shadow_stack
, unsigned long, addr
, unsigned long, size
, unsigned int, flags
)
507 bool set_tok
= flags
& SHADOW_STACK_SET_TOKEN
;
508 unsigned long aligned_size
;
510 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK
))
513 if (flags
& ~SHADOW_STACK_SET_TOKEN
)
516 /* If there isn't space for a token */
517 if (set_tok
&& size
< 8)
520 if (addr
&& addr
< SZ_4G
)
524 * An overflow would result in attempting to write the restore token
525 * to the wrong location. Not catastrophic, but just return the right
526 * error code and block it.
528 aligned_size
= PAGE_ALIGN(size
);
529 if (aligned_size
< size
)
532 return alloc_shstk(addr
, aligned_size
, size
, set_tok
);
535 long shstk_prctl(struct task_struct
*task
, int option
, unsigned long arg2
)
537 unsigned long features
= arg2
;
539 if (option
== ARCH_SHSTK_STATUS
) {
540 return put_user(task
->thread
.features
, (unsigned long __user
*)arg2
);
543 if (option
== ARCH_SHSTK_LOCK
) {
544 task
->thread
.features_locked
|= features
;
548 /* Only allow via ptrace */
549 if (task
!= current
) {
550 if (option
== ARCH_SHSTK_UNLOCK
&& IS_ENABLED(CONFIG_CHECKPOINT_RESTORE
)) {
551 task
->thread
.features_locked
&= ~features
;
557 /* Do not allow to change locked features */
558 if (features
& task
->thread
.features_locked
)
561 /* Only support enabling/disabling one feature at a time. */
562 if (hweight_long(features
) > 1)
565 if (option
== ARCH_SHSTK_DISABLE
) {
566 if (features
& ARCH_SHSTK_WRSS
)
567 return wrss_control(false);
568 if (features
& ARCH_SHSTK_SHSTK
)
569 return shstk_disable();
573 /* Handle ARCH_SHSTK_ENABLE */
574 if (features
& ARCH_SHSTK_SHSTK
)
575 return shstk_setup();
576 if (features
& ARCH_SHSTK_WRSS
)
577 return wrss_control(true);
581 int shstk_update_last_frame(unsigned long val
)
585 if (!features_enabled(ARCH_SHSTK_SHSTK
))
588 ssp
= get_user_shstk_addr();
589 return write_user_shstk_64((u64 __user
*)ssp
, (u64
)val
);
592 bool shstk_is_enabled(void)
594 return features_enabled(ARCH_SHSTK_SHSTK
);