580 lines
13 KiB
C
580 lines
13 KiB
C
// SPDX-License-Identifier: GPL-2.0
/*
 * shstk.c - Intel shadow stack support
 *
 * Copyright (c) 2021, Intel Corporation.
 * Yu-cheng Yu <yu-cheng.yu@intel.com>
 */
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/types.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <linux/compat.h>
|
|
#include <linux/sizes.h>
|
|
#include <linux/user.h>
|
|
#include <linux/syscalls.h>
|
|
#include <asm/msr.h>
|
|
#include <asm/fpu/xstate.h>
|
|
#include <asm/fpu/types.h>
|
|
#include <asm/shstk.h>
|
|
#include <asm/special_insns.h>
|
|
#include <asm/fpu/api.h>
|
|
#include <asm/prctl.h>
|
|
|
|
/*
 * Size in bytes of one shadow stack slot as used in this file: the restore
 * token, the sigframe data word and the restorer address are each placed
 * SS_FRAME_SIZE below the previous shadow stack pointer.
 */
#define SS_FRAME_SIZE 8
|
|
|
|
static bool features_enabled(unsigned long features)
|
|
{
|
|
return current->thread.features & features;
|
|
}
|
|
|
|
static void features_set(unsigned long features)
|
|
{
|
|
current->thread.features |= features;
|
|
}
|
|
|
|
static void features_clr(unsigned long features)
|
|
{
|
|
current->thread.features &= ~features;
|
|
}
|
|
|
|
/*
|
|
* Create a restore token on the shadow stack. A token is always 8-byte
|
|
* and aligned to 8.
|
|
*/
|
|
static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
|
|
{
|
|
unsigned long addr;
|
|
|
|
/* Token must be aligned */
|
|
if (!IS_ALIGNED(ssp, 8))
|
|
return -EINVAL;
|
|
|
|
addr = ssp - SS_FRAME_SIZE;
|
|
|
|
/*
|
|
* SSP is aligned, so reserved bits and mode bit are a zero, just mark
|
|
* the token 64-bit.
|
|
*/
|
|
ssp |= BIT(0);
|
|
|
|
if (write_user_shstk_64((u64 __user *)addr, (u64)ssp))
|
|
return -EFAULT;
|
|
|
|
if (token_addr)
|
|
*token_addr = addr;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* VM_SHADOW_STACK will have a guard page. This helps userspace protect
|
|
* itself from attacks. The reasoning is as follows:
|
|
*
|
|
* The shadow stack pointer(SSP) is moved by CALL, RET, and INCSSPQ. The
|
|
* INCSSP instruction can increment the shadow stack pointer. It is the
|
|
* shadow stack analog of an instruction like:
|
|
*
|
|
* addq $0x80, %rsp
|
|
*
|
|
* However, there is one important difference between an ADD on %rsp
|
|
* and INCSSP. In addition to modifying SSP, INCSSP also reads from the
|
|
* memory of the first and last elements that were "popped". It can be
|
|
* thought of as acting like this:
|
|
*
|
|
* READ_ONCE(ssp); // read+discard top element on stack
|
|
* ssp += nr_to_pop * 8; // move the shadow stack
|
|
* READ_ONCE(ssp-8); // read+discard last popped stack element
|
|
*
|
|
* The maximum distance INCSSP can move the SSP is 2040 bytes, before
|
|
* it would read the memory. Therefore a single page gap will be enough
|
|
* to prevent any operation from shifting the SSP to an adjacent stack,
|
|
* since it would have to land in the gap at least once, causing a
|
|
* fault.
|
|
*/
|
|
static unsigned long alloc_shstk(unsigned long addr, unsigned long size,
|
|
unsigned long token_offset, bool set_res_tok)
|
|
{
|
|
int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G;
|
|
struct mm_struct *mm = current->mm;
|
|
unsigned long mapped_addr, unused;
|
|
|
|
if (addr)
|
|
flags |= MAP_FIXED_NOREPLACE;
|
|
|
|
mmap_write_lock(mm);
|
|
mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags,
|
|
VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
|
|
mmap_write_unlock(mm);
|
|
|
|
if (!set_res_tok || IS_ERR_VALUE(mapped_addr))
|
|
goto out;
|
|
|
|
if (create_rstor_token(mapped_addr + token_offset, NULL)) {
|
|
vm_munmap(mapped_addr, size);
|
|
return -EINVAL;
|
|
}
|
|
|
|
out:
|
|
return mapped_addr;
|
|
}
|
|
|
|
static unsigned long adjust_shstk_size(unsigned long size)
|
|
{
|
|
if (size)
|
|
return PAGE_ALIGN(size);
|
|
|
|
return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
|
|
}
|
|
|
|
/*
 * Unmap the shadow stack at [@base, @base + @size) with vm_munmap().
 *
 * An -EINTR result is silently tolerated; any other failure triggers a
 * one-time warning.
 */
static void unmap_shadow_stack(u64 base, u64 size)
{
	int ret = vm_munmap(base, size);

	/*
	 * mmap_write_lock_killable() failed with -EINTR. This means the
	 * process is about to die and have its MM cleaned up. This task
	 * should never make it back to userspace, so leaking a shadow
	 * stack is acceptable here; just bail out.
	 */
	if (ret == -EINTR)
		return;

	/*
	 * Every other kind of vm_munmap() failure means either the system
	 * is out of memory or there is a bug.
	 */
	WARN_ON_ONCE(ret);
}
|
|
|
|
static int shstk_setup(void)
|
|
{
|
|
struct thread_shstk *shstk = ¤t->thread.shstk;
|
|
unsigned long addr, size;
|
|
|
|
/* Already enabled */
|
|
if (features_enabled(ARCH_SHSTK_SHSTK))
|
|
return 0;
|
|
|
|
/* Also not supported for 32 bit and x32 */
|
|
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_32bit_syscall())
|
|
return -EOPNOTSUPP;
|
|
|
|
size = adjust_shstk_size(0);
|
|
addr = alloc_shstk(0, size, 0, false);
|
|
if (IS_ERR_VALUE(addr))
|
|
return PTR_ERR((void *)addr);
|
|
|
|
fpregs_lock_and_load();
|
|
wrmsrl(MSR_IA32_PL3_SSP, addr + size);
|
|
wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
|
|
fpregs_unlock();
|
|
|
|
shstk->base = addr;
|
|
shstk->size = size;
|
|
features_set(ARCH_SHSTK_SHSTK);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void reset_thread_features(void)
|
|
{
|
|
memset(¤t->thread.shstk, 0, sizeof(struct thread_shstk));
|
|
current->thread.features = 0;
|
|
current->thread.features_locked = 0;
|
|
}
|
|
|
|
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
|
|
unsigned long stack_size)
|
|
{
|
|
struct thread_shstk *shstk = &tsk->thread.shstk;
|
|
unsigned long addr, size;
|
|
|
|
/*
|
|
* If shadow stack is not enabled on the new thread, skip any
|
|
* switch to a new shadow stack.
|
|
*/
|
|
if (!features_enabled(ARCH_SHSTK_SHSTK))
|
|
return 0;
|
|
|
|
/*
|
|
* For CLONE_VFORK the child will share the parents shadow stack.
|
|
* Make sure to clear the internal tracking of the thread shadow
|
|
* stack so the freeing logic run for child knows to leave it alone.
|
|
*/
|
|
if (clone_flags & CLONE_VFORK) {
|
|
shstk->base = 0;
|
|
shstk->size = 0;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* For !CLONE_VM the child will use a copy of the parents shadow
|
|
* stack.
|
|
*/
|
|
if (!(clone_flags & CLONE_VM))
|
|
return 0;
|
|
|
|
size = adjust_shstk_size(stack_size);
|
|
addr = alloc_shstk(0, size, 0, false);
|
|
if (IS_ERR_VALUE(addr))
|
|
return addr;
|
|
|
|
shstk->base = addr;
|
|
shstk->size = size;
|
|
|
|
return addr + size;
|
|
}
|
|
|
|
static unsigned long get_user_shstk_addr(void)
|
|
{
|
|
unsigned long long ssp;
|
|
|
|
fpregs_lock_and_load();
|
|
|
|
rdmsrl(MSR_IA32_PL3_SSP, ssp);
|
|
|
|
fpregs_unlock();
|
|
|
|
return ssp;
|
|
}
|
|
|
|
/*
 * Tag bit for data words stored on the shadow stack by put_shstk_data().
 * It is set on write so the word can't be processed as a return address,
 * and it is required and stripped again by get_shstk_data().
 */
#define SHSTK_DATA_BIT BIT(63)
|
|
|
|
/*
 * Store @data at user shadow stack address @addr, tagged with
 * SHSTK_DATA_BIT.
 *
 * Returns 0 on success, -EINVAL (with a one-time warning) if @data already
 * has SHSTK_DATA_BIT set, or -EFAULT if the shadow stack write fails.
 */
static int put_shstk_data(u64 __user *addr, u64 data)
{
	/*
	 * The high bit is set so that the sigframe word can't be processed
	 * as a return address.
	 */
	u64 tagged = data | SHSTK_DATA_BIT;

	if (WARN_ON_ONCE(data & SHSTK_DATA_BIT))
		return -EINVAL;

	return write_user_shstk_64(addr, tagged) ? -EFAULT : 0;
}
|
|
|
|
/*
 * Fetch a tagged data word from user address @addr.
 *
 * On success the word, with SHSTK_DATA_BIT stripped, is stored in *@data
 * and 0 is returned. Returns -EFAULT if the user read fails and -EINVAL if
 * the word does not carry SHSTK_DATA_BIT; *@data is left untouched in both
 * failure cases.
 */
static int get_shstk_data(unsigned long *data, unsigned long __user *addr)
{
	unsigned long word;

	if (unlikely(get_user(word, addr)))
		return -EFAULT;

	if (word & SHSTK_DATA_BIT) {
		*data = word & ~SHSTK_DATA_BIT;
		return 0;
	}

	return -EINVAL;
}
|
|
|
|
static int shstk_push_sigframe(unsigned long *ssp)
|
|
{
|
|
unsigned long target_ssp = *ssp;
|
|
|
|
/* Token must be aligned */
|
|
if (!IS_ALIGNED(target_ssp, 8))
|
|
return -EINVAL;
|
|
|
|
*ssp -= SS_FRAME_SIZE;
|
|
if (put_shstk_data((void __user *)*ssp, target_ssp))
|
|
return -EFAULT;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int shstk_pop_sigframe(unsigned long *ssp)
|
|
{
|
|
struct vm_area_struct *vma;
|
|
unsigned long token_addr;
|
|
bool need_to_check_vma;
|
|
int err = 1;
|
|
|
|
/*
|
|
* It is possible for the SSP to be off the end of a shadow stack by 4
|
|
* or 8 bytes. If the shadow stack is at the start of a page or 4 bytes
|
|
* before it, it might be this case, so check that the address being
|
|
* read is actually shadow stack.
|
|
*/
|
|
if (!IS_ALIGNED(*ssp, 8))
|
|
return -EINVAL;
|
|
|
|
need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp;
|
|
|
|
if (need_to_check_vma)
|
|
mmap_read_lock_killable(current->mm);
|
|
|
|
err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp);
|
|
if (unlikely(err))
|
|
goto out_err;
|
|
|
|
if (need_to_check_vma) {
|
|
vma = find_vma(current->mm, *ssp);
|
|
if (!vma || !(vma->vm_flags & VM_SHADOW_STACK)) {
|
|
err = -EFAULT;
|
|
goto out_err;
|
|
}
|
|
|
|
mmap_read_unlock(current->mm);
|
|
}
|
|
|
|
/* Restore SSP aligned? */
|
|
if (unlikely(!IS_ALIGNED(token_addr, 8)))
|
|
return -EINVAL;
|
|
|
|
/* SSP in userspace? */
|
|
if (unlikely(token_addr >= TASK_SIZE_MAX))
|
|
return -EINVAL;
|
|
|
|
*ssp = token_addr;
|
|
|
|
return 0;
|
|
out_err:
|
|
if (need_to_check_vma)
|
|
mmap_read_unlock(current->mm);
|
|
return err;
|
|
}
|
|
|
|
int setup_signal_shadow_stack(struct ksignal *ksig)
|
|
{
|
|
void __user *restorer = ksig->ka.sa.sa_restorer;
|
|
unsigned long ssp;
|
|
int err;
|
|
|
|
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
|
|
!features_enabled(ARCH_SHSTK_SHSTK))
|
|
return 0;
|
|
|
|
if (!restorer)
|
|
return -EINVAL;
|
|
|
|
ssp = get_user_shstk_addr();
|
|
if (unlikely(!ssp))
|
|
return -EINVAL;
|
|
|
|
err = shstk_push_sigframe(&ssp);
|
|
if (unlikely(err))
|
|
return err;
|
|
|
|
/* Push restorer address */
|
|
ssp -= SS_FRAME_SIZE;
|
|
err = write_user_shstk_64((u64 __user *)ssp, (u64)restorer);
|
|
if (unlikely(err))
|
|
return -EFAULT;
|
|
|
|
fpregs_lock_and_load();
|
|
wrmsrl(MSR_IA32_PL3_SSP, ssp);
|
|
fpregs_unlock();
|
|
|
|
return 0;
|
|
}
|
|
|
|
int restore_signal_shadow_stack(void)
|
|
{
|
|
unsigned long ssp;
|
|
int err;
|
|
|
|
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
|
|
!features_enabled(ARCH_SHSTK_SHSTK))
|
|
return 0;
|
|
|
|
ssp = get_user_shstk_addr();
|
|
if (unlikely(!ssp))
|
|
return -EINVAL;
|
|
|
|
err = shstk_pop_sigframe(&ssp);
|
|
if (unlikely(err))
|
|
return err;
|
|
|
|
fpregs_lock_and_load();
|
|
wrmsrl(MSR_IA32_PL3_SSP, ssp);
|
|
fpregs_unlock();
|
|
|
|
return 0;
|
|
}
|
|
|
|
void shstk_free(struct task_struct *tsk)
|
|
{
|
|
struct thread_shstk *shstk = &tsk->thread.shstk;
|
|
|
|
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
|
|
!features_enabled(ARCH_SHSTK_SHSTK))
|
|
return;
|
|
|
|
/*
|
|
* When fork() with CLONE_VM fails, the child (tsk) already has a
|
|
* shadow stack allocated, and exit_thread() calls this function to
|
|
* free it. In this case the parent (current) and the child share
|
|
* the same mm struct.
|
|
*/
|
|
if (!tsk->mm || tsk->mm != current->mm)
|
|
return;
|
|
|
|
/*
|
|
* If shstk->base is NULL, then this task is not managing its
|
|
* own shadow stack (CLONE_VFORK). So skip freeing it.
|
|
*/
|
|
if (!shstk->base)
|
|
return;
|
|
|
|
/*
|
|
* shstk->base is NULL for CLONE_VFORK child tasks, and so is
|
|
* normal. But size = 0 on a shstk->base is not normal and
|
|
* indicated an attempt to free the thread shadow stack twice.
|
|
* Warn about it.
|
|
*/
|
|
if (WARN_ON(!shstk->size))
|
|
return;
|
|
|
|
unmap_shadow_stack(shstk->base, shstk->size);
|
|
|
|
shstk->size = 0;
|
|
}
|
|
|
|
/*
 * Enable or disable WRSS for the current task.
 *
 * Updates the ARCH_SHSTK_WRSS feature bit and the CET_WRSS_EN bit in
 * MSR_IA32_U_CET to match @enable.
 *
 * Returns 0 on success or if WRSS is already in the requested state,
 * -EOPNOTSUPP if user shadow stack is not supported, or -EPERM if shadow
 * stack is not enabled for the current task.
 */
static int wrss_control(bool enable)
{
	u64 cet;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return -EOPNOTSUPP;

	/*
	 * WRSS may only be enabled while shadow stack is enabled. When
	 * shadow stack is not enabled WRSS is already off, so there is
	 * nothing to clear on a disable request either.
	 */
	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return -EPERM;

	/* Already in the requested state? */
	if (features_enabled(ARCH_SHSTK_WRSS) == enable)
		return 0;

	fpregs_lock_and_load();
	rdmsrl(MSR_IA32_U_CET, cet);

	if (enable) {
		features_set(ARCH_SHSTK_WRSS);
		wrmsrl(MSR_IA32_U_CET, cet | CET_WRSS_EN);
	} else {
		features_clr(ARCH_SHSTK_WRSS);
		/* Skip the MSR write when the bit is already clear */
		if (cet & CET_WRSS_EN)
			wrmsrl(MSR_IA32_U_CET, cet & ~CET_WRSS_EN);
	}

	fpregs_unlock();

	return 0;
}
|
|
|
|
static int shstk_disable(void)
|
|
{
|
|
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
|
|
return -EOPNOTSUPP;
|
|
|
|
/* Already disabled? */
|
|
if (!features_enabled(ARCH_SHSTK_SHSTK))
|
|
return 0;
|
|
|
|
fpregs_lock_and_load();
|
|
/* Disable WRSS too when disabling shadow stack */
|
|
wrmsrl(MSR_IA32_U_CET, 0);
|
|
wrmsrl(MSR_IA32_PL3_SSP, 0);
|
|
fpregs_unlock();
|
|
|
|
shstk_free(current);
|
|
features_clr(ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * map_shadow_stack() syscall: map a new shadow stack for userspace.
 *
 * @addr:  requested address, or 0 for no preference; a non-zero value
 *         below 4 GiB is rejected
 * @size:  requested size; rounded up to a page multiple for the mapping
 * @flags: only SHADOW_STACK_SET_TOKEN is accepted; it requests a restore
 *         token, passed to alloc_shstk() with a token offset of @size
 *
 * Returns the result of alloc_shstk(), or -EOPNOTSUPP (no user shadow
 * stack support), -EINVAL (unknown flags), -ENOSPC (token requested but
 * @size < 8), -ERANGE (@addr below 4 GiB) or -EOVERFLOW (page alignment of
 * @size wrapped around).
 */
SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
{
	unsigned long map_len;
	bool want_token;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return -EOPNOTSUPP;

	if (flags & ~SHADOW_STACK_SET_TOKEN)
		return -EINVAL;

	/* A token needs at least 8 bytes of room */
	want_token = flags & SHADOW_STACK_SET_TOKEN;
	if (want_token && size < 8)
		return -ENOSPC;

	if (addr != 0 && addr < SZ_4G)
		return -ERANGE;

	/*
	 * If the alignment overflowed, the restore token would be written
	 * to the wrong location. That is not catastrophic, but report the
	 * proper error code and refuse the request.
	 */
	map_len = PAGE_ALIGN(size);
	if (map_len < size)
		return -EOVERFLOW;

	return alloc_shstk(addr, map_len, size, want_token);
}
|
|
|
|
long shstk_prctl(struct task_struct *task, int option, unsigned long arg2)
|
|
{
|
|
unsigned long features = arg2;
|
|
|
|
if (option == ARCH_SHSTK_STATUS) {
|
|
return put_user(task->thread.features, (unsigned long __user *)arg2);
|
|
}
|
|
|
|
if (option == ARCH_SHSTK_LOCK) {
|
|
task->thread.features_locked |= features;
|
|
return 0;
|
|
}
|
|
|
|
/* Only allow via ptrace */
|
|
if (task != current) {
|
|
if (option == ARCH_SHSTK_UNLOCK && IS_ENABLED(CONFIG_CHECKPOINT_RESTORE)) {
|
|
task->thread.features_locked &= ~features;
|
|
return 0;
|
|
}
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Do not allow to change locked features */
|
|
if (features & task->thread.features_locked)
|
|
return -EPERM;
|
|
|
|
/* Only support enabling/disabling one feature at a time. */
|
|
if (hweight_long(features) > 1)
|
|
return -EINVAL;
|
|
|
|
if (option == ARCH_SHSTK_DISABLE) {
|
|
if (features & ARCH_SHSTK_WRSS)
|
|
return wrss_control(false);
|
|
if (features & ARCH_SHSTK_SHSTK)
|
|
return shstk_disable();
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Handle ARCH_SHSTK_ENABLE */
|
|
if (features & ARCH_SHSTK_SHSTK)
|
|
return shstk_setup();
|
|
if (features & ARCH_SHSTK_WRSS)
|
|
return wrss_control(true);
|
|
return -EINVAL;
|
|
}
|