From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 3 Oct 2019 00:03:20 +0000 Subject: [PATCH] Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore Contrary to initial testing we cannot rely on these kernels to invalidate the per-cpu FPU state and restore the FPU registers. Therefore, the kfpu_begin() and kfpu_end() functions have been updated to unconditionally save and restore the FPU state. Signed-off-by: Brian Behlendorf Issue #9346 (cherry picked from commit 813fd014a90229127f80b970a8fef5049fd4c713) Signed-off-by: Thomas Lamprecht --- config/kernel-fpu.m4 | 82 +++++++++++---------- include/linux/simd_x86.h | 152 ++++++++++++++++++++++++++++----------- 2 files changed, 155 insertions(+), 79 deletions(-) diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4 index a2c47d65a..9ed9b14ad 100644 --- a/config/kernel-fpu.m4 +++ b/config/kernel-fpu.m4 @@ -2,15 +2,9 @@ dnl # dnl # Handle differences in kernel FPU code. dnl # dnl # Kernel -dnl # 5.2: The fpu->initialized flag was replaced by TIF_NEED_FPU_LOAD. -dnl # HAVE_KERNEL_TIF_NEED_FPU_LOAD -dnl # -dnl # 5.0: As an optimization SIMD operations performed by kernel -dnl # threads can skip saving and restoring their FPU context. -dnl # Wrappers have been introduced to determine the running -dnl # context and use either the SIMD or generic implementation. +dnl # 5.0: Wrappers have been introduced to save/restore the FPU state. dnl # This change was made to the 4.19.38 and 4.14.120 LTS kernels. 
-dnl # HAVE_KERNEL_FPU_INITIALIZED +dnl # HAVE_KERNEL_FPU_INTERNAL dnl # dnl # 4.2: Use __kernel_fpu_{begin,end}() dnl # HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU @@ -61,22 +55,47 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ __kernel_fpu_end(); ], [], [$ZFS_META_LICENSE]) - ZFS_LINUX_TEST_SRC([fpu_initialized], [ - #include + ZFS_LINUX_TEST_SRC([fpu_internal], [ + #if defined(__x86_64) || defined(__x86_64__) || \ + defined(__i386) || defined(__i386__) + #if !defined(__x86) + #define __x86 + #endif + #endif + + #if !defined(__x86) + #error Unsupported architecture + #endif + #include - ],[ - struct fpu *fpu = &current->thread.fpu; - if (fpu->initialized) { return (0); }; - ]) - ZFS_LINUX_TEST_SRC([tif_need_fpu_load], [ - #include - #include + #if !defined(PF_KTHREAD) + #error PF_KTHREAD not defined + #endif - #if !defined(TIF_NEED_FPU_LOAD) - #error "TIF_NEED_FPU_LOAD undefined" + #ifdef HAVE_KERNEL_FPU_API_HEADER + #include + #include + #else + #include + #include + #endif + + #if !defined(XSTATE_XSAVE) + #error XSTATE_XSAVE not defined + #endif + + #if !defined(XSTATE_XRESTORE) + #error XSTATE_XRESTORE not defined #endif - ],[]) + ],[ + struct fpu *fpu = &current->thread.fpu; + union fpregs_state *st = &fpu->state; + struct fregs_state *fr __attribute__ ((unused)) = &st->fsave; + struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave; + struct xregs_state *xr __attribute__ ((unused)) = &st->xsave; + fpu->last_cpu = -1; + ]) ]) AC_DEFUN([ZFS_AC_KERNEL_FPU], [ @@ -104,25 +123,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions]) ],[ - dnl # - dnl # Linux 5.0 kernel - dnl # - ZFS_LINUX_TEST_RESULT([fpu_initialized], [ - AC_MSG_RESULT(fpu.initialized) - AC_DEFINE(HAVE_KERNEL_FPU_INITIALIZED, 1, - [kernel fpu.initialized exists]) + ZFS_LINUX_TEST_RESULT([fpu_internal], [ + AC_MSG_RESULT(internal) + AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1, + [kernel fpu internal]) ],[ - dnl # - dnl # Linux 5.2 kernel - dnl # - 
ZFS_LINUX_TEST_RESULT([tif_need_fpu_load], [ - AC_MSG_RESULT(TIF_NEED_FPU_LOAD) - AC_DEFINE( - HAVE_KERNEL_TIF_NEED_FPU_LOAD, 1, - [kernel TIF_NEED_FPU_LOAD exists]) - ],[ - AC_MSG_RESULT(unavailable) - ]) + AC_MSG_RESULT(unavailable) ]) ]) ]) diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h index 641f43955..d1ded3af2 100644 --- a/include/linux/simd_x86.h +++ b/include/linux/simd_x86.h @@ -126,38 +126,68 @@ #endif #else /* defined(KERNEL_EXPORTS_X86_FPU) */ + /* * When the kernel_fpu_* symbols are unavailable then provide our own * versions which allow the FPU to be safely used in kernel threads. * In practice, this is not a significant restriction for ZFS since the * vast majority of SIMD operations are performed by the IO pipeline. */ +#if defined(HAVE_KERNEL_FPU_INTERNAL) /* - * Returns non-zero if FPU operations are allowed in the current context. + * FPU usage only allowed in dedicated kernel threads. */ -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) -#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \ - test_thread_flag(TIF_NEED_FPU_LOAD)) -#elif defined(HAVE_KERNEL_FPU_INITIALIZED) -#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \ - current->thread.fpu.initialized) -#else -#define kfpu_allowed() 0 -#endif +#define kfpu_allowed() (current->flags & PF_KTHREAD) +#define ex_handler_fprestore ex_handler_default + +/* + * FPU save and restore instructions. 
+ */ +#define __asm __asm__ __volatile__ +#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr))) +#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr))) +#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr))) +#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) +#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr))) +#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr))) +#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \ + : : [addr] "m" (rval)); static inline void kfpu_initialize(void) { WARN_ON_ONCE(!(current->flags & PF_KTHREAD)); -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) - __fpu_invalidate_fpregs_state(&current->thread.fpu); - set_thread_flag(TIF_NEED_FPU_LOAD); -#elif defined(HAVE_KERNEL_FPU_INITIALIZED) - __fpu_invalidate_fpregs_state(&current->thread.fpu); - current->thread.fpu.initialized = 1; -#endif + /* Invalidate the task's FPU state */ + current->thread.fpu.last_cpu = -1; +} + +static inline void +kfpu_save_xsave(struct xregs_state *addr, uint64_t mask) +{ + uint32_t low, hi; + int err; + + low = mask; + hi = mask >> 32; + XSTATE_XSAVE(addr, low, hi, err); + WARN_ON_ONCE(err); +} + +static inline void +kfpu_save_fxsr(struct fxregs_state *addr) +{ + if (IS_ENABLED(CONFIG_X86_32)) + kfpu_fxsave(addr); + else + kfpu_fxsaveq(addr); +} + +static inline void +kfpu_save_fsave(struct fregs_state *addr) +{ + kfpu_fnsave(addr); } static inline void @@ -172,46 +202,86 @@ kfpu_begin(void) preempt_disable(); local_irq_disable(); -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) /* * The current FPU registers need to be preserved by kfpu_begin() - * and restored by kfpu_end(). This is required because we can - * not call __cpu_invalidate_fpregs_state() to invalidate the - * per-cpu FPU state and force them to be restored during a - * context switch. + * and restored by kfpu_end(). 
This is always required because we + * can not call __cpu_invalidate_fpregs_state() to invalidate the + * per-cpu FPU state and force them to be restored. Furthermore, + * this implementation relies on the space provided in the task + * structure to store the user FPU state. As such, it can only + * be used with dedicated kernel threads which by definition will never + * store user FPU state. */ - copy_fpregs_to_fpstate(&current->thread.fpu); -#elif defined(HAVE_KERNEL_FPU_INITIALIZED) + if (static_cpu_has(X86_FEATURE_XSAVE)) { + kfpu_save_xsave(&current->thread.fpu.state.xsave, ~0); + } else if (static_cpu_has(X86_FEATURE_FXSR)) { + kfpu_save_fxsr(&current->thread.fpu.state.fxsave); + } else { + kfpu_save_fsave(&current->thread.fpu.state.fsave); + } +} + +static inline void +kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask) +{ + uint32_t low, hi; + + low = mask; + hi = mask >> 32; + XSTATE_XRESTORE(addr, low, hi); +} + +static inline void +kfpu_restore_fxsr(struct fxregs_state *addr) +{ /* - * There is no need to preserve and restore the FPU registers. - * They will always be restored from the task's stored FPU state - * when switching contexts. + * On AuthenticAMD K7 and K8 processors the fxrstor instruction only + * restores the _x87 FOP, FIP, and FDP registers when an exception + * is pending. Clean the _x87 state to force the restore. 
+ */ - WARN_ON_ONCE(current->thread.fpu.initialized == 0); -#endif + if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) + kfpu_fxsr_clean(addr); + + if (IS_ENABLED(CONFIG_X86_32)) { + kfpu_fxrstor(addr); + } else { + kfpu_fxrstorq(addr); + } } static inline void -kfpu_end(void) +kfpu_restore_fsave(struct fregs_state *addr) { -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) - union fpregs_state *state = &current->thread.fpu.state; - int error; + kfpu_frstor(addr); +} - if (use_xsave()) { - error = copy_kernel_to_xregs_err(&state->xsave, -1); - } else if (use_fxsr()) { - error = copy_kernel_to_fxregs_err(&state->fxsave); +static inline void +kfpu_end(void) +{ + if (static_cpu_has(X86_FEATURE_XSAVE)) { + kfpu_restore_xsave(&current->thread.fpu.state.xsave, ~0); + } else if (static_cpu_has(X86_FEATURE_FXSR)) { + kfpu_restore_fxsr(&current->thread.fpu.state.fxsave); } else { - error = copy_kernel_to_fregs_err(&state->fsave); + kfpu_restore_fsave(&current->thread.fpu.state.fsave); } - WARN_ON_ONCE(error); -#endif local_irq_enable(); preempt_enable(); } -#endif /* defined(HAVE_KERNEL_FPU) */ + +#else + +/* + * FPU support is unavailable. + */ +#define kfpu_allowed() 0 +#define kfpu_initialize(tsk) do {} while (0) +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) + +#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ +#endif /* defined(KERNEL_EXPORTS_X86_FPU) */ #else /* defined(_KERNEL) */ /*