FPU/SIMD: separate ZFS and Kernel approach for less interference
github pull-request #9406[0]. [0]: https://github.com/zfsonlinux/zfs/pull/9406#event-2704412478 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
parent
45fdd1f323
commit
8f11c72175
@ -2,9 +2,6 @@ From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Date: Fri, 12 Jul 2019 09:31:20 -0700
|
||||
Subject: [PATCH] Linux 5.0 compat: SIMD compatibility
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Restore the SIMD optimization for 4.19.38 LTS, 4.14.120 LTS,
|
||||
and 5.0 and newer kernels. This is accomplished by leveraging
|
||||
@ -59,8 +56,6 @@ Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Closes #8754
|
||||
Closes #8793
|
||||
Closes #8965
|
||||
(cherry picked from commit e5db31349484e5e859c7a942eb15b98d68ce5b4d)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
cmd/ztest/ztest.c | 3 +
|
||||
|
333
debian/patches/0008-Linux-4.14-4.19-5.0-compat-SIMD-save-restore.patch
vendored
Normal file
333
debian/patches/0008-Linux-4.14-4.19-5.0-compat-SIMD-save-restore.patch
vendored
Normal file
@ -0,0 +1,333 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Date: Thu, 3 Oct 2019 00:03:20 +0000
|
||||
Subject: [PATCH] Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore
|
||||
|
||||
Contrary to initial testing we cannot rely on these kernels to
|
||||
invalidate the per-cpu FPU state and restore the FPU registers.
|
||||
Therefore, the kfpu_begin() and kfpu_end() functions have been
|
||||
updated to unconditionally save and restore the FPU state.
|
||||
|
||||
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Issue #9346
|
||||
(cherry picked from commit 813fd014a90229127f80b970a8fef5049fd4c713)
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
config/kernel-fpu.m4 | 82 +++++++++++----------
|
||||
include/linux/simd_x86.h | 152 ++++++++++++++++++++++++++++-----------
|
||||
2 files changed, 155 insertions(+), 79 deletions(-)
|
||||
|
||||
diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4
|
||||
index a2c47d65a..9ed9b14ad 100644
|
||||
--- a/config/kernel-fpu.m4
|
||||
+++ b/config/kernel-fpu.m4
|
||||
@@ -2,15 +2,9 @@ dnl #
|
||||
dnl # Handle differences in kernel FPU code.
|
||||
dnl #
|
||||
dnl # Kernel
|
||||
-dnl # 5.2: The fpu->initialized flag was replaced by TIF_NEED_FPU_LOAD.
|
||||
-dnl # HAVE_KERNEL_TIF_NEED_FPU_LOAD
|
||||
-dnl #
|
||||
-dnl # 5.0: As an optimization SIMD operations performed by kernel
|
||||
-dnl # threads can skip saving and restoring their FPU context.
|
||||
-dnl # Wrappers have been introduced to determine the running
|
||||
-dnl # context and use either the SIMD or generic implementation.
|
||||
+dnl # 5.0: Wrappers have been introduced to save/restore the FPU state.
|
||||
dnl # This change was made to the 4.19.38 and 4.14.120 LTS kernels.
|
||||
-dnl # HAVE_KERNEL_FPU_INITIALIZED
|
||||
+dnl # HAVE_KERNEL_FPU_INTERNAL
|
||||
dnl #
|
||||
dnl # 4.2: Use __kernel_fpu_{begin,end}()
|
||||
dnl # HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU
|
||||
@@ -61,22 +55,47 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
|
||||
__kernel_fpu_end();
|
||||
], [], [$ZFS_META_LICENSE])
|
||||
|
||||
- ZFS_LINUX_TEST_SRC([fpu_initialized], [
|
||||
- #include <linux/module.h>
|
||||
+ ZFS_LINUX_TEST_SRC([fpu_internal], [
|
||||
+ #if defined(__x86_64) || defined(__x86_64__) || \
|
||||
+ defined(__i386) || defined(__i386__)
|
||||
+ #if !defined(__x86)
|
||||
+ #define __x86
|
||||
+ #endif
|
||||
+ #endif
|
||||
+
|
||||
+ #if !defined(__x86)
|
||||
+ #error Unsupported architecture
|
||||
+ #endif
|
||||
+
|
||||
#include <linux/sched.h>
|
||||
- ],[
|
||||
- struct fpu *fpu = &current->thread.fpu;
|
||||
- if (fpu->initialized) { return (0); };
|
||||
- ])
|
||||
|
||||
- ZFS_LINUX_TEST_SRC([tif_need_fpu_load], [
|
||||
- #include <linux/module.h>
|
||||
- #include <asm/thread_info.h>
|
||||
+ #if !defined(PF_KTHREAD)
|
||||
+ #error PF_KTHREAD not defined
|
||||
+ #endif
|
||||
|
||||
- #if !defined(TIF_NEED_FPU_LOAD)
|
||||
- #error "TIF_NEED_FPU_LOAD undefined"
|
||||
+ #ifdef HAVE_KERNEL_FPU_API_HEADER
|
||||
+ #include <asm/fpu/api.h>
|
||||
+ #include <asm/fpu/internal.h>
|
||||
+ #else
|
||||
+ #include <asm/i387.h>
|
||||
+ #include <asm/xcr.h>
|
||||
+ #endif
|
||||
+
|
||||
+ #if !defined(XSTATE_XSAVE)
|
||||
+ #error XSTATE_XSAVE not defined
|
||||
+ #endif
|
||||
+
|
||||
+ #if !defined(XSTATE_XRESTORE)
|
||||
+ #error XSTATE_XRESTORE not defined
|
||||
#endif
|
||||
- ],[])
|
||||
+ ],[
|
||||
+ struct fpu *fpu = &current->thread.fpu;
|
||||
+ union fpregs_state *st = &fpu->state;
|
||||
+ struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
|
||||
+ struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
|
||||
+ struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
|
||||
+ fpu->last_cpu = -1;
|
||||
+ ])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_FPU], [
|
||||
@@ -104,25 +123,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
|
||||
AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
|
||||
[kernel exports FPU functions])
|
||||
],[
|
||||
- dnl #
|
||||
- dnl # Linux 5.0 kernel
|
||||
- dnl #
|
||||
- ZFS_LINUX_TEST_RESULT([fpu_initialized], [
|
||||
- AC_MSG_RESULT(fpu.initialized)
|
||||
- AC_DEFINE(HAVE_KERNEL_FPU_INITIALIZED, 1,
|
||||
- [kernel fpu.initialized exists])
|
||||
+ ZFS_LINUX_TEST_RESULT([fpu_internal], [
|
||||
+ AC_MSG_RESULT(internal)
|
||||
+ AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
|
||||
+ [kernel fpu internal])
|
||||
],[
|
||||
- dnl #
|
||||
- dnl # Linux 5.2 kernel
|
||||
- dnl #
|
||||
- ZFS_LINUX_TEST_RESULT([tif_need_fpu_load], [
|
||||
- AC_MSG_RESULT(TIF_NEED_FPU_LOAD)
|
||||
- AC_DEFINE(
|
||||
- HAVE_KERNEL_TIF_NEED_FPU_LOAD, 1,
|
||||
- [kernel TIF_NEED_FPU_LOAD exists])
|
||||
- ],[
|
||||
- AC_MSG_RESULT(unavailable)
|
||||
- ])
|
||||
+ AC_MSG_RESULT(unavailable)
|
||||
])
|
||||
])
|
||||
])
|
||||
diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h
|
||||
index 641f43955..d1ded3af2 100644
|
||||
--- a/include/linux/simd_x86.h
|
||||
+++ b/include/linux/simd_x86.h
|
||||
@@ -126,38 +126,68 @@
|
||||
#endif
|
||||
|
||||
#else /* defined(KERNEL_EXPORTS_X86_FPU) */
|
||||
+
|
||||
/*
|
||||
* When the kernel_fpu_* symbols are unavailable then provide our own
|
||||
* versions which allow the FPU to be safely used in kernel threads.
|
||||
* In practice, this is not a significant restriction for ZFS since the
|
||||
* vast majority of SIMD operations are performed by the IO pipeline.
|
||||
*/
|
||||
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
||||
|
||||
/*
|
||||
- * Returns non-zero if FPU operations are allowed in the current context.
|
||||
+ * FPU usage only allowed in dedicated kernel threads.
|
||||
*/
|
||||
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
|
||||
-#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \
|
||||
- test_thread_flag(TIF_NEED_FPU_LOAD))
|
||||
-#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
|
||||
-#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \
|
||||
- current->thread.fpu.initialized)
|
||||
-#else
|
||||
-#define kfpu_allowed() 0
|
||||
-#endif
|
||||
+#define kfpu_allowed() (current->flags & PF_KTHREAD)
|
||||
+#define ex_handler_fprestore ex_handler_default
|
||||
+
|
||||
+/*
|
||||
+ * FPU save and restore instructions.
|
||||
+ */
|
||||
+#define __asm __asm__ __volatile__
|
||||
+#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr)))
|
||||
+#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr)))
|
||||
+#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr)))
|
||||
+#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
|
||||
+#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr)))
|
||||
+#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
|
||||
+#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
|
||||
+ : : [addr] "m" (rval));
|
||||
|
||||
static inline void
|
||||
kfpu_initialize(void)
|
||||
{
|
||||
WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
|
||||
|
||||
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
|
||||
- __fpu_invalidate_fpregs_state(&current->thread.fpu);
|
||||
- set_thread_flag(TIF_NEED_FPU_LOAD);
|
||||
-#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
|
||||
- __fpu_invalidate_fpregs_state(&current->thread.fpu);
|
||||
- current->thread.fpu.initialized = 1;
|
||||
-#endif
|
||||
+ /* Invalidate the task's FPU state */
|
||||
+ current->thread.fpu.last_cpu = -1;
|
||||
+}
|
||||
+
|
||||
+static inline void
|
||||
+kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
|
||||
+{
|
||||
+ uint32_t low, hi;
|
||||
+ int err;
|
||||
+
|
||||
+ low = mask;
|
||||
+ hi = mask >> 32;
|
||||
+ XSTATE_XSAVE(addr, low, hi, err);
|
||||
+ WARN_ON_ONCE(err);
|
||||
+}
|
||||
+
|
||||
+static inline void
|
||||
+kfpu_save_fxsr(struct fxregs_state *addr)
|
||||
+{
|
||||
+ if (IS_ENABLED(CONFIG_X86_32))
|
||||
+ kfpu_fxsave(addr);
|
||||
+ else
|
||||
+ kfpu_fxsaveq(addr);
|
||||
+}
|
||||
+
|
||||
+static inline void
|
||||
+kfpu_save_fsave(struct fregs_state *addr)
|
||||
+{
|
||||
+ kfpu_fnsave(addr);
|
||||
}
|
||||
|
||||
static inline void
|
||||
@@ -172,46 +202,86 @@ kfpu_begin(void)
|
||||
preempt_disable();
|
||||
local_irq_disable();
|
||||
|
||||
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
|
||||
/*
|
||||
* The current FPU registers need to be preserved by kfpu_begin()
|
||||
- * and restored by kfpu_end(). This is required because we can
|
||||
- * not call __cpu_invalidate_fpregs_state() to invalidate the
|
||||
- * per-cpu FPU state and force them to be restored during a
|
||||
- * context switch.
|
||||
+ * and restored by kfpu_end(). This is always required because we
|
||||
+ * can not call __cpu_invalidate_fpregs_state() to invalidate the
|
||||
+ * per-cpu FPU state and force them to be restored. Furthermore,
|
||||
+ * this implementation relies on the space provided in the task
|
||||
+ * structure to store the user FPU state. As such, it can only
|
||||
+ * be used with dedicated kernels which by definition will never
|
||||
+ * store user FPU state.
|
||||
*/
|
||||
- copy_fpregs_to_fpstate(&current->thread.fpu);
|
||||
-#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
|
||||
+ if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
||||
+ kfpu_save_xsave(&current->thread.fpu.state.xsave, ~0);
|
||||
+ } else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
||||
+ kfpu_save_fxsr(&current->thread.fpu.state.fxsave);
|
||||
+ } else {
|
||||
+ kfpu_save_fsave(&current->thread.fpu.state.fsave);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static inline void
|
||||
+kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
|
||||
+{
|
||||
+ uint32_t low, hi;
|
||||
+
|
||||
+ low = mask;
|
||||
+ hi = mask >> 32;
|
||||
+ XSTATE_XRESTORE(addr, low, hi);
|
||||
+}
|
||||
+
|
||||
+static inline void
|
||||
+kfpu_restore_fxsr(struct fxregs_state *addr)
|
||||
+{
|
||||
/*
|
||||
- * There is no need to preserve and restore the FPU registers.
|
||||
- * They will always be restored from the task's stored FPU state
|
||||
- * when switching contexts.
|
||||
+ * On AuthenticAMD K7 and K8 processors the fxrstor instruction only
|
||||
+ * restores the _x87 FOP, FIP, and FDP registers when an exception
|
||||
+ * is pending. Clean the _x87 state to force the restore.
|
||||
*/
|
||||
- WARN_ON_ONCE(current->thread.fpu.initialized == 0);
|
||||
-#endif
|
||||
+ if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK)))
|
||||
+ kfpu_fxsr_clean(addr);
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_X86_32)) {
|
||||
+ kfpu_fxrstor(addr);
|
||||
+ } else {
|
||||
+ kfpu_fxrstorq(addr);
|
||||
+ }
|
||||
}
|
||||
|
||||
static inline void
|
||||
-kfpu_end(void)
|
||||
+kfpu_restore_fsave(struct fregs_state *addr)
|
||||
{
|
||||
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
|
||||
- union fpregs_state *state = &current->thread.fpu.state;
|
||||
- int error;
|
||||
+ kfpu_frstor(addr);
|
||||
+}
|
||||
|
||||
- if (use_xsave()) {
|
||||
- error = copy_kernel_to_xregs_err(&state->xsave, -1);
|
||||
- } else if (use_fxsr()) {
|
||||
- error = copy_kernel_to_fxregs_err(&state->fxsave);
|
||||
+static inline void
|
||||
+kfpu_end(void)
|
||||
+{
|
||||
+ if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
||||
+ kfpu_restore_xsave(&current->thread.fpu.state.xsave, ~0);
|
||||
+ } else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
||||
+ kfpu_restore_fxsr(&current->thread.fpu.state.fxsave);
|
||||
} else {
|
||||
- error = copy_kernel_to_fregs_err(&state->fsave);
|
||||
+ kfpu_restore_fsave(&current->thread.fpu.state.fsave);
|
||||
}
|
||||
- WARN_ON_ONCE(error);
|
||||
-#endif
|
||||
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
}
|
||||
-#endif /* defined(HAVE_KERNEL_FPU) */
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+/*
|
||||
+ * FPU support is unavailable.
|
||||
+ */
|
||||
+#define kfpu_allowed() 0
|
||||
+#define kfpu_initialize(tsk) do {} while (0)
|
||||
+#define kfpu_begin() do {} while (0)
|
||||
+#define kfpu_end() do {} while (0)
|
||||
+
|
||||
+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
|
||||
+#endif /* defined(KERNEL_EXPORTS_X86_FPU) */
|
||||
|
||||
#else /* defined(_KERNEL) */
|
||||
/*
|
@ -1,59 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
Date: Wed, 25 Sep 2019 10:48:48 +0200
|
||||
Subject: [PATCH] [SIMD]: FPU register save/restore is also required on 5.0
|
||||
kernels
|
||||
|
||||
NOTE: the kernel needs to have the copy_kernel_to_xregs_err,
|
||||
copy_kernel_to_fxregs_err and copy_kernel_to_fregs_err functions
|
||||
backported for this to work.
|
||||
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
include/linux/simd_x86.h | 11 ++++-------
|
||||
1 file changed, 4 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h
|
||||
index edd456098..98503a29e 100644
|
||||
--- a/include/linux/simd_x86.h
|
||||
+++ b/include/linux/simd_x86.h
|
||||
@@ -181,7 +181,6 @@ kfpu_begin(void)
|
||||
preempt_disable();
|
||||
local_irq_disable();
|
||||
|
||||
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
|
||||
/*
|
||||
* The current FPU registers need to be preserved by kfpu_begin()
|
||||
* and restored by kfpu_end(). This is required because we can
|
||||
@@ -190,11 +189,11 @@ kfpu_begin(void)
|
||||
* context switch.
|
||||
*/
|
||||
copy_fpregs_to_fpstate(&current->thread.fpu);
|
||||
-#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
|
||||
+
|
||||
+
|
||||
+#if defined(HAVE_KERNEL_FPU_INITIALIZED)
|
||||
/*
|
||||
- * There is no need to preserve and restore the FPU registers.
|
||||
- * They will always be restored from the task's stored FPU state
|
||||
- * when switching contexts.
|
||||
+ * Was removed with 5.2 as it was always set to 1 there
|
||||
*/
|
||||
WARN_ON_ONCE(current->thread.fpu.initialized == 0);
|
||||
#endif
|
||||
@@ -203,7 +202,6 @@ kfpu_begin(void)
|
||||
static inline void
|
||||
kfpu_end(void)
|
||||
{
|
||||
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
|
||||
union fpregs_state *state = &current->thread.fpu.state;
|
||||
int error;
|
||||
|
||||
@@ -215,7 +213,6 @@ kfpu_end(void)
|
||||
error = copy_kernel_to_fregs_err(&state->fsave);
|
||||
}
|
||||
WARN_ON_ONCE(error);
|
||||
-#endif
|
||||
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
@ -2,9 +2,6 @@ From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Date: Wed, 17 Jul 2019 09:14:36 -0700
|
||||
Subject: [PATCH] Fix CONFIG_X86_DEBUG_FPU build failure
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
When CONFIG_X86_DEBUG_FPU is defined the alternatives_patched symbol
|
||||
is pulled in as a dependency which results in a build failure. To
|
||||
@ -16,15 +13,13 @@ Reviewed-by: Tony Hutter <hutter2@llnl.gov>
|
||||
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Closes #9041
|
||||
Closes #9049
|
||||
(cherry picked from commit 095b5412b31c07cad5cec74a4eb5ace011c92b27)
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
include/linux/simd_x86.h | 9 +++++++++
|
||||
1 file changed, 9 insertions(+)
|
||||
|
||||
diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h
|
||||
index 641f43955..edd456098 100644
|
||||
index d1ded3af2..b32bccc41 100644
|
||||
--- a/include/linux/simd_x86.h
|
||||
+++ b/include/linux/simd_x86.h
|
||||
@@ -84,6 +84,15 @@
|
2433
debian/patches/0010-OpenZFS-restructuring-move-platform-specific-headers.patch
vendored
Normal file
2433
debian/patches/0010-OpenZFS-restructuring-move-platform-specific-headers.patch
vendored
Normal file
File diff suppressed because it is too large
Load Diff
327
debian/patches/0011-Allow-FPU-usage-in-user-and-kernel-thread-contexts.patch
vendored
Normal file
327
debian/patches/0011-Allow-FPU-usage-in-user-and-kernel-thread-contexts.patch
vendored
Normal file
@ -0,0 +1,327 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Date: Wed, 9 Oct 2019 22:36:19 +0000
|
||||
Subject: [PATCH] Allow FPU usage in user and kernel thread contexts
|
||||
|
||||
Even for dedicated kernel threads we apparently cannot be
|
||||
guaranteed that the kernel won't modify the FPU state which
|
||||
we saved in the task struct. Allocate our own per-cpu state
|
||||
to preserve the saved register state. Aside from correctness,
|
||||
this allows use of the FPU in user threads again.
|
||||
|
||||
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
(cherry picked from commit 11170d9073edcbb613f5a4c992293cbb4e3c8e31)
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
config/kernel-fpu.m4 | 7 --
|
||||
include/os/linux/kernel/linux/simd.h | 1 -
|
||||
include/os/linux/kernel/linux/simd_aarch64.h | 2 -
|
||||
include/os/linux/kernel/linux/simd_x86.h | 87 +++++++++++++-------
|
||||
module/spl/spl-taskq.c | 2 -
|
||||
module/spl/spl-thread.c | 2 -
|
||||
module/zcommon/zfs_prop.c | 18 ++++
|
||||
7 files changed, 73 insertions(+), 46 deletions(-)
|
||||
|
||||
diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4
|
||||
index 9ed9b14ad..15bea3c22 100644
|
||||
--- a/config/kernel-fpu.m4
|
||||
+++ b/config/kernel-fpu.m4
|
||||
@@ -67,12 +67,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
|
||||
#error Unsupported architecture
|
||||
#endif
|
||||
|
||||
- #include <linux/sched.h>
|
||||
-
|
||||
- #if !defined(PF_KTHREAD)
|
||||
- #error PF_KTHREAD not defined
|
||||
- #endif
|
||||
-
|
||||
#ifdef HAVE_KERNEL_FPU_API_HEADER
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/fpu/internal.h>
|
||||
@@ -94,7 +88,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
|
||||
struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
|
||||
struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
|
||||
struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
|
||||
- fpu->last_cpu = -1;
|
||||
])
|
||||
])
|
||||
|
||||
diff --git a/include/os/linux/kernel/linux/simd.h b/include/os/linux/kernel/linux/simd.h
|
||||
index 1f6574a90..5138a908b 100644
|
||||
--- a/include/os/linux/kernel/linux/simd.h
|
||||
+++ b/include/os/linux/kernel/linux/simd.h
|
||||
@@ -33,7 +33,6 @@
|
||||
#else
|
||||
|
||||
#define kfpu_allowed() 0
|
||||
-#define kfpu_initialize(tsk) do {} while (0)
|
||||
#define kfpu_begin() do {} while (0)
|
||||
#define kfpu_end() do {} while (0)
|
||||
|
||||
diff --git a/include/os/linux/kernel/linux/simd_aarch64.h b/include/os/linux/kernel/linux/simd_aarch64.h
|
||||
index ac530d920..f6cf3c377 100644
|
||||
--- a/include/os/linux/kernel/linux/simd_aarch64.h
|
||||
+++ b/include/os/linux/kernel/linux/simd_aarch64.h
|
||||
@@ -27,7 +27,6 @@
|
||||
*
|
||||
* Kernel fpu methods:
|
||||
* kfpu_allowed()
|
||||
- * kfpu_initialize()
|
||||
* kfpu_begin()
|
||||
* kfpu_end()
|
||||
*/
|
||||
@@ -43,7 +42,6 @@
|
||||
#include <asm/neon.h>
|
||||
|
||||
#define kfpu_allowed() 1
|
||||
-#define kfpu_initialize(tsk) do {} while (0)
|
||||
#define kfpu_begin() kernel_neon_begin()
|
||||
#define kfpu_end() kernel_neon_end()
|
||||
|
||||
diff --git a/include/os/linux/kernel/linux/simd_x86.h b/include/os/linux/kernel/linux/simd_x86.h
|
||||
index 486e31845..c42ea918e 100644
|
||||
--- a/include/os/linux/kernel/linux/simd_x86.h
|
||||
+++ b/include/os/linux/kernel/linux/simd_x86.h
|
||||
@@ -27,7 +27,6 @@
|
||||
*
|
||||
* Kernel fpu methods:
|
||||
* kfpu_allowed()
|
||||
- * kfpu_initialize()
|
||||
* kfpu_begin()
|
||||
* kfpu_end()
|
||||
*
|
||||
@@ -99,7 +98,6 @@
|
||||
#if defined(KERNEL_EXPORTS_X86_FPU)
|
||||
|
||||
#define kfpu_allowed() 1
|
||||
-#define kfpu_initialize(tsk) do {} while (0)
|
||||
|
||||
#if defined(HAVE_UNDERSCORE_KERNEL_FPU)
|
||||
#define kfpu_begin() \
|
||||
@@ -129,16 +127,52 @@
|
||||
|
||||
/*
|
||||
* When the kernel_fpu_* symbols are unavailable then provide our own
|
||||
- * versions which allow the FPU to be safely used in kernel threads.
|
||||
- * In practice, this is not a significant restriction for ZFS since the
|
||||
- * vast majority of SIMD operations are performed by the IO pipeline.
|
||||
+ * versions which allow the FPU to be safely used.
|
||||
*/
|
||||
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
||||
|
||||
+extern struct fpu **zfs_kfpu_fpregs;
|
||||
+
|
||||
/*
|
||||
- * FPU usage only allowed in dedicated kernel threads.
|
||||
+ * Initialize per-cpu variables to store FPU state.
|
||||
*/
|
||||
-#define kfpu_allowed() (current->flags & PF_KTHREAD)
|
||||
+static inline void
|
||||
+kfpu_fini(void)
|
||||
+{
|
||||
+ int cpu;
|
||||
+
|
||||
+ for_each_possible_cpu(cpu) {
|
||||
+ if (zfs_kfpu_fpregs[cpu] != NULL) {
|
||||
+ kfree(zfs_kfpu_fpregs[cpu]);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ kfree(zfs_kfpu_fpregs);
|
||||
+}
|
||||
+
|
||||
+static inline int
|
||||
+kfpu_init(void)
|
||||
+{
|
||||
+ int cpu;
|
||||
+
|
||||
+ zfs_kfpu_fpregs = kzalloc(num_possible_cpus() *
|
||||
+ sizeof (struct fpu *), GFP_KERNEL);
|
||||
+ if (zfs_kfpu_fpregs == NULL)
|
||||
+ return (ENOMEM);
|
||||
+
|
||||
+ for_each_possible_cpu(cpu) {
|
||||
+ zfs_kfpu_fpregs[cpu] = kmalloc_node(sizeof (struct fpu),
|
||||
+ GFP_KERNEL, cpu_to_node(cpu));
|
||||
+ if (zfs_kfpu_fpregs[cpu] == NULL) {
|
||||
+ kfpu_fini();
|
||||
+ return (ENOMEM);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return (0);
|
||||
+}
|
||||
+
|
||||
+#define kfpu_allowed() 1
|
||||
#define ex_handler_fprestore ex_handler_default
|
||||
|
||||
/*
|
||||
@@ -154,15 +188,6 @@
|
||||
#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
|
||||
: : [addr] "m" (rval));
|
||||
|
||||
-static inline void
|
||||
-kfpu_initialize(void)
|
||||
-{
|
||||
- WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
|
||||
-
|
||||
- /* Invalidate the task's FPU state */
|
||||
- current->thread.fpu.last_cpu = -1;
|
||||
-}
|
||||
-
|
||||
static inline void
|
||||
kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
|
||||
{
|
||||
@@ -193,8 +218,6 @@ kfpu_save_fsave(struct fregs_state *addr)
|
||||
static inline void
|
||||
kfpu_begin(void)
|
||||
{
|
||||
- WARN_ON_ONCE(!kfpu_allowed());
|
||||
-
|
||||
/*
|
||||
* Preemption and interrupts must be disabled for the critical
|
||||
* region where the FPU state is being modified.
|
||||
@@ -204,20 +227,18 @@ kfpu_begin(void)
|
||||
|
||||
/*
|
||||
* The current FPU registers need to be preserved by kfpu_begin()
|
||||
- * and restored by kfpu_end(). This is always required because we
|
||||
- * can not call __cpu_invalidate_fpregs_state() to invalidate the
|
||||
- * per-cpu FPU state and force them to be restored. Furthermore,
|
||||
- * this implementation relies on the space provided in the task
|
||||
- * structure to store the user FPU state. As such, it can only
|
||||
- * be used with dedicated kernels which by definition will never
|
||||
- * store user FPU state.
|
||||
+ * and restored by kfpu_end(). They are stored in a dedicated
|
||||
+ * per-cpu variable, not in the task struct, this allows any user
|
||||
+ * FPU state to be correctly preserved and restored.
|
||||
*/
|
||||
+ struct fpu *fpu = zfs_kfpu_fpregs[smp_processor_id()];
|
||||
+
|
||||
if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
||||
- kfpu_save_xsave(&current->thread.fpu.state.xsave, ~0);
|
||||
+ kfpu_save_xsave(&fpu->state.xsave, ~0);
|
||||
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
||||
- kfpu_save_fxsr(&current->thread.fpu.state.fxsave);
|
||||
+ kfpu_save_fxsr(&fpu->state.fxsave);
|
||||
} else {
|
||||
- kfpu_save_fsave(&current->thread.fpu.state.fsave);
|
||||
+ kfpu_save_fsave(&fpu->state.fsave);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -258,12 +279,14 @@ kfpu_restore_fsave(struct fregs_state *addr)
|
||||
static inline void
|
||||
kfpu_end(void)
|
||||
{
|
||||
+ struct fpu *fpu = zfs_kfpu_fpregs[smp_processor_id()];
|
||||
+
|
||||
if (static_cpu_has(X86_FEATURE_XSAVE)) {
|
||||
- kfpu_restore_xsave(&current->thread.fpu.state.xsave, ~0);
|
||||
+ kfpu_restore_xsave(&fpu->state.xsave, ~0);
|
||||
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
|
||||
- kfpu_restore_fxsr(&current->thread.fpu.state.fxsave);
|
||||
+ kfpu_restore_fxsr(&fpu->state.fxsave);
|
||||
} else {
|
||||
- kfpu_restore_fsave(&current->thread.fpu.state.fsave);
|
||||
+ kfpu_restore_fsave(&fpu->state.fsave);
|
||||
}
|
||||
|
||||
local_irq_enable();
|
||||
@@ -276,7 +299,6 @@ kfpu_end(void)
|
||||
* FPU support is unavailable.
|
||||
*/
|
||||
#define kfpu_allowed() 0
|
||||
-#define kfpu_initialize(tsk) do {} while (0)
|
||||
#define kfpu_begin() do {} while (0)
|
||||
#define kfpu_end() do {} while (0)
|
||||
|
||||
@@ -286,6 +308,7 @@ kfpu_end(void)
|
||||
/*
|
||||
* Linux kernel provides an interface for CPU feature testing.
|
||||
*/
|
||||
+
|
||||
/*
|
||||
* Detect register set support
|
||||
*/
|
||||
diff --git a/module/spl/spl-taskq.c b/module/spl/spl-taskq.c
|
||||
index 90e1d0a4d..a39f94e4c 100644
|
||||
--- a/module/spl/spl-taskq.c
|
||||
+++ b/module/spl/spl-taskq.c
|
||||
@@ -28,7 +28,6 @@
|
||||
#include <sys/taskq.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/tsd.h>
|
||||
-#include <sys/simd.h>
|
||||
|
||||
int spl_taskq_thread_bind = 0;
|
||||
module_param(spl_taskq_thread_bind, int, 0644);
|
||||
@@ -854,7 +853,6 @@ taskq_thread(void *args)
|
||||
sigfillset(&blocked);
|
||||
sigprocmask(SIG_BLOCK, &blocked, NULL);
|
||||
flush_signals(current);
|
||||
- kfpu_initialize();
|
||||
|
||||
tsd_set(taskq_tsd, tq);
|
||||
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
||||
diff --git a/module/spl/spl-thread.c b/module/spl/spl-thread.c
|
||||
index 29de9252a..0352a31ea 100644
|
||||
--- a/module/spl/spl-thread.c
|
||||
+++ b/module/spl/spl-thread.c
|
||||
@@ -27,7 +27,6 @@
|
||||
#include <sys/thread.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/tsd.h>
|
||||
-#include <sys/simd.h>
|
||||
|
||||
/*
|
||||
* Thread interfaces
|
||||
@@ -55,7 +54,6 @@ thread_generic_wrapper(void *arg)
|
||||
args = tp->tp_args;
|
||||
set_current_state(tp->tp_state);
|
||||
set_user_nice((kthread_t *)current, PRIO_TO_NICE(tp->tp_pri));
|
||||
- kfpu_initialize();
|
||||
kmem_free(tp->tp_name, tp->tp_name_size);
|
||||
kmem_free(tp, sizeof (thread_priv_t));
|
||||
|
||||
diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c
|
||||
index dab749138..b5fa1c2f4 100644
|
||||
--- a/module/zcommon/zfs_prop.c
|
||||
+++ b/module/zcommon/zfs_prop.c
|
||||
@@ -853,10 +853,27 @@ zfs_prop_align_right(zfs_prop_t prop)
|
||||
#endif
|
||||
|
||||
#if defined(_KERNEL)
|
||||
+
|
||||
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
||||
+#include <linux/simd.h>
|
||||
+
|
||||
+struct fpu **zfs_kfpu_fpregs;
|
||||
+EXPORT_SYMBOL(zfs_kfpu_fpregs);
|
||||
+
|
||||
+#else
|
||||
+#define kfpu_init() 0
|
||||
+#define kfpu_fini() ((void) 0)
|
||||
+#endif /* HAVE_KERNEL_FPU_INTERNAL */
|
||||
+
|
||||
static int __init
|
||||
zcommon_init(void)
|
||||
{
|
||||
+ int error = kfpu_init();
|
||||
+ if (error)
|
||||
+ return (-error);
|
||||
+
|
||||
fletcher_4_init();
|
||||
+
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -864,6 +881,7 @@ static void __exit
|
||||
zcommon_fini(void)
|
||||
{
|
||||
fletcher_4_fini();
|
||||
+ kfpu_fini();
|
||||
}
|
||||
|
||||
module_init(zcommon_init);
|
8
debian/patches/series
vendored
8
debian/patches/series
vendored
@ -4,6 +4,8 @@
|
||||
0004-import-with-d-dev-disk-by-id-in-scan-service.patch
|
||||
0005-Enable-zed-emails.patch
|
||||
0006-Linux-5.0-compat-SIMD-compatibility.patch
|
||||
0007-Fix-CONFIG_X86_DEBUG_FPU-build-failure.patch
|
||||
0008-SIMD-FPU-register-save-restore-is-also-required-on-5.patch
|
||||
0009-Perform-KABI-checks-in-parallel.patch
|
||||
0007-Perform-KABI-checks-in-parallel.patch
|
||||
0008-Linux-4.14-4.19-5.0-compat-SIMD-save-restore.patch
|
||||
0009-Fix-CONFIG_X86_DEBUG_FPU-build-failure.patch
|
||||
0010-OpenZFS-restructuring-move-platform-specific-headers.patch
|
||||
0011-Allow-FPU-usage-in-user-and-kernel-thread-contexts.patch
|
||||
|
Loading…
Reference in New Issue
Block a user