FPU/SIMD: separate ZFS and Kernel approach for less interference

github pull-request #9406[0].

[0]: https://github.com/zfsonlinux/zfs/pull/9406#event-2704412478

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
Thomas Lamprecht 2019-10-18 20:03:00 +02:00
parent 45fdd1f323
commit 8f11c72175
8 changed files with 3099 additions and 73 deletions

View File

@ -2,9 +2,6 @@ From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Fri, 12 Jul 2019 09:31:20 -0700
Subject: [PATCH] Linux 5.0 compat: SIMD compatibility
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Restore the SIMD optimization for 4.19.38 LTS, 4.14.120 LTS,
and 5.0 and newer kernels. This is accomplished by leveraging
@ -59,8 +56,6 @@ Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #8754
Closes #8793
Closes #8965
(cherry picked from commit e5db31349484e5e859c7a942eb15b98d68ce5b4d)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
cmd/ztest/ztest.c | 3 +

View File

@ -0,0 +1,333 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Thu, 3 Oct 2019 00:03:20 +0000
Subject: [PATCH] Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore
Contrary to initial testing we cannot rely on these kernels to
invalidate the per-cpu FPU state and restore the FPU registers.
Therefore, the kfpu_begin() and kfpu_end() functions have been
updated to unconditionally save and restore the FPU state.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #9346
(cherry picked from commit 813fd014a90229127f80b970a8fef5049fd4c713)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
config/kernel-fpu.m4 | 82 +++++++++++----------
include/linux/simd_x86.h | 152 ++++++++++++++++++++++++++++-----------
2 files changed, 155 insertions(+), 79 deletions(-)
diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4
index a2c47d65a..9ed9b14ad 100644
--- a/config/kernel-fpu.m4
+++ b/config/kernel-fpu.m4
@@ -2,15 +2,9 @@ dnl #
dnl # Handle differences in kernel FPU code.
dnl #
dnl # Kernel
-dnl # 5.2: The fpu->initialized flag was replaced by TIF_NEED_FPU_LOAD.
-dnl # HAVE_KERNEL_TIF_NEED_FPU_LOAD
-dnl #
-dnl # 5.0: As an optimization SIMD operations performed by kernel
-dnl # threads can skip saving and restoring their FPU context.
-dnl # Wrappers have been introduced to determine the running
-dnl # context and use either the SIMD or generic implementation.
+dnl # 5.0: Wrappers have been introduced to save/restore the FPU state.
dnl # This change was made to the 4.19.38 and 4.14.120 LTS kernels.
-dnl # HAVE_KERNEL_FPU_INITIALIZED
+dnl # HAVE_KERNEL_FPU_INTERNAL
dnl #
dnl # 4.2: Use __kernel_fpu_{begin,end}()
dnl # HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU
@@ -61,22 +55,47 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
__kernel_fpu_end();
], [], [$ZFS_META_LICENSE])
- ZFS_LINUX_TEST_SRC([fpu_initialized], [
- #include <linux/module.h>
+ ZFS_LINUX_TEST_SRC([fpu_internal], [
+ #if defined(__x86_64) || defined(__x86_64__) || \
+ defined(__i386) || defined(__i386__)
+ #if !defined(__x86)
+ #define __x86
+ #endif
+ #endif
+
+ #if !defined(__x86)
+ #error Unsupported architecture
+ #endif
+
#include <linux/sched.h>
- ],[
- struct fpu *fpu = &current->thread.fpu;
- if (fpu->initialized) { return (0); };
- ])
- ZFS_LINUX_TEST_SRC([tif_need_fpu_load], [
- #include <linux/module.h>
- #include <asm/thread_info.h>
+ #if !defined(PF_KTHREAD)
+ #error PF_KTHREAD not defined
+ #endif
- #if !defined(TIF_NEED_FPU_LOAD)
- #error "TIF_NEED_FPU_LOAD undefined"
+ #ifdef HAVE_KERNEL_FPU_API_HEADER
+ #include <asm/fpu/api.h>
+ #include <asm/fpu/internal.h>
+ #else
+ #include <asm/i387.h>
+ #include <asm/xcr.h>
+ #endif
+
+ #if !defined(XSTATE_XSAVE)
+ #error XSTATE_XSAVE not defined
+ #endif
+
+ #if !defined(XSTATE_XRESTORE)
+ #error XSTATE_XRESTORE not defined
#endif
- ],[])
+ ],[
+ struct fpu *fpu = &current->thread.fpu;
+ union fpregs_state *st = &fpu->state;
+ struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
+ struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
+ struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
+ fpu->last_cpu = -1;
+ ])
])
AC_DEFUN([ZFS_AC_KERNEL_FPU], [
@@ -104,25 +123,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
[kernel exports FPU functions])
],[
- dnl #
- dnl # Linux 5.0 kernel
- dnl #
- ZFS_LINUX_TEST_RESULT([fpu_initialized], [
- AC_MSG_RESULT(fpu.initialized)
- AC_DEFINE(HAVE_KERNEL_FPU_INITIALIZED, 1,
- [kernel fpu.initialized exists])
+ ZFS_LINUX_TEST_RESULT([fpu_internal], [
+ AC_MSG_RESULT(internal)
+ AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
+ [kernel fpu internal])
],[
- dnl #
- dnl # Linux 5.2 kernel
- dnl #
- ZFS_LINUX_TEST_RESULT([tif_need_fpu_load], [
- AC_MSG_RESULT(TIF_NEED_FPU_LOAD)
- AC_DEFINE(
- HAVE_KERNEL_TIF_NEED_FPU_LOAD, 1,
- [kernel TIF_NEED_FPU_LOAD exists])
- ],[
- AC_MSG_RESULT(unavailable)
- ])
+ AC_MSG_RESULT(unavailable)
])
])
])
diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h
index 641f43955..d1ded3af2 100644
--- a/include/linux/simd_x86.h
+++ b/include/linux/simd_x86.h
@@ -126,38 +126,68 @@
#endif
#else /* defined(KERNEL_EXPORTS_X86_FPU) */
+
/*
* When the kernel_fpu_* symbols are unavailable then provide our own
* versions which allow the FPU to be safely used in kernel threads.
* In practice, this is not a significant restriction for ZFS since the
* vast majority of SIMD operations are performed by the IO pipeline.
*/
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
/*
- * Returns non-zero if FPU operations are allowed in the current context.
+ * FPU usage only allowed in dedicated kernel threads.
*/
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
-#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \
- test_thread_flag(TIF_NEED_FPU_LOAD))
-#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
-#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \
- current->thread.fpu.initialized)
-#else
-#define kfpu_allowed() 0
-#endif
+#define kfpu_allowed() (current->flags & PF_KTHREAD)
+#define ex_handler_fprestore ex_handler_default
+
+/*
+ * FPU save and restore instructions.
+ */
+#define __asm __asm__ __volatile__
+#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr)))
+#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr)))
+#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr)))
+#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
+#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr)))
+#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
+#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
+ : : [addr] "m" (rval));
static inline void
kfpu_initialize(void)
{
WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
- __fpu_invalidate_fpregs_state(&current->thread.fpu);
- set_thread_flag(TIF_NEED_FPU_LOAD);
-#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
- __fpu_invalidate_fpregs_state(&current->thread.fpu);
- current->thread.fpu.initialized = 1;
-#endif
+ /* Invalidate the task's FPU state */
+ current->thread.fpu.last_cpu = -1;
+}
+
+static inline void
+kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
+{
+ uint32_t low, hi;
+ int err;
+
+ low = mask;
+ hi = mask >> 32;
+ XSTATE_XSAVE(addr, low, hi, err);
+ WARN_ON_ONCE(err);
+}
+
+static inline void
+kfpu_save_fxsr(struct fxregs_state *addr)
+{
+ if (IS_ENABLED(CONFIG_X86_32))
+ kfpu_fxsave(addr);
+ else
+ kfpu_fxsaveq(addr);
+}
+
+static inline void
+kfpu_save_fsave(struct fregs_state *addr)
+{
+ kfpu_fnsave(addr);
}
static inline void
@@ -172,46 +202,86 @@ kfpu_begin(void)
preempt_disable();
local_irq_disable();
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
/*
* The current FPU registers need to be preserved by kfpu_begin()
- * and restored by kfpu_end(). This is required because we can
- * not call __cpu_invalidate_fpregs_state() to invalidate the
- * per-cpu FPU state and force them to be restored during a
- * context switch.
+ * and restored by kfpu_end(). This is always required because we
+ * can not call __cpu_invalidate_fpregs_state() to invalidate the
+ * per-cpu FPU state and force them to be restored. Furthermore,
+ * this implementation relies on the space provided in the task
+ * structure to store the user FPU state. As such, it can only
+ * be used with dedicated kernels which by definition will never
+ * store user FPU state.
*/
- copy_fpregs_to_fpstate(&current->thread.fpu);
-#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
+ if (static_cpu_has(X86_FEATURE_XSAVE)) {
+ kfpu_save_xsave(&current->thread.fpu.state.xsave, ~0);
+ } else if (static_cpu_has(X86_FEATURE_FXSR)) {
+ kfpu_save_fxsr(&current->thread.fpu.state.fxsave);
+ } else {
+ kfpu_save_fsave(&current->thread.fpu.state.fsave);
+ }
+}
+
+static inline void
+kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
+{
+ uint32_t low, hi;
+
+ low = mask;
+ hi = mask >> 32;
+ XSTATE_XRESTORE(addr, low, hi);
+}
+
+static inline void
+kfpu_restore_fxsr(struct fxregs_state *addr)
+{
/*
- * There is no need to preserve and restore the FPU registers.
- * They will always be restored from the task's stored FPU state
- * when switching contexts.
+ * On AuthenticAMD K7 and K8 processors the fxrstor instruction only
+ * restores the _x87 FOP, FIP, and FDP registers when an exception
+ * is pending. Clean the _x87 state to force the restore.
*/
- WARN_ON_ONCE(current->thread.fpu.initialized == 0);
-#endif
+ if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK)))
+ kfpu_fxsr_clean(addr);
+
+ if (IS_ENABLED(CONFIG_X86_32)) {
+ kfpu_fxrstor(addr);
+ } else {
+ kfpu_fxrstorq(addr);
+ }
}
static inline void
-kfpu_end(void)
+kfpu_restore_fsave(struct fregs_state *addr)
{
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
- union fpregs_state *state = &current->thread.fpu.state;
- int error;
+ kfpu_frstor(addr);
+}
- if (use_xsave()) {
- error = copy_kernel_to_xregs_err(&state->xsave, -1);
- } else if (use_fxsr()) {
- error = copy_kernel_to_fxregs_err(&state->fxsave);
+static inline void
+kfpu_end(void)
+{
+ if (static_cpu_has(X86_FEATURE_XSAVE)) {
+ kfpu_restore_xsave(&current->thread.fpu.state.xsave, ~0);
+ } else if (static_cpu_has(X86_FEATURE_FXSR)) {
+ kfpu_restore_fxsr(&current->thread.fpu.state.fxsave);
} else {
- error = copy_kernel_to_fregs_err(&state->fsave);
+ kfpu_restore_fsave(&current->thread.fpu.state.fsave);
}
- WARN_ON_ONCE(error);
-#endif
local_irq_enable();
preempt_enable();
}
-#endif /* defined(HAVE_KERNEL_FPU) */
+
+#else
+
+/*
+ * FPU support is unavailable.
+ */
+#define kfpu_allowed() 0
+#define kfpu_initialize(tsk) do {} while (0)
+#define kfpu_begin() do {} while (0)
+#define kfpu_end() do {} while (0)
+
+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
+#endif /* defined(KERNEL_EXPORTS_X86_FPU) */
#else /* defined(_KERNEL) */
/*

View File

@ -1,59 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Wed, 25 Sep 2019 10:48:48 +0200
Subject: [PATCH] [SIMD]: FPU register save/restore is also required on 5.0
kernels
NOTE: the kernel needs to have the copy_kernel_to_xregs_err,
copy_kernel_to_fxregs_err and copy_kernel_to_fregs_err functions
backported for this to work.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/linux/simd_x86.h | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h
index edd456098..98503a29e 100644
--- a/include/linux/simd_x86.h
+++ b/include/linux/simd_x86.h
@@ -181,7 +181,6 @@ kfpu_begin(void)
preempt_disable();
local_irq_disable();
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
/*
* The current FPU registers need to be preserved by kfpu_begin()
* and restored by kfpu_end(). This is required because we can
@@ -190,11 +189,11 @@ kfpu_begin(void)
* context switch.
*/
copy_fpregs_to_fpstate(&current->thread.fpu);
-#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
+
+
+#if defined(HAVE_KERNEL_FPU_INITIALIZED)
/*
- * There is no need to preserve and restore the FPU registers.
- * They will always be restored from the task's stored FPU state
- * when switching contexts.
+ * Was removed with 5.2 as it was always set to 1 there
*/
WARN_ON_ONCE(current->thread.fpu.initialized == 0);
#endif
@@ -203,7 +202,6 @@ kfpu_begin(void)
static inline void
kfpu_end(void)
{
-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
union fpregs_state *state = &current->thread.fpu.state;
int error;
@@ -215,7 +213,6 @@ kfpu_end(void)
error = copy_kernel_to_fregs_err(&state->fsave);
}
WARN_ON_ONCE(error);
-#endif
local_irq_enable();
preempt_enable();

View File

@ -2,9 +2,6 @@ From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Wed, 17 Jul 2019 09:14:36 -0700
Subject: [PATCH] Fix CONFIG_X86_DEBUG_FPU build failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When CONFIG_X86_DEBUG_FPU is defined the alternatives_patched symbol
is pulled in as a dependency which results in a build failure. To
@ -16,15 +13,13 @@ Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #9041
Closes #9049
(cherry picked from commit 095b5412b31c07cad5cec74a4eb5ace011c92b27)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/linux/simd_x86.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h
index 641f43955..edd456098 100644
index d1ded3af2..b32bccc41 100644
--- a/include/linux/simd_x86.h
+++ b/include/linux/simd_x86.h
@@ -84,6 +84,15 @@

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,327 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Wed, 9 Oct 2019 22:36:19 +0000
Subject: [PATCH] Allow FPU usage in user and kernel thread contexts
Even for dedicated kernel threads we apparently cannot be
guaranteed that the kernel won't modify the FPU state which
we saved in the task struct. Allocate our own per-cpu state
to preserve the saved register state. Aside from correctness,
this allows use of the FPU in user threads again.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
(cherry picked from commit 11170d9073edcbb613f5a4c992293cbb4e3c8e31)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
config/kernel-fpu.m4 | 7 --
include/os/linux/kernel/linux/simd.h | 1 -
include/os/linux/kernel/linux/simd_aarch64.h | 2 -
include/os/linux/kernel/linux/simd_x86.h | 87 +++++++++++++-------
module/spl/spl-taskq.c | 2 -
module/spl/spl-thread.c | 2 -
module/zcommon/zfs_prop.c | 18 ++++
7 files changed, 73 insertions(+), 46 deletions(-)
diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4
index 9ed9b14ad..15bea3c22 100644
--- a/config/kernel-fpu.m4
+++ b/config/kernel-fpu.m4
@@ -67,12 +67,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
#error Unsupported architecture
#endif
- #include <linux/sched.h>
-
- #if !defined(PF_KTHREAD)
- #error PF_KTHREAD not defined
- #endif
-
#ifdef HAVE_KERNEL_FPU_API_HEADER
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
@@ -94,7 +88,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
- fpu->last_cpu = -1;
])
])
diff --git a/include/os/linux/kernel/linux/simd.h b/include/os/linux/kernel/linux/simd.h
index 1f6574a90..5138a908b 100644
--- a/include/os/linux/kernel/linux/simd.h
+++ b/include/os/linux/kernel/linux/simd.h
@@ -33,7 +33,6 @@
#else
#define kfpu_allowed() 0
-#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)
diff --git a/include/os/linux/kernel/linux/simd_aarch64.h b/include/os/linux/kernel/linux/simd_aarch64.h
index ac530d920..f6cf3c377 100644
--- a/include/os/linux/kernel/linux/simd_aarch64.h
+++ b/include/os/linux/kernel/linux/simd_aarch64.h
@@ -27,7 +27,6 @@
*
* Kernel fpu methods:
* kfpu_allowed()
- * kfpu_initialize()
* kfpu_begin()
* kfpu_end()
*/
@@ -43,7 +42,6 @@
#include <asm/neon.h>
#define kfpu_allowed() 1
-#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() kernel_neon_begin()
#define kfpu_end() kernel_neon_end()
diff --git a/include/os/linux/kernel/linux/simd_x86.h b/include/os/linux/kernel/linux/simd_x86.h
index 486e31845..c42ea918e 100644
--- a/include/os/linux/kernel/linux/simd_x86.h
+++ b/include/os/linux/kernel/linux/simd_x86.h
@@ -27,7 +27,6 @@
*
* Kernel fpu methods:
* kfpu_allowed()
- * kfpu_initialize()
* kfpu_begin()
* kfpu_end()
*
@@ -99,7 +98,6 @@
#if defined(KERNEL_EXPORTS_X86_FPU)
#define kfpu_allowed() 1
-#define kfpu_initialize(tsk) do {} while (0)
#if defined(HAVE_UNDERSCORE_KERNEL_FPU)
#define kfpu_begin() \
@@ -129,16 +127,52 @@
/*
* When the kernel_fpu_* symbols are unavailable then provide our own
- * versions which allow the FPU to be safely used in kernel threads.
- * In practice, this is not a significant restriction for ZFS since the
- * vast majority of SIMD operations are performed by the IO pipeline.
+ * versions which allow the FPU to be safely used.
*/
#if defined(HAVE_KERNEL_FPU_INTERNAL)
+extern struct fpu **zfs_kfpu_fpregs;
+
/*
- * FPU usage only allowed in dedicated kernel threads.
+ * Initialize per-cpu variables to store FPU state.
*/
-#define kfpu_allowed() (current->flags & PF_KTHREAD)
+static inline void
+kfpu_fini(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (zfs_kfpu_fpregs[cpu] != NULL) {
+ kfree(zfs_kfpu_fpregs[cpu]);
+ }
+ }
+
+ kfree(zfs_kfpu_fpregs);
+}
+
+static inline int
+kfpu_init(void)
+{
+ int cpu;
+
+ zfs_kfpu_fpregs = kzalloc(num_possible_cpus() *
+ sizeof (struct fpu *), GFP_KERNEL);
+ if (zfs_kfpu_fpregs == NULL)
+ return (ENOMEM);
+
+ for_each_possible_cpu(cpu) {
+ zfs_kfpu_fpregs[cpu] = kmalloc_node(sizeof (struct fpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (zfs_kfpu_fpregs[cpu] == NULL) {
+ kfpu_fini();
+ return (ENOMEM);
+ }
+ }
+
+ return (0);
+}
+
+#define kfpu_allowed() 1
#define ex_handler_fprestore ex_handler_default
/*
@@ -154,15 +188,6 @@
#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
: : [addr] "m" (rval));
-static inline void
-kfpu_initialize(void)
-{
- WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
-
- /* Invalidate the task's FPU state */
- current->thread.fpu.last_cpu = -1;
-}
-
static inline void
kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
{
@@ -193,8 +218,6 @@ kfpu_save_fsave(struct fregs_state *addr)
static inline void
kfpu_begin(void)
{
- WARN_ON_ONCE(!kfpu_allowed());
-
/*
* Preemption and interrupts must be disabled for the critical
* region where the FPU state is being modified.
@@ -204,20 +227,18 @@ kfpu_begin(void)
/*
* The current FPU registers need to be preserved by kfpu_begin()
- * and restored by kfpu_end(). This is always required because we
- * can not call __cpu_invalidate_fpregs_state() to invalidate the
- * per-cpu FPU state and force them to be restored. Furthermore,
- * this implementation relies on the space provided in the task
- * structure to store the user FPU state. As such, it can only
- * be used with dedicated kernels which by definition will never
- * store user FPU state.
+ * and restored by kfpu_end(). They are stored in a dedicated
+ * per-cpu variable, not in the task struct, this allows any user
+ * FPU state to be correctly preserved and restored.
*/
+ struct fpu *fpu = zfs_kfpu_fpregs[smp_processor_id()];
+
if (static_cpu_has(X86_FEATURE_XSAVE)) {
- kfpu_save_xsave(&current->thread.fpu.state.xsave, ~0);
+ kfpu_save_xsave(&fpu->state.xsave, ~0);
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
- kfpu_save_fxsr(&current->thread.fpu.state.fxsave);
+ kfpu_save_fxsr(&fpu->state.fxsave);
} else {
- kfpu_save_fsave(&current->thread.fpu.state.fsave);
+ kfpu_save_fsave(&fpu->state.fsave);
}
}
@@ -258,12 +279,14 @@ kfpu_restore_fsave(struct fregs_state *addr)
static inline void
kfpu_end(void)
{
+ struct fpu *fpu = zfs_kfpu_fpregs[smp_processor_id()];
+
if (static_cpu_has(X86_FEATURE_XSAVE)) {
- kfpu_restore_xsave(&current->thread.fpu.state.xsave, ~0);
+ kfpu_restore_xsave(&fpu->state.xsave, ~0);
} else if (static_cpu_has(X86_FEATURE_FXSR)) {
- kfpu_restore_fxsr(&current->thread.fpu.state.fxsave);
+ kfpu_restore_fxsr(&fpu->state.fxsave);
} else {
- kfpu_restore_fsave(&current->thread.fpu.state.fsave);
+ kfpu_restore_fsave(&fpu->state.fsave);
}
local_irq_enable();
@@ -276,7 +299,6 @@ kfpu_end(void)
* FPU support is unavailable.
*/
#define kfpu_allowed() 0
-#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)
@@ -286,6 +308,7 @@ kfpu_end(void)
/*
* Linux kernel provides an interface for CPU feature testing.
*/
+
/*
* Detect register set support
*/
diff --git a/module/spl/spl-taskq.c b/module/spl/spl-taskq.c
index 90e1d0a4d..a39f94e4c 100644
--- a/module/spl/spl-taskq.c
+++ b/module/spl/spl-taskq.c
@@ -28,7 +28,6 @@
#include <sys/taskq.h>
#include <sys/kmem.h>
#include <sys/tsd.h>
-#include <sys/simd.h>
int spl_taskq_thread_bind = 0;
module_param(spl_taskq_thread_bind, int, 0644);
@@ -854,7 +853,6 @@ taskq_thread(void *args)
sigfillset(&blocked);
sigprocmask(SIG_BLOCK, &blocked, NULL);
flush_signals(current);
- kfpu_initialize();
tsd_set(taskq_tsd, tq);
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
diff --git a/module/spl/spl-thread.c b/module/spl/spl-thread.c
index 29de9252a..0352a31ea 100644
--- a/module/spl/spl-thread.c
+++ b/module/spl/spl-thread.c
@@ -27,7 +27,6 @@
#include <sys/thread.h>
#include <sys/kmem.h>
#include <sys/tsd.h>
-#include <sys/simd.h>
/*
* Thread interfaces
@@ -55,7 +54,6 @@ thread_generic_wrapper(void *arg)
args = tp->tp_args;
set_current_state(tp->tp_state);
set_user_nice((kthread_t *)current, PRIO_TO_NICE(tp->tp_pri));
- kfpu_initialize();
kmem_free(tp->tp_name, tp->tp_name_size);
kmem_free(tp, sizeof (thread_priv_t));
diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c
index dab749138..b5fa1c2f4 100644
--- a/module/zcommon/zfs_prop.c
+++ b/module/zcommon/zfs_prop.c
@@ -853,10 +853,27 @@ zfs_prop_align_right(zfs_prop_t prop)
#endif
#if defined(_KERNEL)
+
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
+#include <linux/simd.h>
+
+struct fpu **zfs_kfpu_fpregs;
+EXPORT_SYMBOL(zfs_kfpu_fpregs);
+
+#else
+#define kfpu_init() 0
+#define kfpu_fini() ((void) 0)
+#endif /* HAVE_KERNEL_FPU_INTERNAL */
+
static int __init
zcommon_init(void)
{
+ int error = kfpu_init();
+ if (error)
+ return (-error);
+
fletcher_4_init();
+
return (0);
}
@@ -864,6 +881,7 @@ static void __exit
zcommon_fini(void)
{
fletcher_4_fini();
+ kfpu_fini();
}
module_init(zcommon_init);

View File

@ -4,6 +4,8 @@
0004-import-with-d-dev-disk-by-id-in-scan-service.patch
0005-Enable-zed-emails.patch
0006-Linux-5.0-compat-SIMD-compatibility.patch
0007-Fix-CONFIG_X86_DEBUG_FPU-build-failure.patch
0008-SIMD-FPU-register-save-restore-is-also-required-on-5.patch
0009-Perform-KABI-checks-in-parallel.patch
0007-Perform-KABI-checks-in-parallel.patch
0008-Linux-4.14-4.19-5.0-compat-SIMD-save-restore.patch
0009-Fix-CONFIG_X86_DEBUG_FPU-build-failure.patch
0010-OpenZFS-restructuring-move-platform-specific-headers.patch
0011-Allow-FPU-usage-in-user-and-kernel-thread-contexts.patch