diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4
index faa64f1ec..06280239d 100644
--- a/config/kernel-fpu.m4
+++ b/config/kernel-fpu.m4
@@ -1,9 +1,12 @@
-dnl #
+dnl #
 dnl # Handle differences in kernel FPU code.
 dnl #
 dnl # Kernel
 dnl # 5.16:	XCR code put into asm/fpu/xcr.h
-dnl #		HAVE_KERNEL_FPU_XCR_HEADER
+dnl #		HAVE_KERNEL_FPU_XCR_HEADER
+dnl #
+dnl #		XSTATE_XSAVE and XSTATE_XRESTORE aren't accessible any more
+dnl #		HAVE_KERNEL_FPU_XSAVE_INTERNAL
 dnl #
 dnl # 5.0:	Wrappers have been introduced to save/restore the FPU state.
 dnl #		This change was made to the 4.19.38 and 4.14.120 LTS kernels.
@@ -107,6 +110,36 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
 		struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
 		struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
 	])
+
+	ZFS_LINUX_TEST_SRC([fpu_xsave_internal], [
+		#include
+		#if defined(__x86_64) || defined(__x86_64__) || \
+		    defined(__i386) || defined(__i386__)
+		#if !defined(__x86)
+		#define __x86
+		#endif
+		#endif
+
+		#if !defined(__x86)
+		#error Unsupported architecture
+		#endif
+
+		#include
+		#ifdef HAVE_KERNEL_FPU_API_HEADER
+		#include
+		#include
+		#else
+		#include
+		#include
+		#endif
+
+	],[
+		struct fpu *fpu = &current->thread.fpu;
+		union fpregs_state *st = &fpu->fpstate->regs;
+		struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
+		struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
+		struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
+	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_FPU], [
@@ -139,7 +172,13 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
 				AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
 				    [kernel fpu internal])
 			],[
+			ZFS_LINUX_TEST_RESULT([fpu_xsave_internal], [
+				AC_MSG_RESULT(internal with internal XSAVE)
+				AC_DEFINE(HAVE_KERNEL_FPU_XSAVE_INTERNAL, 1,
+				    [kernel fpu and XSAVE internal])
+			],[
 				AC_MSG_RESULT(unavailable)
+			])
 			])
 		])
 	])
diff --git a/config/toolchain-simd.m4 b/config/toolchain-simd.m4
index 1153cd694..061576fd9 100644
--- a/config/toolchain-simd.m4
+++ 
b/config/toolchain-simd.m4
@@ -24,6 +24,9 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE
+			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE
+			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT
+			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES
 			;;
 	esac
 ])
@@ -422,3 +425,66 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [
 		AC_MSG_RESULT([no])
 	])
 ])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE], [
+	AC_MSG_CHECKING([whether host toolchain supports XSAVE])
+
+	AC_LINK_IFELSE([AC_LANG_SOURCE([
+	[
+		int main(void)
+		{
+			char b[4096] __attribute__ ((aligned (64)));
+			__asm__ __volatile__("xsave %[b]\n" : : [b] "m" (*b) : "memory");
+		}
+	]])], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_XSAVE], 1, [Define if host toolchain supports XSAVE])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT], [
+	AC_MSG_CHECKING([whether host toolchain supports XSAVEOPT])
+
+	AC_LINK_IFELSE([AC_LANG_SOURCE([
+	[
+		int main(void)
+		{
+			char b[4096] __attribute__ ((aligned (64)));
+			__asm__ __volatile__("xsaveopt %[b]\n" : : [b] "m" (*b) : "memory");
+		}
+	]])], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_XSAVEOPT], 1, [Define if host toolchain supports XSAVEOPT])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES], [
+	AC_MSG_CHECKING([whether host toolchain supports XSAVES])
+
+	AC_LINK_IFELSE([AC_LANG_SOURCE([
+	[
+		int main(void)
+		{
+			char b[4096] __attribute__ ((aligned (64)));
+			__asm__ __volatile__("xsaves %[b]\n" : : [b] "m" (*b) : "memory");
+		}
+	]])], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_XSAVES], 1, [Define if host toolchain supports XSAVES])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
diff --git 
a/configure.ac b/configure.ac
index ebc7b276a..2671434af 100644
--- a/configure.ac
+++ b/configure.ac
@@ -383,6 +383,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/tests/functional/rootpool/Makefile
 	tests/zfs-tests/tests/functional/rsend/Makefile
 	tests/zfs-tests/tests/functional/scrub_mirror/Makefile
+	tests/zfs-tests/tests/functional/simd/Makefile
 	tests/zfs-tests/tests/functional/slog/Makefile
 	tests/zfs-tests/tests/functional/snapshot/Makefile
 	tests/zfs-tests/tests/functional/snapused/Makefile
diff --git a/include/os/linux/kernel/linux/simd_x86.h b/include/os/linux/kernel/linux/simd_x86.h
index f2ae0fcbc..7c806bf2c 100644
--- a/include/os/linux/kernel/linux/simd_x86.h
+++ b/include/os/linux/kernel/linux/simd_x86.h
@@ -136,9 +136,27 @@
  * When the kernel_fpu_* symbols are unavailable then provide our own
  * versions which allow the FPU to be safely used.
  */
+#if defined(HAVE_KERNEL_FPU_INTERNAL) || defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+
+#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+/*
+ * Sanity check: HAVE_KERNEL_FPU_INTERNAL and HAVE_KERNEL_FPU_XSAVE_INTERNAL
+ * are mutually exclusive; having both defined is a build error.
+ */
 #if defined(HAVE_KERNEL_FPU_INTERNAL)
+#error "HAVE_KERNEL_FPU_INTERNAL and HAVE_KERNEL_FPU_XSAVE_INTERNAL defined"
+#endif
+/*
+ * For kernels >= 5.16 we have to use inline assembly with the XSAVE{,OPT,S}
+ * instructions, so we need the toolchain to support at least XSAVE.
+ */
+#if !defined(HAVE_XSAVE)
+#error "Toolchain needs to support the XSAVE assembler instruction"
+#endif
+#endif
 
 #include
+#include
 
 extern union fpregs_state **zfs_kfpu_fpregs;
 
@@ -191,7 +209,9 @@ kfpu_init(void)
 }
 
 #define kfpu_allowed() 1
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
 #define ex_handler_fprestore ex_handler_default
+#endif
 
 /*
  * FPU save and restore instructions.
@@ -206,6 +226,7 @@ kfpu_init(void)
 #define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
 				    : : [addr] "m" (rval));
 
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
 static inline void
 kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
 {
@@ -217,6 +238,25 @@ kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
 	XSTATE_XSAVE(addr, low, hi, err);
 	WARN_ON_ONCE(err);
 }
+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
+
+#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+/*
+ * Issue the given XSAVE-family instruction with addr as its memory operand.
+ * The 64-bit mask is split into EAX (low half) and EDX (high half).
+ */
+#define kfpu_do_xsave(instruction, addr, mask) \
+do { \
+	uint32_t low, hi; \
+ \
+	low = (mask); \
+	hi = (uint64_t)(mask) >> 32; \
+	__asm(instruction " %[dst]\n\t" \
+	    : \
+	    : [dst] "m" (*(addr)), "a" (low), "d" (hi) \
+	    : "memory"); \
+} while (0)
+#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
 
 static inline void
 kfpu_save_fxsr(struct fxregs_state *addr)
@@ -233,6 +273,7 @@ kfpu_save_fsave(struct fregs_state *addr)
 	kfpu_fnsave(addr);
 }
 
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
 static inline void
 kfpu_begin(void)
 {
@@ -250,7 +291,6 @@ kfpu_begin(void)
 	 * FPU state to be correctly preserved and restored.
 	 */
 	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
-
 	if (static_cpu_has(X86_FEATURE_XSAVE)) {
 		kfpu_save_xsave(&state->xsave, ~0);
 	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
@@ -259,7 +299,49 @@ kfpu_begin(void)
 		kfpu_save_fsave(&state->fsave);
 	}
 }
+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
 
+#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+static inline void
+kfpu_begin(void)
+{
+	/*
+	 * Preemption and interrupts must be disabled for the critical
+	 * region where the FPU state is being modified.
+	 */
+	preempt_disable();
+	local_irq_disable();
+
+	/*
+	 * The current FPU registers need to be preserved by kfpu_begin()
+	 * and restored by kfpu_end(). They are stored in a dedicated
+	 * per-cpu variable, not in the task struct, this allows any user
+	 * FPU state to be correctly preserved and restored.
+	 */
+	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
+#if defined(HAVE_XSAVES)
+	if (static_cpu_has(X86_FEATURE_XSAVES)) {
+		kfpu_do_xsave("xsaves", &state->xsave, ~0);
+		return;
+	}
+#endif
+#if defined(HAVE_XSAVEOPT)
+	if (static_cpu_has(X86_FEATURE_XSAVEOPT)) {
+		kfpu_do_xsave("xsaveopt", &state->xsave, ~0);
+		return;
+	}
+#endif
+	if (static_cpu_has(X86_FEATURE_XSAVE)) {
+		kfpu_do_xsave("xsave", &state->xsave, ~0);
+	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
+		kfpu_save_fxsr(&state->fxsave);
+	} else {
+		kfpu_save_fsave(&state->fsave);
+	}
+}
+#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
+
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
 static inline void
 kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
 {
@@ -269,6 +351,25 @@ kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
 	hi = mask >> 32;
 	XSTATE_XRESTORE(addr, low, hi);
 }
+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
+
+#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+/*
+ * Issue the given XRSTOR-family instruction with addr as its memory operand.
+ * The 64-bit mask is split into EAX (low half) and EDX (high half).
+ */
+#define kfpu_do_xrstor(instruction, addr, mask) \
+do { \
+	uint32_t low, hi; \
+ \
+	low = (mask); \
+	hi = (uint64_t)(mask) >> 32; \
+	__asm(instruction " %[src]" \
+	    : \
+	    : [src] "m" (*(addr)), "a" (low), "d" (hi) \
+	    : "memory"); \
+} while (0)
+#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
 
 static inline void
 kfpu_restore_fxsr(struct fxregs_state *addr)
@@ -294,6 +395,7 @@ kfpu_restore_fsave(struct fregs_state *addr)
 	kfpu_frstor(addr);
 }
 
+#if defined(HAVE_KERNEL_FPU_INTERNAL)
 static inline void
 kfpu_end(void)
 {
@@ -310,6 +412,36 @@ kfpu_end(void)
 	local_irq_enable();
 	preempt_enable();
 }
+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
+
+#if defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
+static inline void
+kfpu_end(void)
+{
+	/*
+	 * Restore the registers that kfpu_begin() saved in the per-cpu
+	 * buffer before interrupts and preemption are re-enabled.
+	 */
+	union fpregs_state *state = zfs_kfpu_fpregs[smp_processor_id()];
+#if defined(HAVE_XSAVES)
+	if (static_cpu_has(X86_FEATURE_XSAVES)) {
+		kfpu_do_xrstor("xrstors", &state->xsave, ~0);
+		goto out;
+	}
+#endif
+	if (static_cpu_has(X86_FEATURE_XSAVE)) {
+		kfpu_do_xrstor("xrstor", &state->xsave, ~0);
+	} else if (static_cpu_has(X86_FEATURE_FXSR)) {
+		kfpu_restore_fxsr(&state->fxsave);
+	} else {
+		kfpu_restore_fsave(&state->fsave);
+	}
+out:
+	local_irq_enable();
+	preempt_enable();
+
+}
+#endif /* defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL) */
 
 #else
 
@@ -322,7 +454,7 @@ kfpu_end(void)
 #define kfpu_init() 0
 #define kfpu_fini() ((void) 0)
 
-#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
+#endif /* HAVE_KERNEL_FPU_INTERNAL || HAVE_KERNEL_FPU_XSAVE_INTERNAL */
 #endif /* defined(KERNEL_EXPORTS_X86_FPU) */
 
 /*
diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c
index d17321990..5f88bd020 100644
--- a/module/zcommon/zfs_prop.c
+++ b/module/zcommon/zfs_prop.c
@@ -996,10 +996,10 @@ zfs_prop_align_right(zfs_prop_t prop)
 
 #include
 
-#if defined(HAVE_KERNEL_FPU_INTERNAL)
+#if defined(HAVE_KERNEL_FPU_INTERNAL) || defined(HAVE_KERNEL_FPU_XSAVE_INTERNAL)
 union fpregs_state **zfs_kfpu_fpregs;
 EXPORT_SYMBOL(zfs_kfpu_fpregs);
-#endif /* HAVE_KERNEL_FPU_INTERNAL */
+#endif /* HAVE_KERNEL_FPU_INTERNAL || HAVE_KERNEL_FPU_XSAVE_INTERNAL */
 
 static int __init
 zcommon_init(void)
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index b7831c3ac..5a9fbe994 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -151,6 +151,12 @@ tags = ['functional', 'projectquota']
 tests = ['send_realloc_dnode_size', 'send_encrypted_files']
 tags = ['functional', 'rsend']
 
+[tests/functional/simd:Linux]
+pre =
+post =
+tests = ['simd_supported']
+tags = ['functional', 'simd']
+
 [tests/functional/snapshot:Linux]
 tests = ['snapshot_015_pos', 'snapshot_016_pos']
 tags = ['functional', 'snapshot']
diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am
index 137cddd5f..fd586ecee 100644
--- a/tests/zfs-tests/tests/functional/Makefile.am
+++ b/tests/zfs-tests/tests/functional/Makefile.am
@@ -88,5 +88,6 @@ SUBDIRS = \
 
 if BUILD_LINUX
 SUBDIRS += \
+	simd \
 	tmpfile
 endif
diff --git 
a/tests/zfs-tests/tests/functional/simd/Makefile.am b/tests/zfs-tests/tests/functional/simd/Makefile.am
new file mode 100644
index 000000000..bfc288680
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/simd/Makefile.am
@@ -0,0 +1,2 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/simd
+dist_pkgdata_SCRIPTS = simd_supported.ksh
diff --git a/tests/zfs-tests/tests/functional/simd/simd_supported.ksh b/tests/zfs-tests/tests/functional/simd/simd_supported.ksh
new file mode 100755
index 000000000..d88bc582b
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/simd/simd_supported.ksh
@@ -0,0 +1,58 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2022 by Attila Fülöp
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#	Make sure we have SIMD support, so it will not go away without notice
+#
+# STRATEGY:
+#	1. Test if we are running on a Linux x86 system with SSE support
+#	2. If so, check if the zfs_fletcher_4_impl module parameter contains
+#	   an sse implementation
+#	3. If not, fail the test; otherwise pass it
+
+log_note "Testing if we support SIMD instructions (Linux x86 only)"
+
+if ! is_linux; then
+	log_unsupported "Not a Linux System"
+fi
+
+case "$(uname -m)" in
+i386|i686|x86_64)
+	typeset -R modparam="/sys/module/zcommon/parameters/zfs_fletcher_4_impl"
+	if awk '/^flags/ {print; exit;}' /proc/cpuinfo | grep -q sse; then
+		log_must grep -q sse "$modparam"
+		log_pass "SIMD instructions supported"
+	else
+		log_unsupported "No FPU present"
+	fi
+	;;
+*)
+	log_unsupported "Not a x86 CPU"
+	;;
+esac