mirror_zfs/include/linux/simd_x86.h
Brian Behlendorf e5db313494
Linux 5.0 compat: SIMD compatibility
Restore the SIMD optimization for 4.19.38 LTS, 4.14.120 LTS,
and 5.0 and newer kernels.  This is accomplished by leveraging
the fact that by definition dedicated kernel threads never need
to concern themselves with saving and restoring the user FPU state.
Therefore, they may use the FPU as long as we can guarantee user
tasks always restore their FPU state before context switching back
to user space.

For the 5.0 and 5.1 kernels disabling preemption and local
interrupts is sufficient to allow the FPU to be used.  All non-kernel
threads will restore the preserved user FPU state.

For 5.2 and latter kernels the user FPU state restoration will be
skipped if the kernel determines the registers have not changed.
Therefore, for these kernels we need to perform the additional
step of saving and restoring the FPU registers.  Invalidating the
per-cpu global tracking the FPU state would force a restore but
that functionality is private to the core x86 FPU implementation
and unavailable.

In practice, restricting SIMD to kernel threads is not a major
restriction for ZFS.  The vast majority of SIMD operations are
already performed by the IO pipeline.  The remaining cases are
relatively infrequent and can be handled by the generic code
without significant impact.  The two most noteworthy cases are:

  1) Decrypting the wrapping key for an encrypted dataset,
     i.e. `zfs load-key`.  All other encryption and decryption
     operations will use the SIMD optimized implementations.

  2) Generating the payload checksums for a `zfs send` stream.

In order to avoid making any changes to the higher layers of ZFS
all of the `*_get_ops()` functions were updated to take in to
consideration the calling context.  This allows for the fastest
implementation to be used as appropriate (see kfpu_allowed()).

The only other notable instance of SIMD operations being used
outside a kernel thread was at module load time.  This code
was moved in to a taskq in order to accommodate the new kernel
thread restriction.

Finally, a few other modifications were made in order to further
harden this code and facilitate testing.  They include updating
each implementations operations structure to be declared as a
constant.  And allowing "cycle" to be set when selecting the
preferred ops in the kernel as well as user space.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #8754 
Closes #8793 
Closes #8965
2019-07-12 09:31:20 -07:00

804 lines
18 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
*/
/*
* USER API:
*
* Kernel fpu methods:
* kfpu_begin()
* kfpu_end()
*
* SIMD support:
*
* Following functions should be called to determine whether CPU feature
* is supported. All functions are usable in kernel and user space.
* If a SIMD algorithm is using more than one instruction set
* all relevant feature test functions should be called.
*
* Supported features:
* zfs_sse_available()
* zfs_sse2_available()
* zfs_sse3_available()
* zfs_ssse3_available()
* zfs_sse4_1_available()
* zfs_sse4_2_available()
*
* zfs_avx_available()
* zfs_avx2_available()
*
* zfs_bmi1_available()
* zfs_bmi2_available()
*
* zfs_avx512f_available()
* zfs_avx512cd_available()
* zfs_avx512er_available()
* zfs_avx512pf_available()
* zfs_avx512bw_available()
* zfs_avx512dq_available()
* zfs_avx512vl_available()
* zfs_avx512ifma_available()
* zfs_avx512vbmi_available()
*
* NOTE(AVX-512VL): If using AVX-512 instructions with 128Bit registers
* also add zfs_avx512vl_available() to feature check.
*/
#ifndef _SIMD_X86_H
#define _SIMD_X86_H
#include <sys/isa_defs.h>
/* only for __x86 */
#if defined(__x86)
#include <sys/types.h>
#if defined(_KERNEL)
#include <asm/cpufeature.h>
#else
#include <cpuid.h>
#endif
#if defined(_KERNEL)
#if defined(HAVE_KERNEL_FPU_API_HEADER)
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
#else
#include <asm/i387.h>
#include <asm/xcr.h>
#endif
/*
* The following cases are for kernels which export either the
* kernel_fpu_* or __kernel_fpu_* functions.
*/
#if defined(KERNEL_EXPORTS_X86_FPU)
#define kfpu_allowed() 1
#define kfpu_initialize(tsk) do {} while (0)
#if defined(HAVE_UNDERSCORE_KERNEL_FPU)
#define kfpu_begin() \
{ \
preempt_disable(); \
__kernel_fpu_begin(); \
}
#define kfpu_end() \
{ \
__kernel_fpu_end(); \
preempt_enable(); \
}
#elif defined(HAVE_KERNEL_FPU)
#define kfpu_begin() kernel_fpu_begin()
#define kfpu_end() kernel_fpu_end()
#else
/*
* This case is unreachable. When KERNEL_EXPORTS_X86_FPU is defined then
* either HAVE_UNDERSCORE_KERNEL_FPU or HAVE_KERNEL_FPU must be defined.
*/
#error "Unreachable kernel configuration"
#endif
#else /* defined(KERNEL_EXPORTS_X86_FPU) */
/*
* When the kernel_fpu_* symbols are unavailable then provide our own
* versions which allow the FPU to be safely used in kernel threads.
* In practice, this is not a significant restriction for ZFS since the
* vast majority of SIMD operations are performed by the IO pipeline.
*/
/*
* Returns non-zero if FPU operations are allowed in the current context.
*/
#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \
test_thread_flag(TIF_NEED_FPU_LOAD))
#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \
current->thread.fpu.initialized)
#else
#define kfpu_allowed() 0
#endif
static inline void
kfpu_initialize(void)
{
WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
__fpu_invalidate_fpregs_state(&current->thread.fpu);
set_thread_flag(TIF_NEED_FPU_LOAD);
#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
__fpu_invalidate_fpregs_state(&current->thread.fpu);
current->thread.fpu.initialized = 1;
#endif
}
static inline void
kfpu_begin(void)
{
WARN_ON_ONCE(!kfpu_allowed());
/*
* Preemption and interrupts must be disabled for the critical
* region where the FPU state is being modified.
*/
preempt_disable();
local_irq_disable();
#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
/*
* The current FPU registers need to be preserved by kfpu_begin()
* and restored by kfpu_end(). This is required because we can
* not call __cpu_invalidate_fpregs_state() to invalidate the
* per-cpu FPU state and force them to be restored during a
* context switch.
*/
copy_fpregs_to_fpstate(&current->thread.fpu);
#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
/*
* There is no need to preserve and restore the FPU registers.
* They will always be restored from the task's stored FPU state
* when switching contexts.
*/
WARN_ON_ONCE(current->thread.fpu.initialized == 0);
#endif
}
static inline void
kfpu_end(void)
{
#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
union fpregs_state *state = &current->thread.fpu.state;
int error;
if (use_xsave()) {
error = copy_kernel_to_xregs_err(&state->xsave, -1);
} else if (use_fxsr()) {
error = copy_kernel_to_fxregs_err(&state->fxsave);
} else {
error = copy_kernel_to_fregs_err(&state->fsave);
}
WARN_ON_ONCE(error);
#endif
local_irq_enable();
preempt_enable();
}
#endif /* defined(HAVE_KERNEL_FPU) */
#else /* defined(_KERNEL) */
/*
* FPU dummy methods for user space.
*/
#define kfpu_allowed() 1
#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)
#endif /* defined(_KERNEL) */
/*
* CPUID feature tests for user-space. Linux kernel provides an interface for
* CPU feature testing.
*/
#if !defined(_KERNEL)
/*
* x86 registers used implicitly by CPUID
*/
typedef enum cpuid_regs {
EAX = 0,
EBX,
ECX,
EDX,
CPUID_REG_CNT = 4
} cpuid_regs_t;
/*
* List of instruction sets identified by CPUID
*/
typedef enum cpuid_inst_sets {
SSE = 0,
SSE2,
SSE3,
SSSE3,
SSE4_1,
SSE4_2,
OSXSAVE,
AVX,
AVX2,
BMI1,
BMI2,
AVX512F,
AVX512CD,
AVX512DQ,
AVX512BW,
AVX512IFMA,
AVX512VBMI,
AVX512PF,
AVX512ER,
AVX512VL,
AES,
PCLMULQDQ
} cpuid_inst_sets_t;
/*
* Instruction set descriptor.
*/
typedef struct cpuid_feature_desc {
uint32_t leaf; /* CPUID leaf */
uint32_t subleaf; /* CPUID sub-leaf */
uint32_t flag; /* bit mask of the feature */
cpuid_regs_t reg; /* which CPUID return register to test */
} cpuid_feature_desc_t;
#define _AVX512F_BIT (1U << 16)
#define _AVX512CD_BIT (_AVX512F_BIT | (1U << 28))
#define _AVX512DQ_BIT (_AVX512F_BIT | (1U << 17))
#define _AVX512BW_BIT (_AVX512F_BIT | (1U << 30))
#define _AVX512IFMA_BIT (_AVX512F_BIT | (1U << 21))
#define _AVX512VBMI_BIT (1U << 1) /* AVX512F_BIT is on another leaf */
#define _AVX512PF_BIT (_AVX512F_BIT | (1U << 26))
#define _AVX512ER_BIT (_AVX512F_BIT | (1U << 27))
#define _AVX512VL_BIT (1U << 31) /* if used also check other levels */
#define _AES_BIT (1U << 25)
#define _PCLMULQDQ_BIT (1U << 1)
/*
* Descriptions of supported instruction sets
*/
static const cpuid_feature_desc_t cpuid_features[] = {
[SSE] = {1U, 0U, 1U << 25, EDX },
[SSE2] = {1U, 0U, 1U << 26, EDX },
[SSE3] = {1U, 0U, 1U << 0, ECX },
[SSSE3] = {1U, 0U, 1U << 9, ECX },
[SSE4_1] = {1U, 0U, 1U << 19, ECX },
[SSE4_2] = {1U, 0U, 1U << 20, ECX },
[OSXSAVE] = {1U, 0U, 1U << 27, ECX },
[AVX] = {1U, 0U, 1U << 28, ECX },
[AVX2] = {7U, 0U, 1U << 5, EBX },
[BMI1] = {7U, 0U, 1U << 3, EBX },
[BMI2] = {7U, 0U, 1U << 8, EBX },
[AVX512F] = {7U, 0U, _AVX512F_BIT, EBX },
[AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX },
[AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX },
[AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX },
[AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX },
[AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX },
[AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX },
[AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX },
[AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX },
[AES] = {1U, 0U, _AES_BIT, ECX },
[PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX },
};
/*
* Check if OS supports AVX and AVX2 by checking XCR0
* Only call this function if CPUID indicates that AVX feature is
* supported by the CPU, otherwise it might be an illegal instruction.
*/
static inline uint64_t
xgetbv(uint32_t index)
{
uint32_t eax, edx;
/* xgetbv - instruction byte code */
__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
: "=a" (eax), "=d" (edx)
: "c" (index));
return ((((uint64_t)edx)<<32) | (uint64_t)eax);
}
/*
* Check if CPU supports a feature
*/
static inline boolean_t
__cpuid_check_feature(const cpuid_feature_desc_t *desc)
{
uint32_t r[CPUID_REG_CNT];
if (__get_cpuid_max(0, NULL) >= desc->leaf) {
/*
* __cpuid_count is needed to properly check
* for AVX2. It is a macro, so return parameters
* are passed by value.
*/
__cpuid_count(desc->leaf, desc->subleaf,
r[EAX], r[EBX], r[ECX], r[EDX]);
return ((r[desc->reg] & desc->flag) == desc->flag);
}
return (B_FALSE);
}
#define CPUID_FEATURE_CHECK(name, id) \
static inline boolean_t \
__cpuid_has_ ## name(void) \
{ \
return (__cpuid_check_feature(&cpuid_features[id])); \
}
/*
* Define functions for user-space CPUID features testing
*/
CPUID_FEATURE_CHECK(sse, SSE);
CPUID_FEATURE_CHECK(sse2, SSE2);
CPUID_FEATURE_CHECK(sse3, SSE3);
CPUID_FEATURE_CHECK(ssse3, SSSE3);
CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
CPUID_FEATURE_CHECK(avx, AVX);
CPUID_FEATURE_CHECK(avx2, AVX2);
CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
CPUID_FEATURE_CHECK(bmi1, BMI1);
CPUID_FEATURE_CHECK(bmi2, BMI2);
CPUID_FEATURE_CHECK(avx512f, AVX512F);
CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
CPUID_FEATURE_CHECK(aes, AES);
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
#endif /* !defined(_KERNEL) */
/*
* Detect register set support
*/
static inline boolean_t
__simd_state_enabled(const uint64_t state)
{
boolean_t has_osxsave;
uint64_t xcr0;
#if defined(_KERNEL)
#if defined(X86_FEATURE_OSXSAVE)
has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE);
#else
has_osxsave = B_FALSE;
#endif
#elif !defined(_KERNEL)
has_osxsave = __cpuid_has_osxsave();
#endif
if (!has_osxsave)
return (B_FALSE);
xcr0 = xgetbv(0);
return ((xcr0 & state) == state);
}
#define _XSTATE_SSE_AVX (0x2 | 0x4)
#define _XSTATE_AVX512 (0xE0 | _XSTATE_SSE_AVX)
#define __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
#define __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
/*
* Check if SSE instruction set is available
*/
static inline boolean_t
zfs_sse_available(void)
{
#if defined(_KERNEL)
return (!!boot_cpu_has(X86_FEATURE_XMM));
#elif !defined(_KERNEL)
return (__cpuid_has_sse());
#endif
}
/*
* Check if SSE2 instruction set is available
*/
static inline boolean_t
zfs_sse2_available(void)
{
#if defined(_KERNEL)
return (!!boot_cpu_has(X86_FEATURE_XMM2));
#elif !defined(_KERNEL)
return (__cpuid_has_sse2());
#endif
}
/*
* Check if SSE3 instruction set is available
*/
static inline boolean_t
zfs_sse3_available(void)
{
#if defined(_KERNEL)
return (!!boot_cpu_has(X86_FEATURE_XMM3));
#elif !defined(_KERNEL)
return (__cpuid_has_sse3());
#endif
}
/*
* Check if SSSE3 instruction set is available
*/
static inline boolean_t
zfs_ssse3_available(void)
{
#if defined(_KERNEL)
return (!!boot_cpu_has(X86_FEATURE_SSSE3));
#elif !defined(_KERNEL)
return (__cpuid_has_ssse3());
#endif
}
/*
* Check if SSE4.1 instruction set is available
*/
static inline boolean_t
zfs_sse4_1_available(void)
{
#if defined(_KERNEL)
return (!!boot_cpu_has(X86_FEATURE_XMM4_1));
#elif !defined(_KERNEL)
return (__cpuid_has_sse4_1());
#endif
}
/*
* Check if SSE4.2 instruction set is available
*/
static inline boolean_t
zfs_sse4_2_available(void)
{
#if defined(_KERNEL)
return (!!boot_cpu_has(X86_FEATURE_XMM4_2));
#elif !defined(_KERNEL)
return (__cpuid_has_sse4_2());
#endif
}
/*
* Check if AVX instruction set is available
*/
static inline boolean_t
zfs_avx_available(void)
{
boolean_t has_avx;
#if defined(_KERNEL)
has_avx = !!boot_cpu_has(X86_FEATURE_AVX);
#elif !defined(_KERNEL)
has_avx = __cpuid_has_avx();
#endif
return (has_avx && __ymm_enabled());
}
/*
* Check if AVX2 instruction set is available
*/
static inline boolean_t
zfs_avx2_available(void)
{
boolean_t has_avx2;
#if defined(_KERNEL)
has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2);
#elif !defined(_KERNEL)
has_avx2 = __cpuid_has_avx2();
#endif
return (has_avx2 && __ymm_enabled());
}
/*
* Check if BMI1 instruction set is available
*/
static inline boolean_t
zfs_bmi1_available(void)
{
#if defined(_KERNEL)
#if defined(X86_FEATURE_BMI1)
return (!!boot_cpu_has(X86_FEATURE_BMI1));
#else
return (B_FALSE);
#endif
#elif !defined(_KERNEL)
return (__cpuid_has_bmi1());
#endif
}
/*
* Check if BMI2 instruction set is available
*/
static inline boolean_t
zfs_bmi2_available(void)
{
#if defined(_KERNEL)
#if defined(X86_FEATURE_BMI2)
return (!!boot_cpu_has(X86_FEATURE_BMI2));
#else
return (B_FALSE);
#endif
#elif !defined(_KERNEL)
return (__cpuid_has_bmi2());
#endif
}
/*
* Check if AES instruction set is available
*/
static inline boolean_t
zfs_aes_available(void)
{
#if defined(_KERNEL)
#if defined(X86_FEATURE_AES)
return (!!boot_cpu_has(X86_FEATURE_AES));
#else
return (B_FALSE);
#endif
#elif !defined(_KERNEL)
return (__cpuid_has_aes());
#endif
}
/*
* Check if PCLMULQDQ instruction set is available
*/
static inline boolean_t
zfs_pclmulqdq_available(void)
{
#if defined(_KERNEL)
#if defined(X86_FEATURE_PCLMULQDQ)
return (!!boot_cpu_has(X86_FEATURE_PCLMULQDQ));
#else
return (B_FALSE);
#endif
#elif !defined(_KERNEL)
return (__cpuid_has_pclmulqdq());
#endif
}
/*
* AVX-512 family of instruction sets:
*
* AVX512F Foundation
* AVX512CD Conflict Detection Instructions
* AVX512ER Exponential and Reciprocal Instructions
* AVX512PF Prefetch Instructions
*
* AVX512BW Byte and Word Instructions
* AVX512DQ Double-word and Quadword Instructions
* AVX512VL Vector Length Extensions
*
* AVX512IFMA Integer Fused Multiply Add (Not supported by kernel 4.4)
* AVX512VBMI Vector Byte Manipulation Instructions
*/
/* Check if AVX512F instruction set is available */
static inline boolean_t
zfs_avx512f_available(void)
{
boolean_t has_avx512 = B_FALSE;
#if defined(_KERNEL)
#if defined(X86_FEATURE_AVX512F)
has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F);
#else
has_avx512 = B_FALSE;
#endif
#elif !defined(_KERNEL)
has_avx512 = __cpuid_has_avx512f();
#endif
return (has_avx512 && __zmm_enabled());
}
/* Check if AVX512CD instruction set is available */
static inline boolean_t
zfs_avx512cd_available(void)
{
boolean_t has_avx512 = B_FALSE;
#if defined(_KERNEL)
#if defined(X86_FEATURE_AVX512CD)
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512CD);
#else
has_avx512 = B_FALSE;
#endif
#elif !defined(_KERNEL)
has_avx512 = __cpuid_has_avx512cd();
#endif
return (has_avx512 && __zmm_enabled());
}
/* Check if AVX512ER instruction set is available */
static inline boolean_t
zfs_avx512er_available(void)
{
boolean_t has_avx512 = B_FALSE;
#if defined(_KERNEL)
#if defined(X86_FEATURE_AVX512ER)
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512ER);
#else
has_avx512 = B_FALSE;
#endif
#elif !defined(_KERNEL)
has_avx512 = __cpuid_has_avx512er();
#endif
return (has_avx512 && __zmm_enabled());
}
/* Check if AVX512PF instruction set is available */
static inline boolean_t
zfs_avx512pf_available(void)
{
boolean_t has_avx512 = B_FALSE;
#if defined(_KERNEL)
#if defined(X86_FEATURE_AVX512PF)
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512PF);
#else
has_avx512 = B_FALSE;
#endif
#elif !defined(_KERNEL)
has_avx512 = __cpuid_has_avx512pf();
#endif
return (has_avx512 && __zmm_enabled());
}
/* Check if AVX512BW instruction set is available */
static inline boolean_t
zfs_avx512bw_available(void)
{
boolean_t has_avx512 = B_FALSE;
#if defined(_KERNEL)
#if defined(X86_FEATURE_AVX512BW)
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512BW);
#else
has_avx512 = B_FALSE;
#endif
#elif !defined(_KERNEL)
has_avx512 = __cpuid_has_avx512bw();
#endif
return (has_avx512 && __zmm_enabled());
}
/* Check if AVX512DQ instruction set is available */
static inline boolean_t
zfs_avx512dq_available(void)
{
boolean_t has_avx512 = B_FALSE;
#if defined(_KERNEL)
#if defined(X86_FEATURE_AVX512DQ)
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512DQ);
#else
has_avx512 = B_FALSE;
#endif
#elif !defined(_KERNEL)
has_avx512 = __cpuid_has_avx512dq();
#endif
return (has_avx512 && __zmm_enabled());
}
/* Check if AVX512VL instruction set is available */
static inline boolean_t
zfs_avx512vl_available(void)
{
boolean_t has_avx512 = B_FALSE;
#if defined(_KERNEL)
#if defined(X86_FEATURE_AVX512VL)
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512VL);
#else
has_avx512 = B_FALSE;
#endif
#elif !defined(_KERNEL)
has_avx512 = __cpuid_has_avx512vl();
#endif
return (has_avx512 && __zmm_enabled());
}
/* Check if AVX512IFMA instruction set is available */
static inline boolean_t
zfs_avx512ifma_available(void)
{
boolean_t has_avx512 = B_FALSE;
#if defined(_KERNEL)
#if defined(X86_FEATURE_AVX512IFMA)
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512IFMA);
#else
has_avx512 = B_FALSE;
#endif
#elif !defined(_KERNEL)
has_avx512 = __cpuid_has_avx512ifma();
#endif
return (has_avx512 && __zmm_enabled());
}
/* Check if AVX512VBMI instruction set is available */
static inline boolean_t
zfs_avx512vbmi_available(void)
{
boolean_t has_avx512 = B_FALSE;
#if defined(_KERNEL)
#if defined(X86_FEATURE_AVX512VBMI)
has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512VBMI);
#else
has_avx512 = B_FALSE;
#endif
#elif !defined(_KERNEL)
has_avx512 = __cpuid_has_avx512f() &&
__cpuid_has_avx512vbmi();
#endif
return (has_avx512 && __zmm_enabled());
}
#endif /* defined(__x86) */
#endif /* _SIMD_X86_H */