mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 19:04:45 +03:00
Linux 5.0 compat: SIMD compatibility
Restore the SIMD optimization for 4.19.38 LTS, 4.14.120 LTS, and 5.0 and newer kernels. This commit squashes the following commits from master into a single commit which can be applied to 0.8.2.

10fa2545 - Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore
b88ca2ac - Enable SIMD for encryption
095b5412 - Fix CONFIG_X86_DEBUG_FPU build failure
e5db3134 - Linux 5.0 compat: SIMD compatibility

Reviewed-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
TEST_ZIMPORT_SKIP="yes"
This commit is contained in:
committed by
Tony Hutter
parent
988b040476
commit
62c034f6d4
@@ -140,6 +140,7 @@
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <zfs_fletcher.h>
|
||||
#include <linux/simd.h>
|
||||
|
||||
#define FLETCHER_MIN_SIMD_SIZE 64
|
||||
|
||||
@@ -205,21 +206,19 @@ static struct fletcher_4_impl_selector {
|
||||
const char *fis_name;
|
||||
uint32_t fis_sel;
|
||||
} fletcher_4_impl_selectors[] = {
|
||||
#if !defined(_KERNEL)
|
||||
{ "cycle", IMPL_CYCLE },
|
||||
#endif
|
||||
{ "fastest", IMPL_FASTEST },
|
||||
{ "scalar", IMPL_SCALAR }
|
||||
};
|
||||
|
||||
#if defined(_KERNEL)
|
||||
static kstat_t *fletcher_4_kstat;
|
||||
#endif
|
||||
|
||||
static struct fletcher_4_kstat {
|
||||
uint64_t native;
|
||||
uint64_t byteswap;
|
||||
} fletcher_4_stat_data[ARRAY_SIZE(fletcher_4_impls) + 1];
|
||||
#endif
|
||||
|
||||
/* Indicate that benchmark has been completed */
|
||||
static boolean_t fletcher_4_initialized = B_FALSE;
|
||||
@@ -408,32 +407,36 @@ fletcher_4_impl_set(const char *val)
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the Fletcher 4 operations for checksums. When a SIMD
|
||||
* implementation is not allowed in the current context, then fallback
|
||||
* to the fastest generic implementation.
|
||||
*/
|
||||
static inline const fletcher_4_ops_t *
|
||||
fletcher_4_impl_get(void)
|
||||
{
|
||||
fletcher_4_ops_t *ops = NULL;
|
||||
const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
|
||||
if (!kfpu_allowed())
|
||||
return (&fletcher_4_superscalar4_ops);
|
||||
|
||||
const fletcher_4_ops_t *ops = NULL;
|
||||
uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
|
||||
|
||||
switch (impl) {
|
||||
case IMPL_FASTEST:
|
||||
ASSERT(fletcher_4_initialized);
|
||||
ops = &fletcher_4_fastest_impl;
|
||||
break;
|
||||
#if !defined(_KERNEL)
|
||||
case IMPL_CYCLE: {
|
||||
case IMPL_CYCLE:
|
||||
/* Cycle through supported implementations */
|
||||
ASSERT(fletcher_4_initialized);
|
||||
ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
|
||||
|
||||
static uint32_t cycle_count = 0;
|
||||
uint32_t idx = (++cycle_count) % fletcher_4_supp_impls_cnt;
|
||||
ops = fletcher_4_supp_impls[idx];
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
|
||||
ASSERT3U(impl, <, fletcher_4_supp_impls_cnt);
|
||||
|
||||
ops = fletcher_4_supp_impls[impl];
|
||||
break;
|
||||
}
|
||||
@@ -659,6 +662,7 @@ fletcher_4_kstat_addr(kstat_t *ksp, loff_t n)
|
||||
typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *,
|
||||
zio_cksum_t *);
|
||||
|
||||
#if defined(_KERNEL)
|
||||
static void
|
||||
fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size)
|
||||
{
|
||||
@@ -716,16 +720,18 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size)
|
||||
/* restore original selection */
|
||||
atomic_swap_32(&fletcher_4_impl_chosen, sel_save);
|
||||
}
|
||||
#endif /* _KERNEL */
|
||||
|
||||
void
|
||||
fletcher_4_init(void)
|
||||
/*
|
||||
* Initialize and benchmark all supported implementations.
|
||||
*/
|
||||
static void
|
||||
fletcher_4_benchmark(void)
|
||||
{
|
||||
static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */
|
||||
fletcher_4_ops_t *curr_impl;
|
||||
char *databuf;
|
||||
int i, c;
|
||||
|
||||
/* move supported impl into fletcher_4_supp_impls */
|
||||
/* Move supported implementations into fletcher_4_supp_impls */
|
||||
for (i = 0, c = 0; i < ARRAY_SIZE(fletcher_4_impls); i++) {
|
||||
curr_impl = (fletcher_4_ops_t *)fletcher_4_impls[i];
|
||||
|
||||
@@ -735,19 +741,10 @@ fletcher_4_init(void)
|
||||
membar_producer(); /* complete fletcher_4_supp_impls[] init */
|
||||
fletcher_4_supp_impls_cnt = c; /* number of supported impl */
|
||||
|
||||
#if !defined(_KERNEL)
|
||||
/* Skip benchmarking and use last implementation as fastest */
|
||||
memcpy(&fletcher_4_fastest_impl,
|
||||
fletcher_4_supp_impls[fletcher_4_supp_impls_cnt-1],
|
||||
sizeof (fletcher_4_fastest_impl));
|
||||
fletcher_4_fastest_impl.name = "fastest";
|
||||
membar_producer();
|
||||
#if defined(_KERNEL)
|
||||
static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */
|
||||
char *databuf = vmem_alloc(data_size, KM_SLEEP);
|
||||
|
||||
fletcher_4_initialized = B_TRUE;
|
||||
return;
|
||||
#endif
|
||||
/* Benchmark all supported implementations */
|
||||
databuf = vmem_alloc(data_size, KM_SLEEP);
|
||||
for (i = 0; i < data_size / sizeof (uint64_t); i++)
|
||||
((uint64_t *)databuf)[i] = (uintptr_t)(databuf+i); /* warm-up */
|
||||
|
||||
@@ -755,9 +752,28 @@ fletcher_4_init(void)
|
||||
fletcher_4_benchmark_impl(B_TRUE, databuf, data_size);
|
||||
|
||||
vmem_free(databuf, data_size);
|
||||
#else
|
||||
/*
|
||||
* Skip the benchmark in user space to avoid impacting libzpool
|
||||
* consumers (zdb, zhack, zinject, ztest). The last implementation
|
||||
* is assumed to be the fastest and used by default.
|
||||
*/
|
||||
memcpy(&fletcher_4_fastest_impl,
|
||||
fletcher_4_supp_impls[fletcher_4_supp_impls_cnt - 1],
|
||||
sizeof (fletcher_4_fastest_impl));
|
||||
fletcher_4_fastest_impl.name = "fastest";
|
||||
membar_producer();
|
||||
#endif /* _KERNEL */
|
||||
}
|
||||
|
||||
void
|
||||
fletcher_4_init(void)
|
||||
{
|
||||
/* Determine the fastest available implementation. */
|
||||
fletcher_4_benchmark();
|
||||
|
||||
#if defined(_KERNEL)
|
||||
/* install kstats for all implementations */
|
||||
/* Install kstats for all implementations */
|
||||
fletcher_4_kstat = kstat_create("zfs", 0, "fletcher_4_bench", "misc",
|
||||
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
|
||||
if (fletcher_4_kstat != NULL) {
|
||||
|
||||
@@ -198,7 +198,7 @@ unsigned char SRC __attribute__((vector_size(16)));
|
||||
|
||||
static boolean_t fletcher_4_aarch64_neon_valid(void)
|
||||
{
|
||||
return (B_TRUE);
|
||||
return (kfpu_allowed());
|
||||
}
|
||||
|
||||
const fletcher_4_ops_t fletcher_4_aarch64_neon_ops = {
|
||||
|
||||
@@ -157,7 +157,7 @@ STACK_FRAME_NON_STANDARD(fletcher_4_avx512f_byteswap);
|
||||
static boolean_t
|
||||
fletcher_4_avx512f_valid(void)
|
||||
{
|
||||
return (zfs_avx512f_available());
|
||||
return (kfpu_allowed() && zfs_avx512f_available());
|
||||
}
|
||||
|
||||
const fletcher_4_ops_t fletcher_4_avx512f_ops = {
|
||||
|
||||
@@ -156,7 +156,7 @@ fletcher_4_avx2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
|
||||
|
||||
static boolean_t fletcher_4_avx2_valid(void)
|
||||
{
|
||||
return (zfs_avx_available() && zfs_avx2_available());
|
||||
return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available());
|
||||
}
|
||||
|
||||
const fletcher_4_ops_t fletcher_4_avx2_ops = {
|
||||
|
||||
@@ -157,7 +157,7 @@ fletcher_4_sse2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
|
||||
|
||||
static boolean_t fletcher_4_sse2_valid(void)
|
||||
{
|
||||
return (zfs_sse2_available());
|
||||
return (kfpu_allowed() && zfs_sse2_available());
|
||||
}
|
||||
|
||||
const fletcher_4_ops_t fletcher_4_sse2_ops = {
|
||||
@@ -214,7 +214,8 @@ fletcher_4_ssse3_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
|
||||
|
||||
static boolean_t fletcher_4_ssse3_valid(void)
|
||||
{
|
||||
return (zfs_sse2_available() && zfs_ssse3_available());
|
||||
return (kfpu_allowed() && zfs_sse2_available() &&
|
||||
zfs_ssse3_available());
|
||||
}
|
||||
|
||||
const fletcher_4_ops_t fletcher_4_ssse3_ops = {
|
||||
|
||||
@@ -853,10 +853,23 @@ zfs_prop_align_right(zfs_prop_t prop)
|
||||
#endif
|
||||
|
||||
#if defined(_KERNEL)
|
||||
|
||||
#include <linux/simd.h>
|
||||
|
||||
#if defined(HAVE_KERNEL_FPU_INTERNAL)
|
||||
union fpregs_state **zfs_kfpu_fpregs;
|
||||
EXPORT_SYMBOL(zfs_kfpu_fpregs);
|
||||
#endif /* HAVE_KERNEL_FPU_INTERNAL */
|
||||
|
||||
static int __init
|
||||
zcommon_init(void)
|
||||
{
|
||||
int error = kfpu_init();
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
fletcher_4_init();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -864,6 +877,7 @@ static void __exit
|
||||
zcommon_fini(void)
|
||||
{
|
||||
fletcher_4_fini();
|
||||
kfpu_fini();
|
||||
}
|
||||
|
||||
module_init(zcommon_init);
|
||||
|
||||
Reference in New Issue
Block a user