Linux 5.0 compat: SIMD compatibility

Restore the SIMD optimization for 4.19.38 LTS, 4.14.120 LTS,
and 5.0 and newer kernels.

This commit squashes the following commits from master into
a single commit which can be applied to 0.8.2.

10fa2545 - Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore
b88ca2ac - Enable SIMD for encryption
095b5412 - Fix CONFIG_X86_DEBUG_FPU build failure
e5db3134 - Linux 5.0 compat: SIMD compatibility

Reviewed-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
TEST_ZIMPORT_SKIP="yes"
This commit is contained in:
Brian Behlendorf
2019-07-12 09:31:20 -07:00
committed by Tony Hutter
parent 988b040476
commit 62c034f6d4
30 changed files with 548 additions and 206 deletions
+19 -12
View File
@@ -27,6 +27,7 @@
#include <sys/crypto/spi.h>
#include <modes/modes.h>
#include <aes/aes_impl.h>
#include <linux/simd.h>
/*
* Initialize AES encryption and decryption key schedules.
@@ -40,9 +41,9 @@
void
aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched)
{
aes_impl_ops_t *ops = aes_impl_get_ops();
aes_key_t *newbie = keysched;
uint_t keysize, i, j;
const aes_impl_ops_t *ops = aes_impl_get_ops();
aes_key_t *newbie = keysched;
uint_t keysize, i, j;
union {
uint64_t ka64[4];
uint32_t ka32[8];
@@ -252,12 +253,17 @@ static size_t aes_supp_impl_cnt = 0;
static aes_impl_ops_t *aes_supp_impl[ARRAY_SIZE(aes_all_impl)];
/*
* Selects the aes operations for encrypt/decrypt/key setup
* Returns the AES operations for encrypt/decrypt/key setup. When a
* SIMD implementation is not allowed in the current context, then
* fall back to the fastest generic implementation.
*/
aes_impl_ops_t *
aes_impl_get_ops()
const aes_impl_ops_t *
aes_impl_get_ops(void)
{
aes_impl_ops_t *ops = NULL;
if (!kfpu_allowed())
return (&aes_generic_impl);
const aes_impl_ops_t *ops = NULL;
const uint32_t impl = AES_IMPL_READ(icp_aes_impl);
switch (impl) {
@@ -266,15 +272,13 @@ aes_impl_get_ops()
ops = &aes_fastest_impl;
break;
case IMPL_CYCLE:
{
/* Cycle through supported implementations */
ASSERT(aes_impl_initialized);
ASSERT3U(aes_supp_impl_cnt, >, 0);
/* Cycle through supported implementations */
static size_t cycle_impl_idx = 0;
size_t idx = (++cycle_impl_idx) % aes_supp_impl_cnt;
ops = aes_supp_impl[idx];
}
break;
break;
default:
ASSERT3U(impl, <, aes_supp_impl_cnt);
ASSERT3U(aes_supp_impl_cnt, >, 0);
@@ -288,13 +292,16 @@ aes_impl_get_ops()
return (ops);
}
/*
* Initialize all supported implementations.
*/
void
aes_impl_init(void)
{
aes_impl_ops_t *curr_impl;
int i, c;
/* move supported impl into aes_supp_impls */
/* Move supported implementations into aes_supp_impls */
for (i = 0, c = 0; i < ARRAY_SIZE(aes_all_impl); i++) {
curr_impl = (aes_impl_ops_t *)aes_all_impl[i];
+1 -1
View File
@@ -108,7 +108,7 @@ aes_aesni_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4],
static boolean_t
aes_aesni_will_work(void)
{
return (zfs_aes_available());
return (kfpu_allowed() && zfs_aes_available());
}
const aes_impl_ops_t aes_aesni_impl = {
+24 -14
View File
@@ -29,6 +29,7 @@
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>
#include <modes/gcm_impl.h>
#include <linux/simd.h>
#define GHASH(c, d, t, o) \
xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
@@ -46,7 +47,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
gcm_impl_ops_t *gops;
const gcm_impl_ops_t *gops;
size_t remainder = length;
size_t need = 0;
uint8_t *datap = (uint8_t *)data;
@@ -168,7 +169,7 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
gcm_impl_ops_t *gops;
const gcm_impl_ops_t *gops;
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
uint8_t *ghash, *macp = NULL;
int i, rv;
@@ -320,7 +321,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
gcm_impl_ops_t *gops;
const gcm_impl_ops_t *gops;
size_t pt_len;
size_t remainder;
uint8_t *ghash;
@@ -427,7 +428,7 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
gcm_impl_ops_t *gops;
const gcm_impl_ops_t *gops;
uint8_t *cb;
ulong_t remainder = iv_len;
ulong_t processed = 0;
@@ -481,7 +482,7 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
void (*copy_block)(uint8_t *, uint8_t *),
void (*xor_block)(uint8_t *, uint8_t *))
{
gcm_impl_ops_t *gops;
const gcm_impl_ops_t *gops;
uint8_t *ghash, *datap, *authp;
size_t remainder, processed;
@@ -660,12 +661,17 @@ static size_t gcm_supp_impl_cnt = 0;
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
/*
* Selects the gcm operation
* Returns the GCM operations for encrypt/decrypt/key setup. When a
* SIMD implementation is not allowed in the current context, then
* fall back to the fastest generic implementation.
*/
gcm_impl_ops_t *
const gcm_impl_ops_t *
gcm_impl_get_ops()
{
gcm_impl_ops_t *ops = NULL;
if (!kfpu_allowed())
return (&gcm_generic_impl);
const gcm_impl_ops_t *ops = NULL;
const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
switch (impl) {
@@ -674,15 +680,13 @@ gcm_impl_get_ops()
ops = &gcm_fastest_impl;
break;
case IMPL_CYCLE:
{
/* Cycle through supported implementations */
ASSERT(gcm_impl_initialized);
ASSERT3U(gcm_supp_impl_cnt, >, 0);
/* Cycle through supported implementations */
static size_t cycle_impl_idx = 0;
size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
ops = gcm_supp_impl[idx];
}
break;
break;
default:
ASSERT3U(impl, <, gcm_supp_impl_cnt);
ASSERT3U(gcm_supp_impl_cnt, >, 0);
@@ -696,13 +700,16 @@ gcm_impl_get_ops()
return (ops);
}
/*
* Initialize all supported implementations.
*/
void
gcm_impl_init(void)
{
gcm_impl_ops_t *curr_impl;
int i, c;
/* move supported impl into aes_supp_impls */
/* Move supported implementations into gcm_supp_impls */
for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
@@ -711,7 +718,10 @@ gcm_impl_init(void)
}
gcm_supp_impl_cnt = c;
/* set fastest implementation. assume hardware accelerated is fastest */
/*
* Set the fastest implementation given the assumption that the
* hardware accelerated version is the fastest.
*/
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
if (gcm_pclmulqdq_impl.is_supported()) {
memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
+1 -1
View File
@@ -52,7 +52,7 @@ gcm_pclmulqdq_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
static boolean_t
gcm_pclmulqdq_will_work(void)
{
return (zfs_pclmulqdq_available());
return (kfpu_allowed() && zfs_pclmulqdq_available());
}
const gcm_impl_ops_t gcm_pclmulqdq_impl = {
+2 -2
View File
@@ -201,9 +201,9 @@ extern const aes_impl_ops_t aes_aesni_impl;
void aes_impl_init(void);
/*
* Get selected aes implementation
* Returns optimal allowed AES implementation
*/
struct aes_impl_ops *aes_impl_get_ops(void);
const struct aes_impl_ops *aes_impl_get_ops(void);
#ifdef __cplusplus
}
+2 -2
View File
@@ -64,9 +64,9 @@ extern const gcm_impl_ops_t gcm_pclmulqdq_impl;
void gcm_impl_init(void);
/*
* Get selected aes implementation
* Returns optimal allowed GCM implementation
*/
struct gcm_impl_ops *gcm_impl_get_ops(void);
const struct gcm_impl_ops *gcm_impl_get_ops(void);
#ifdef __cplusplus
}
+1 -1
View File
@@ -206,7 +206,7 @@ aes_mod_init(void)
{
int ret;
/* find fastest implementations and set any requested implementations */
/* Determine the fastest available implementation. */
aes_impl_init();
gcm_impl_init();
+46 -30
View File
@@ -140,6 +140,7 @@
#include <sys/zio_checksum.h>
#include <sys/zfs_context.h>
#include <zfs_fletcher.h>
#include <linux/simd.h>
#define FLETCHER_MIN_SIMD_SIZE 64
@@ -205,21 +206,19 @@ static struct fletcher_4_impl_selector {
const char *fis_name;
uint32_t fis_sel;
} fletcher_4_impl_selectors[] = {
#if !defined(_KERNEL)
{ "cycle", IMPL_CYCLE },
#endif
{ "fastest", IMPL_FASTEST },
{ "scalar", IMPL_SCALAR }
};
#if defined(_KERNEL)
static kstat_t *fletcher_4_kstat;
#endif
static struct fletcher_4_kstat {
uint64_t native;
uint64_t byteswap;
} fletcher_4_stat_data[ARRAY_SIZE(fletcher_4_impls) + 1];
#endif
/* Indicate that benchmark has been completed */
static boolean_t fletcher_4_initialized = B_FALSE;
@@ -408,32 +407,36 @@ fletcher_4_impl_set(const char *val)
return (err);
}
/*
* Returns the Fletcher 4 operations for checksums. When a SIMD
* implementation is not allowed in the current context, then fall back
* to the fastest generic implementation.
*/
static inline const fletcher_4_ops_t *
fletcher_4_impl_get(void)
{
fletcher_4_ops_t *ops = NULL;
const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
if (!kfpu_allowed())
return (&fletcher_4_superscalar4_ops);
const fletcher_4_ops_t *ops = NULL;
uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
switch (impl) {
case IMPL_FASTEST:
ASSERT(fletcher_4_initialized);
ops = &fletcher_4_fastest_impl;
break;
#if !defined(_KERNEL)
case IMPL_CYCLE: {
case IMPL_CYCLE:
/* Cycle through supported implementations */
ASSERT(fletcher_4_initialized);
ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
static uint32_t cycle_count = 0;
uint32_t idx = (++cycle_count) % fletcher_4_supp_impls_cnt;
ops = fletcher_4_supp_impls[idx];
}
break;
#endif
break;
default:
ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
ASSERT3U(impl, <, fletcher_4_supp_impls_cnt);
ops = fletcher_4_supp_impls[impl];
break;
}
@@ -659,6 +662,7 @@ fletcher_4_kstat_addr(kstat_t *ksp, loff_t n)
typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *,
zio_cksum_t *);
#if defined(_KERNEL)
static void
fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size)
{
@@ -716,16 +720,18 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size)
/* restore original selection */
atomic_swap_32(&fletcher_4_impl_chosen, sel_save);
}
#endif /* _KERNEL */
void
fletcher_4_init(void)
/*
* Initialize and benchmark all supported implementations.
*/
static void
fletcher_4_benchmark(void)
{
static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */
fletcher_4_ops_t *curr_impl;
char *databuf;
int i, c;
/* move supported impl into fletcher_4_supp_impls */
/* Move supported implementations into fletcher_4_supp_impls */
for (i = 0, c = 0; i < ARRAY_SIZE(fletcher_4_impls); i++) {
curr_impl = (fletcher_4_ops_t *)fletcher_4_impls[i];
@@ -735,19 +741,10 @@ fletcher_4_init(void)
membar_producer(); /* complete fletcher_4_supp_impls[] init */
fletcher_4_supp_impls_cnt = c; /* number of supported impl */
#if !defined(_KERNEL)
/* Skip benchmarking and use last implementation as fastest */
memcpy(&fletcher_4_fastest_impl,
fletcher_4_supp_impls[fletcher_4_supp_impls_cnt-1],
sizeof (fletcher_4_fastest_impl));
fletcher_4_fastest_impl.name = "fastest";
membar_producer();
#if defined(_KERNEL)
static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */
char *databuf = vmem_alloc(data_size, KM_SLEEP);
fletcher_4_initialized = B_TRUE;
return;
#endif
/* Benchmark all supported implementations */
databuf = vmem_alloc(data_size, KM_SLEEP);
for (i = 0; i < data_size / sizeof (uint64_t); i++)
((uint64_t *)databuf)[i] = (uintptr_t)(databuf+i); /* warm-up */
@@ -755,9 +752,28 @@ fletcher_4_init(void)
fletcher_4_benchmark_impl(B_TRUE, databuf, data_size);
vmem_free(databuf, data_size);
#else
/*
* Skip the benchmark in user space to avoid impacting libzpool
* consumers (zdb, zhack, zinject, ztest). The last implementation
* is assumed to be the fastest and used by default.
*/
memcpy(&fletcher_4_fastest_impl,
fletcher_4_supp_impls[fletcher_4_supp_impls_cnt - 1],
sizeof (fletcher_4_fastest_impl));
fletcher_4_fastest_impl.name = "fastest";
membar_producer();
#endif /* _KERNEL */
}
void
fletcher_4_init(void)
{
/* Determine the fastest available implementation. */
fletcher_4_benchmark();
#if defined(_KERNEL)
/* install kstats for all implementations */
/* Install kstats for all implementations */
fletcher_4_kstat = kstat_create("zfs", 0, "fletcher_4_bench", "misc",
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
if (fletcher_4_kstat != NULL) {
+1 -1
View File
@@ -198,7 +198,7 @@ unsigned char SRC __attribute__((vector_size(16)));
static boolean_t fletcher_4_aarch64_neon_valid(void)
{
return (B_TRUE);
return (kfpu_allowed());
}
const fletcher_4_ops_t fletcher_4_aarch64_neon_ops = {
+1 -1
View File
@@ -157,7 +157,7 @@ STACK_FRAME_NON_STANDARD(fletcher_4_avx512f_byteswap);
static boolean_t
fletcher_4_avx512f_valid(void)
{
return (zfs_avx512f_available());
return (kfpu_allowed() && zfs_avx512f_available());
}
const fletcher_4_ops_t fletcher_4_avx512f_ops = {
+1 -1
View File
@@ -156,7 +156,7 @@ fletcher_4_avx2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
static boolean_t fletcher_4_avx2_valid(void)
{
return (zfs_avx_available() && zfs_avx2_available());
return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available());
}
const fletcher_4_ops_t fletcher_4_avx2_ops = {
+3 -2
View File
@@ -157,7 +157,7 @@ fletcher_4_sse2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
static boolean_t fletcher_4_sse2_valid(void)
{
return (zfs_sse2_available());
return (kfpu_allowed() && zfs_sse2_available());
}
const fletcher_4_ops_t fletcher_4_sse2_ops = {
@@ -214,7 +214,8 @@ fletcher_4_ssse3_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
static boolean_t fletcher_4_ssse3_valid(void)
{
return (zfs_sse2_available() && zfs_ssse3_available());
return (kfpu_allowed() && zfs_sse2_available() &&
zfs_ssse3_available());
}
const fletcher_4_ops_t fletcher_4_ssse3_ops = {
+14
View File
@@ -853,10 +853,23 @@ zfs_prop_align_right(zfs_prop_t prop)
#endif
#if defined(_KERNEL)
#include <linux/simd.h>
#if defined(HAVE_KERNEL_FPU_INTERNAL)
union fpregs_state **zfs_kfpu_fpregs;
EXPORT_SYMBOL(zfs_kfpu_fpregs);
#endif /* HAVE_KERNEL_FPU_INTERNAL */
static int __init
zcommon_init(void)
{
int error = kfpu_init();
if (error)
return (error);
fletcher_4_init();
return (0);
}
@@ -864,6 +877,7 @@ static void __exit
zcommon_fini(void)
{
fletcher_4_fini();
kfpu_fini();
}
module_init(zcommon_init);
+53 -40
View File
@@ -27,9 +27,9 @@
#include <sys/zio.h>
#include <sys/debug.h>
#include <sys/zfs_debug.h>
#include <sys/vdev_raidz.h>
#include <sys/vdev_raidz_impl.h>
#include <linux/simd.h>
extern boolean_t raidz_will_scalar_work(void);
@@ -87,6 +87,7 @@ static uint32_t user_sel_impl = IMPL_FASTEST;
static size_t raidz_supp_impl_cnt = 0;
static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
#if defined(_KERNEL)
/*
* kstats values for supported implementations
* Values represent per disk throughput of 8 disk+parity raidz vdev [B/s]
@@ -95,14 +96,19 @@ static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];
/* kstat for benchmarked implementations */
static kstat_t *raidz_math_kstat = NULL;
#endif
/*
* Selects the raidz operation for raidz_map
* If rm_ops is set to NULL original raidz implementation will be used
* Returns the RAIDZ operations for raidz_map() parity calculations. When
* a SIMD implementation is not allowed in the current context, then fall back
* to the fastest generic implementation.
*/
raidz_impl_ops_t *
vdev_raidz_math_get_ops()
const raidz_impl_ops_t *
vdev_raidz_math_get_ops(void)
{
if (!kfpu_allowed())
return (&vdev_raidz_scalar_impl);
raidz_impl_ops_t *ops = NULL;
const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
@@ -111,18 +117,14 @@ vdev_raidz_math_get_ops()
ASSERT(raidz_math_initialized);
ops = &vdev_raidz_fastest_impl;
break;
#if !defined(_KERNEL)
case IMPL_CYCLE:
{
/* Cycle through all supported implementations */
ASSERT(raidz_math_initialized);
ASSERT3U(raidz_supp_impl_cnt, >, 0);
/* Cycle through all supported implementations */
static size_t cycle_impl_idx = 0;
size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
ops = raidz_supp_impl[idx];
}
break;
#endif
break;
case IMPL_ORIGINAL:
ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
break;
@@ -273,6 +275,8 @@ const char *raidz_rec_name[] = {
"rec_pq", "rec_pr", "rec_qr", "rec_pqr"
};
#if defined(_KERNEL)
#define RAIDZ_KSTAT_LINE_LEN (17 + 10*12 + 1)
static int
@@ -435,21 +439,21 @@ benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
}
}
}
#endif
void
vdev_raidz_math_init(void)
/*
* Initialize and benchmark all supported implementations.
*/
static void
benchmark_raidz(void)
{
raidz_impl_ops_t *curr_impl;
zio_t *bench_zio = NULL;
raidz_map_t *bench_rm = NULL;
uint64_t bench_parity;
int i, c, fn;
int i, c;
/* move supported impl into raidz_supp_impl */
/* Move supported impl into raidz_supp_impl */
for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
/* initialize impl */
if (curr_impl->init)
curr_impl->init();
@@ -459,18 +463,10 @@ vdev_raidz_math_init(void)
membar_producer(); /* complete raidz_supp_impl[] init */
raidz_supp_impl_cnt = c; /* number of supported impl */
#if !defined(_KERNEL)
/* Skip benchmarking and use last implementation as fastest */
memcpy(&vdev_raidz_fastest_impl, raidz_supp_impl[raidz_supp_impl_cnt-1],
sizeof (vdev_raidz_fastest_impl));
strcpy(vdev_raidz_fastest_impl.name, "fastest");
raidz_math_initialized = B_TRUE;
/* Use 'cycle' math selection method for userspace */
VERIFY0(vdev_raidz_impl_set("cycle"));
return;
#endif
#if defined(_KERNEL)
zio_t *bench_zio = NULL;
raidz_map_t *bench_rm = NULL;
uint64_t bench_parity;
/* Fake a zio and run the benchmark on a warmed up buffer */
bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
@@ -480,7 +476,7 @@ vdev_raidz_math_init(void)
memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
/* Benchmark parity generation methods */
for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
bench_parity = fn + 1;
/* New raidz_map is needed for each generate_p/q/r */
bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
@@ -495,7 +491,7 @@ vdev_raidz_math_init(void)
bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
BENCH_COLS, PARITY_PQR);
for (fn = 0; fn < RAIDZ_REC_NUM; fn++)
for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
vdev_raidz_map_free(bench_rm);
@@ -503,11 +499,29 @@ vdev_raidz_math_init(void)
/* cleanup the bench zio */
abd_free(bench_zio->io_abd);
kmem_free(bench_zio, sizeof (zio_t));
#else
/*
* Skip the benchmark in user space to avoid impacting libzpool
* consumers (zdb, zhack, zinject, ztest). The last implementation
* is assumed to be the fastest and used by default.
*/
memcpy(&vdev_raidz_fastest_impl,
raidz_supp_impl[raidz_supp_impl_cnt - 1],
sizeof (vdev_raidz_fastest_impl));
strcpy(vdev_raidz_fastest_impl.name, "fastest");
#endif /* _KERNEL */
}
/* install kstats for all impl */
void
vdev_raidz_math_init(void)
{
/* Determine the fastest available implementation. */
benchmark_raidz();
#if defined(_KERNEL)
/* Install kstats for all implementations */
raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc",
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
if (raidz_math_kstat != NULL) {
raidz_math_kstat->ks_data = NULL;
raidz_math_kstat->ks_ndata = UINT32_MAX;
@@ -517,6 +531,7 @@ vdev_raidz_math_init(void)
raidz_math_kstat_addr);
kstat_install(raidz_math_kstat);
}
#endif
/* Finish initialization */
atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
@@ -527,15 +542,15 @@ void
vdev_raidz_math_fini(void)
{
raidz_impl_ops_t const *curr_impl;
int i;
#if defined(_KERNEL)
if (raidz_math_kstat != NULL) {
kstat_delete(raidz_math_kstat);
raidz_math_kstat = NULL;
}
#endif
/* fini impl */
for (i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
curr_impl = raidz_all_maths[i];
if (curr_impl->fini)
curr_impl->fini();
@@ -546,9 +561,7 @@ static const struct {
char *name;
uint32_t sel;
} math_impl_opts[] = {
#if !defined(_KERNEL)
{ "cycle", IMPL_CYCLE },
#endif
{ "fastest", IMPL_FASTEST },
{ "original", IMPL_ORIGINAL },
{ "scalar", IMPL_SCALAR }
+1 -1
View File
@@ -207,7 +207,7 @@ DEFINE_REC_METHODS(aarch64_neon);
static boolean_t
raidz_will_aarch64_neon_work(void)
{
return (B_TRUE); // __arch64__ requires NEON
return (kfpu_allowed());
}
const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = {
+1 -1
View File
@@ -217,7 +217,7 @@ DEFINE_REC_METHODS(aarch64_neonx2);
static boolean_t
raidz_will_aarch64_neonx2_work(void)
{
return (B_TRUE); // __arch64__ requires NEON
return (kfpu_allowed());
}
const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl = {
+1 -1
View File
@@ -396,7 +396,7 @@ DEFINE_REC_METHODS(avx2);
static boolean_t
raidz_will_avx2_work(void)
{
return (zfs_avx_available() && zfs_avx2_available());
return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available());
}
const raidz_impl_ops_t vdev_raidz_avx2_impl = {
+2 -3
View File
@@ -393,9 +393,8 @@ DEFINE_REC_METHODS(avx512bw);
static boolean_t
raidz_will_avx512bw_work(void)
{
return (zfs_avx_available() &&
zfs_avx512f_available() &&
zfs_avx512bw_available());
return (kfpu_allowed() && zfs_avx_available() &&
zfs_avx512f_available() && zfs_avx512bw_available());
}
const raidz_impl_ops_t vdev_raidz_avx512bw_impl = {
+2 -3
View File
@@ -470,9 +470,8 @@ DEFINE_REC_METHODS(avx512f);
static boolean_t
raidz_will_avx512f_work(void)
{
return (zfs_avx_available() &&
zfs_avx2_available() &&
zfs_avx512f_available());
return (kfpu_allowed() && zfs_avx_available() &&
zfs_avx2_available() && zfs_avx512f_available());
}
const raidz_impl_ops_t vdev_raidz_avx512f_impl = {
+1 -1
View File
@@ -607,7 +607,7 @@ DEFINE_REC_METHODS(sse2);
static boolean_t
raidz_will_sse2_work(void)
{
return (zfs_sse_available() && zfs_sse2_available());
return (kfpu_allowed() && zfs_sse_available() && zfs_sse2_available());
}
const raidz_impl_ops_t vdev_raidz_sse2_impl = {
+2 -2
View File
@@ -399,8 +399,8 @@ DEFINE_REC_METHODS(ssse3);
static boolean_t
raidz_will_ssse3_work(void)
{
return (zfs_sse_available() && zfs_sse2_available() &&
zfs_ssse3_available());
return (kfpu_allowed() && zfs_sse_available() &&
zfs_sse2_available() && zfs_ssse3_available());
}
const raidz_impl_ops_t vdev_raidz_ssse3_impl = {
+1 -1
View File
@@ -549,12 +549,12 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv,
uint8_t *mac, zio_crypt_key_t *key)
{
int ret;
crypto_mechanism_t mech;
uio_t puio, cuio;
uint64_t aad[3];
iovec_t plain_iovecs[2], cipher_iovecs[3];
uint_t enc_len, keydata_len, aad_len;
int ret;
ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);