mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 02:44:41 +03:00
Linux 5.0 compat: SIMD compatibility
Restore the SIMD optimization for 4.19.38 LTS, 4.14.120 LTS, and 5.0 and newer kernels. This commit squashes the following commits from master into a single commit which can be applied to 0.8.2.
10fa2545 - Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore
b88ca2ac - Enable SIMD for encryption
095b5412 - Fix CONFIG_X86_DEBUG_FPU build failure
e5db3134 - Linux 5.0 compat: SIMD compatibility
Reviewed-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
TEST_ZIMPORT_SKIP="yes"
This commit is contained in:
committed by
Tony Hutter
parent
988b040476
commit
62c034f6d4
@@ -27,9 +27,9 @@
|
||||
#include <sys/zio.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/zfs_debug.h>
|
||||
|
||||
#include <sys/vdev_raidz.h>
|
||||
#include <sys/vdev_raidz_impl.h>
|
||||
#include <linux/simd.h>
|
||||
|
||||
extern boolean_t raidz_will_scalar_work(void);
|
||||
|
||||
@@ -87,6 +87,7 @@ static uint32_t user_sel_impl = IMPL_FASTEST;
|
||||
static size_t raidz_supp_impl_cnt = 0;
|
||||
static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
|
||||
|
||||
#if defined(_KERNEL)
|
||||
/*
|
||||
* kstats values for supported implementations
|
||||
* Values represent per disk throughput of 8 disk+parity raidz vdev [B/s]
|
||||
@@ -95,14 +96,19 @@ static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];
|
||||
|
||||
/* kstat for benchmarked implementations */
|
||||
static kstat_t *raidz_math_kstat = NULL;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Selects the raidz operation for raidz_map
|
||||
* If rm_ops is set to NULL original raidz implementation will be used
|
||||
* Returns the RAIDZ operations for raidz_map() parity calculations. When
|
||||
* a SIMD implementation is not allowed in the current context, then fallback
|
||||
* to the fastest generic implementation.
|
||||
*/
|
||||
raidz_impl_ops_t *
|
||||
vdev_raidz_math_get_ops()
|
||||
const raidz_impl_ops_t *
|
||||
vdev_raidz_math_get_ops(void)
|
||||
{
|
||||
if (!kfpu_allowed())
|
||||
return (&vdev_raidz_scalar_impl);
|
||||
|
||||
raidz_impl_ops_t *ops = NULL;
|
||||
const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
|
||||
|
||||
@@ -111,18 +117,14 @@ vdev_raidz_math_get_ops()
|
||||
ASSERT(raidz_math_initialized);
|
||||
ops = &vdev_raidz_fastest_impl;
|
||||
break;
|
||||
#if !defined(_KERNEL)
|
||||
case IMPL_CYCLE:
|
||||
{
|
||||
/* Cycle through all supported implementations */
|
||||
ASSERT(raidz_math_initialized);
|
||||
ASSERT3U(raidz_supp_impl_cnt, >, 0);
|
||||
/* Cycle through all supported implementations */
|
||||
static size_t cycle_impl_idx = 0;
|
||||
size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
|
||||
ops = raidz_supp_impl[idx];
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
break;
|
||||
case IMPL_ORIGINAL:
|
||||
ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
|
||||
break;
|
||||
@@ -273,6 +275,8 @@ const char *raidz_rec_name[] = {
|
||||
"rec_pq", "rec_pr", "rec_qr", "rec_pqr"
|
||||
};
|
||||
|
||||
#if defined(_KERNEL)
|
||||
|
||||
#define RAIDZ_KSTAT_LINE_LEN (17 + 10*12 + 1)
|
||||
|
||||
static int
|
||||
@@ -435,21 +439,21 @@ benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
vdev_raidz_math_init(void)
|
||||
/*
|
||||
* Initialize and benchmark all supported implementations.
|
||||
*/
|
||||
static void
|
||||
benchmark_raidz(void)
|
||||
{
|
||||
raidz_impl_ops_t *curr_impl;
|
||||
zio_t *bench_zio = NULL;
|
||||
raidz_map_t *bench_rm = NULL;
|
||||
uint64_t bench_parity;
|
||||
int i, c, fn;
|
||||
int i, c;
|
||||
|
||||
/* move supported impl into raidz_supp_impl */
|
||||
/* Move supported impl into raidz_supp_impl */
|
||||
for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
|
||||
curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
|
||||
|
||||
/* initialize impl */
|
||||
if (curr_impl->init)
|
||||
curr_impl->init();
|
||||
|
||||
@@ -459,18 +463,10 @@ vdev_raidz_math_init(void)
|
||||
membar_producer(); /* complete raidz_supp_impl[] init */
|
||||
raidz_supp_impl_cnt = c; /* number of supported impl */
|
||||
|
||||
#if !defined(_KERNEL)
|
||||
/* Skip benchmarking and use last implementation as fastest */
|
||||
memcpy(&vdev_raidz_fastest_impl, raidz_supp_impl[raidz_supp_impl_cnt-1],
|
||||
sizeof (vdev_raidz_fastest_impl));
|
||||
strcpy(vdev_raidz_fastest_impl.name, "fastest");
|
||||
|
||||
raidz_math_initialized = B_TRUE;
|
||||
|
||||
/* Use 'cycle' math selection method for userspace */
|
||||
VERIFY0(vdev_raidz_impl_set("cycle"));
|
||||
return;
|
||||
#endif
|
||||
#if defined(_KERNEL)
|
||||
zio_t *bench_zio = NULL;
|
||||
raidz_map_t *bench_rm = NULL;
|
||||
uint64_t bench_parity;
|
||||
|
||||
/* Fake a zio and run the benchmark on a warmed up buffer */
|
||||
bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
|
||||
@@ -480,7 +476,7 @@ vdev_raidz_math_init(void)
|
||||
memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
|
||||
|
||||
/* Benchmark parity generation methods */
|
||||
for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
|
||||
for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
|
||||
bench_parity = fn + 1;
|
||||
/* New raidz_map is needed for each generate_p/q/r */
|
||||
bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
|
||||
@@ -495,7 +491,7 @@ vdev_raidz_math_init(void)
|
||||
bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
|
||||
BENCH_COLS, PARITY_PQR);
|
||||
|
||||
for (fn = 0; fn < RAIDZ_REC_NUM; fn++)
|
||||
for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
|
||||
benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
|
||||
|
||||
vdev_raidz_map_free(bench_rm);
|
||||
@@ -503,11 +499,29 @@ vdev_raidz_math_init(void)
|
||||
/* cleanup the bench zio */
|
||||
abd_free(bench_zio->io_abd);
|
||||
kmem_free(bench_zio, sizeof (zio_t));
|
||||
#else
|
||||
/*
|
||||
* Skip the benchmark in user space to avoid impacting libzpool
|
||||
* consumers (zdb, zhack, zinject, ztest). The last implementation
|
||||
* is assumed to be the fastest and used by default.
|
||||
*/
|
||||
memcpy(&vdev_raidz_fastest_impl,
|
||||
raidz_supp_impl[raidz_supp_impl_cnt - 1],
|
||||
sizeof (vdev_raidz_fastest_impl));
|
||||
strcpy(vdev_raidz_fastest_impl.name, "fastest");
|
||||
#endif /* _KERNEL */
|
||||
}
|
||||
|
||||
/* install kstats for all impl */
|
||||
void
|
||||
vdev_raidz_math_init(void)
|
||||
{
|
||||
/* Determine the fastest available implementation. */
|
||||
benchmark_raidz();
|
||||
|
||||
#if defined(_KERNEL)
|
||||
/* Install kstats for all implementations */
|
||||
raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc",
|
||||
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
|
||||
|
||||
if (raidz_math_kstat != NULL) {
|
||||
raidz_math_kstat->ks_data = NULL;
|
||||
raidz_math_kstat->ks_ndata = UINT32_MAX;
|
||||
@@ -517,6 +531,7 @@ vdev_raidz_math_init(void)
|
||||
raidz_math_kstat_addr);
|
||||
kstat_install(raidz_math_kstat);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Finish initialization */
|
||||
atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
|
||||
@@ -527,15 +542,15 @@ void
|
||||
vdev_raidz_math_fini(void)
|
||||
{
|
||||
raidz_impl_ops_t const *curr_impl;
|
||||
int i;
|
||||
|
||||
#if defined(_KERNEL)
|
||||
if (raidz_math_kstat != NULL) {
|
||||
kstat_delete(raidz_math_kstat);
|
||||
raidz_math_kstat = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* fini impl */
|
||||
for (i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
|
||||
for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
|
||||
curr_impl = raidz_all_maths[i];
|
||||
if (curr_impl->fini)
|
||||
curr_impl->fini();
|
||||
@@ -546,9 +561,7 @@ static const struct {
|
||||
char *name;
|
||||
uint32_t sel;
|
||||
} math_impl_opts[] = {
|
||||
#if !defined(_KERNEL)
|
||||
{ "cycle", IMPL_CYCLE },
|
||||
#endif
|
||||
{ "fastest", IMPL_FASTEST },
|
||||
{ "original", IMPL_ORIGINAL },
|
||||
{ "scalar", IMPL_SCALAR }
|
||||
|
||||
@@ -207,7 +207,7 @@ DEFINE_REC_METHODS(aarch64_neon);
|
||||
static boolean_t
|
||||
raidz_will_aarch64_neon_work(void)
|
||||
{
|
||||
return (B_TRUE); // __arch64__ requires NEON
|
||||
return (kfpu_allowed());
|
||||
}
|
||||
|
||||
const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = {
|
||||
|
||||
@@ -217,7 +217,7 @@ DEFINE_REC_METHODS(aarch64_neonx2);
|
||||
static boolean_t
|
||||
raidz_will_aarch64_neonx2_work(void)
|
||||
{
|
||||
return (B_TRUE); // __arch64__ requires NEON
|
||||
return (kfpu_allowed());
|
||||
}
|
||||
|
||||
const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl = {
|
||||
|
||||
@@ -396,7 +396,7 @@ DEFINE_REC_METHODS(avx2);
|
||||
static boolean_t
|
||||
raidz_will_avx2_work(void)
|
||||
{
|
||||
return (zfs_avx_available() && zfs_avx2_available());
|
||||
return (kfpu_allowed() && zfs_avx_available() && zfs_avx2_available());
|
||||
}
|
||||
|
||||
const raidz_impl_ops_t vdev_raidz_avx2_impl = {
|
||||
|
||||
@@ -393,9 +393,8 @@ DEFINE_REC_METHODS(avx512bw);
|
||||
static boolean_t
|
||||
raidz_will_avx512bw_work(void)
|
||||
{
|
||||
return (zfs_avx_available() &&
|
||||
zfs_avx512f_available() &&
|
||||
zfs_avx512bw_available());
|
||||
return (kfpu_allowed() && zfs_avx_available() &&
|
||||
zfs_avx512f_available() && zfs_avx512bw_available());
|
||||
}
|
||||
|
||||
const raidz_impl_ops_t vdev_raidz_avx512bw_impl = {
|
||||
|
||||
@@ -470,9 +470,8 @@ DEFINE_REC_METHODS(avx512f);
|
||||
static boolean_t
|
||||
raidz_will_avx512f_work(void)
|
||||
{
|
||||
return (zfs_avx_available() &&
|
||||
zfs_avx2_available() &&
|
||||
zfs_avx512f_available());
|
||||
return (kfpu_allowed() && zfs_avx_available() &&
|
||||
zfs_avx2_available() && zfs_avx512f_available());
|
||||
}
|
||||
|
||||
const raidz_impl_ops_t vdev_raidz_avx512f_impl = {
|
||||
|
||||
@@ -607,7 +607,7 @@ DEFINE_REC_METHODS(sse2);
|
||||
static boolean_t
|
||||
raidz_will_sse2_work(void)
|
||||
{
|
||||
return (zfs_sse_available() && zfs_sse2_available());
|
||||
return (kfpu_allowed() && zfs_sse_available() && zfs_sse2_available());
|
||||
}
|
||||
|
||||
const raidz_impl_ops_t vdev_raidz_sse2_impl = {
|
||||
|
||||
@@ -399,8 +399,8 @@ DEFINE_REC_METHODS(ssse3);
|
||||
static boolean_t
|
||||
raidz_will_ssse3_work(void)
|
||||
{
|
||||
return (zfs_sse_available() && zfs_sse2_available() &&
|
||||
zfs_ssse3_available());
|
||||
return (kfpu_allowed() && zfs_sse_available() &&
|
||||
zfs_sse2_available() && zfs_ssse3_available());
|
||||
}
|
||||
|
||||
const raidz_impl_ops_t vdev_raidz_ssse3_impl = {
|
||||
|
||||
@@ -549,12 +549,12 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
|
||||
uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv,
|
||||
uint8_t *mac, zio_crypt_key_t *key)
|
||||
{
|
||||
int ret;
|
||||
crypto_mechanism_t mech;
|
||||
uio_t puio, cuio;
|
||||
uint64_t aad[3];
|
||||
iovec_t plain_iovecs[2], cipher_iovecs[3];
|
||||
uint_t enc_len, keydata_len, aad_len;
|
||||
int ret;
|
||||
|
||||
ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
|
||||
ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
|
||||
|
||||
Reference in New Issue
Block a user