From 75e8b5ad847ed7fd9e40ffdf33989b6578469903 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Wed, 3 Aug 2022 18:36:41 +0200 Subject: [PATCH] Fix BLAKE3 tuneable and module loading on Linux and FreeBSD Apply options to BLAKE3 similar to those already provided for zfs_fletcher_4_impl. The zfs module parameter on Linux changes from icp_blake3_impl to zfs_blake3_impl. You can check and set it on Linux via sysfs like this: ``` [bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl cycle [fastest] generic sse2 sse41 avx2 [bash]# echo sse2 > /sys/module/zfs/parameters/zfs_blake3_impl [bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl cycle fastest generic [sse2] sse41 avx2 ``` The modprobe module parameters may also be used now: ``` [bash]# modprobe zfs zfs_blake3_impl=sse41 [bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl cycle fastest generic sse2 [sse41] avx2 ``` On FreeBSD the BLAKE3 implementation can be set via sysctl like this: ``` [bsd]# sysctl vfs.zfs.blake3_impl vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2 [bsd]# sysctl vfs.zfs.blake3_impl=sse2 vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2 \ -> cycle fastest generic [sse2] sse41 avx2 ``` This commit also changes some BLAKE3 internals: - blake3_impl_ops_t was renamed to blake3_ops_t - all functions are named blake3_impl_NAME() now Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Co-authored-by: Ryan Moeller Signed-off-by: Tino Reichardt Closes #13725 --- cmd/ztest.c | 4 +- include/os/freebsd/spl/sys/mod_os.h | 7 +- include/sys/blake3.h | 23 +- module/icp/algs/blake3/blake3.c | 14 +- module/icp/algs/blake3/blake3_generic.c | 2 +- module/icp/algs/blake3/blake3_impl.c | 366 +++++++++++++-------- module/icp/algs/blake3/blake3_impl.h | 14 +- module/icp/algs/blake3/blake3_x86-64.c | 8 +- module/zfs/zfs_chksum.c | 20 +- tests/zfs-tests/cmd/checksum/blake3_test.c | 12 +- 10 files changed, 273 insertions(+), 197 deletions(-) diff --git a/cmd/ztest.c b/cmd/ztest.c index 
847c3a5b0..0712f286b 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c @@ -6413,7 +6413,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id) void *res2 = &zc_res2; /* BLAKE3_KEY_LEN = 32 */ - VERIFY0(blake3_set_impl_name("generic")); + VERIFY0(blake3_impl_setname("generic")); templ = abd_checksum_blake3_tmpl_init(&salt); Blake3_InitKeyed(&ctx, salt_ptr); Blake3_Update(&ctx, buf, size); @@ -6422,7 +6422,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id) ZIO_CHECKSUM_BSWAP(&zc_ref2); abd_checksum_blake3_tmpl_free(templ); - VERIFY0(blake3_set_impl_name("cycle")); + VERIFY0(blake3_impl_setname("cycle")); while (run_count-- > 0) { /* Test current implementation */ diff --git a/include/os/freebsd/spl/sys/mod_os.h b/include/os/freebsd/spl/sys/mod_os.h index 95a19cc94..e2815ce9e 100644 --- a/include/os/freebsd/spl/sys/mod_os.h +++ b/include/os/freebsd/spl/sys/mod_os.h @@ -31,10 +31,6 @@ #include -#define EXPORT_SYMBOL(x) -#define module_param(a, b, c) -#define MODULE_PARM_DESC(a, b) - #define ZMOD_RW CTLFLAG_RWTUN #define ZMOD_RD CTLFLAG_RDTUN @@ -98,6 +94,9 @@ #define fletcher_4_param_set_args(var) \ CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A" +#define blake3_param_set_args(var) \ + CTLTYPE_STRING, NULL, 0, blake3_param, "A" + #include #define module_init(fn) \ static void \ diff --git a/include/sys/blake3.h b/include/sys/blake3.h index 19500585f..ad65fc8db 100644 --- a/include/sys/blake3.h +++ b/include/sys/blake3.h @@ -72,7 +72,7 @@ typedef struct { */ uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN]; - /* const blake3_impl_ops_t *ops */ + /* const blake3_ops_t *ops */ const void *ops; } BLAKE3_CTX; @@ -97,26 +97,23 @@ extern void **blake3_per_cpu_ctx; extern void blake3_per_cpu_ctx_init(void); extern void blake3_per_cpu_ctx_fini(void); -/* return number of supported implementations */ -extern int blake3_get_impl_count(void); +/* get count of supported implementations */ +extern uint32_t blake3_impl_getcnt(void); -/* return id of selected implementation */ -extern int 
blake3_get_impl_id(void); +/* get id of selected implementation */ +extern uint32_t blake3_impl_getid(void); -/* return name of selected implementation */ -extern const char *blake3_get_impl_name(void); +/* get name of selected implementation */ +extern const char *blake3_impl_getname(void); /* setup id as fastest implementation */ -extern void blake3_set_impl_fastest(uint32_t id); +extern void blake3_impl_set_fastest(uint32_t id); /* set implementation by id */ -extern void blake3_set_impl_id(uint32_t id); +extern void blake3_impl_setid(uint32_t id); /* set implementation by name */ -extern int blake3_set_impl_name(const char *name); - -/* set startup implementation */ -extern void blake3_setup_impl(void); +extern int blake3_impl_setname(const char *name); #ifdef __cplusplus } diff --git a/module/icp/algs/blake3/blake3.c b/module/icp/algs/blake3/blake3.c index b9600207b..5f7018598 100644 --- a/module/icp/algs/blake3/blake3.c +++ b/module/icp/algs/blake3/blake3.c @@ -129,7 +129,7 @@ static output_t make_output(const uint32_t input_cv[8], * bytes. For that reason, chaining values in the CV stack are represented as * bytes. 
*/ -static void output_chaining_value(const blake3_impl_ops_t *ops, +static void output_chaining_value(const blake3_ops_t *ops, const output_t *ctx, uint8_t cv[32]) { uint32_t cv_words[8]; @@ -139,7 +139,7 @@ static void output_chaining_value(const blake3_impl_ops_t *ops, store_cv_words(cv, cv_words); } -static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx, +static void output_root_bytes(const blake3_ops_t *ops, const output_t *ctx, uint64_t seek, uint8_t *out, size_t out_len) { uint64_t output_block_counter = seek / 64; @@ -163,7 +163,7 @@ static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx, } } -static void chunk_state_update(const blake3_impl_ops_t *ops, +static void chunk_state_update(const blake3_ops_t *ops, blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len) { if (ctx->buf_len > 0) { @@ -230,7 +230,7 @@ static size_t left_len(size_t content_len) * number of chunks hashed. These chunks are never the root and never empty; * those cases use a different codepath. */ -static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops, +static size_t compress_chunks_parallel(const blake3_ops_t *ops, const uint8_t *input, size_t input_len, const uint32_t key[8], uint64_t chunk_counter, uint8_t flags, uint8_t *out) { @@ -274,7 +274,7 @@ static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops, * return it as an additional output.) These parents are never the root and * never empty; those cases use a different codepath. */ -static size_t compress_parents_parallel(const blake3_impl_ops_t *ops, +static size_t compress_parents_parallel(const blake3_ops_t *ops, const uint8_t *child_chaining_values, size_t num_chaining_values, const uint32_t key[8], uint8_t flags, uint8_t *out) { @@ -320,7 +320,7 @@ static size_t compress_parents_parallel(const blake3_impl_ops_t *ops, * of implementing this special rule? Because we don't want to limit SIMD or * multi-threading parallelism for that update(). 
*/ -static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops, +static size_t blake3_compress_subtree_wide(const blake3_ops_t *ops, const uint8_t *input, size_t input_len, const uint32_t key[8], uint64_t chunk_counter, uint8_t flags, uint8_t *out) { @@ -406,7 +406,7 @@ static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops, * As with compress_subtree_wide(), this function is not used on inputs of 1 * chunk or less. That's a different codepath. */ -static void compress_subtree_to_parent_node(const blake3_impl_ops_t *ops, +static void compress_subtree_to_parent_node(const blake3_ops_t *ops, const uint8_t *input, size_t input_len, const uint32_t key[8], uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) { diff --git a/module/icp/algs/blake3/blake3_generic.c b/module/icp/algs/blake3/blake3_generic.c index 6c1eb33e8..94a1f1082 100644 --- a/module/icp/algs/blake3/blake3_generic.c +++ b/module/icp/algs/blake3/blake3_generic.c @@ -192,7 +192,7 @@ static inline boolean_t blake3_is_generic_supported(void) return (B_TRUE); } -const blake3_impl_ops_t blake3_generic_impl = { +const blake3_ops_t blake3_generic_impl = { .compress_in_place = blake3_compress_in_place_generic, .compress_xof = blake3_compress_xof_generic, .hash_many = blake3_hash_many_generic, diff --git a/module/icp/algs/blake3/blake3_impl.c b/module/icp/algs/blake3/blake3_impl.c index 10741c82d..5276fd88f 100644 --- a/module/icp/algs/blake3/blake3_impl.c +++ b/module/icp/algs/blake3/blake3_impl.c @@ -28,7 +28,7 @@ #include "blake3_impl.h" -static const blake3_impl_ops_t *const blake3_impls[] = { +static const blake3_ops_t *const blake3_impls[] = { &blake3_generic_impl, #if defined(__aarch64__) || \ (defined(__x86_64) && defined(HAVE_SSE2)) || \ @@ -48,160 +48,199 @@ static const blake3_impl_ops_t *const blake3_impls[] = { #endif }; -/* this pointer holds current ops for implementation */ -static const blake3_impl_ops_t *blake3_selected_impl = &blake3_generic_impl; 
- -/* special implementation selections */ +/* Select BLAKE3 implementation */ #define IMPL_FASTEST (UINT32_MAX) -#define IMPL_CYCLE (UINT32_MAX-1) -#define IMPL_USER (UINT32_MAX-2) -#define IMPL_PARAM (UINT32_MAX-3) +#define IMPL_CYCLE (UINT32_MAX - 1) -#define IMPL_READ(i) (*(volatile uint32_t *) &(i)) -static uint32_t icp_blake3_impl = IMPL_FASTEST; +#define IMPL_READ(i) (*(volatile uint32_t *) &(i)) -#define BLAKE3_IMPL_NAME_MAX 16 +/* Indicate that benchmark has been done */ +static boolean_t blake3_initialized = B_FALSE; -/* id of fastest implementation */ -static uint32_t blake3_fastest_id = 0; +/* Implementation that contains the fastest methods */ +static blake3_ops_t blake3_fastest_impl = { + .name = "fastest" +}; -/* currently used id */ -static uint32_t blake3_current_id = 0; +/* Hold all supported implementations */ +static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)]; +static uint32_t blake3_supp_impls_cnt = 0; -/* id of module parameter (-1 == unused) */ -static int blake3_param_id = -1; +/* Currently selected implementation */ +static uint32_t blake3_impl_chosen = IMPL_FASTEST; -/* return number of supported implementations */ -int -blake3_get_impl_count(void) +static struct blake3_impl_selector { + const char *name; + uint32_t sel; +} blake3_impl_selectors[] = { + { "cycle", IMPL_CYCLE }, + { "fastest", IMPL_FASTEST } +}; + +/* check the supported implementations */ +static void blake3_impl_init(void) { - static int impls = 0; - int i; + int i, c; - if (impls) - return (impls); + /* init only once */ + if (likely(blake3_initialized)) + return; - for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) { - if (!blake3_impls[i]->is_supported()) continue; - impls++; + /* move supported implementations into blake3_supp_impls */ + for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) { + const blake3_ops_t *impl = blake3_impls[i]; + + if (impl->is_supported && impl->is_supported()) + blake3_supp_impls[c++] = impl; } + blake3_supp_impls_cnt = c; 
- return (impls); + /* first init generic impl, may be changed via set_fastest() */ + memcpy(&blake3_fastest_impl, blake3_impls[0], + sizeof (blake3_fastest_impl)); + blake3_initialized = B_TRUE; } -/* return id of selected implementation */ -int -blake3_get_impl_id(void) +/* get number of supported implementations */ +uint32_t +blake3_impl_getcnt(void) { - return (blake3_current_id); + blake3_impl_init(); + return (blake3_supp_impls_cnt); } -/* return name of selected implementation */ +/* get id of selected implementation */ +uint32_t +blake3_impl_getid(void) +{ + return (IMPL_READ(blake3_impl_chosen)); +} + +/* get name of selected implementation */ const char * -blake3_get_impl_name(void) +blake3_impl_getname(void) { - return (blake3_selected_impl->name); + uint32_t impl = IMPL_READ(blake3_impl_chosen); + + blake3_impl_init(); + switch (impl) { + case IMPL_FASTEST: + return ("fastest"); + case IMPL_CYCLE: + return ("cycle"); + default: + return (blake3_supp_impls[impl]->name); + } } /* setup id as fastest implementation */ void -blake3_set_impl_fastest(uint32_t id) +blake3_impl_set_fastest(uint32_t id) { - blake3_fastest_id = id; + /* setup fastest impl */ + memcpy(&blake3_fastest_impl, blake3_supp_impls[id], + sizeof (blake3_fastest_impl)); } /* set implementation by id */ void -blake3_set_impl_id(uint32_t id) +blake3_impl_setid(uint32_t id) { - int i, cid; - - /* select fastest */ - if (id == IMPL_FASTEST) - id = blake3_fastest_id; - - /* select next or first */ - if (id == IMPL_CYCLE) - id = (++blake3_current_id) % blake3_get_impl_count(); - - /* 0..N for the real impl */ - for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) { - if (!blake3_impls[i]->is_supported()) continue; - if (cid == id) { - blake3_current_id = cid; - blake3_selected_impl = blake3_impls[i]; - return; - } - cid++; + blake3_impl_init(); + switch (id) { + case IMPL_FASTEST: + atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST); + break; + case IMPL_CYCLE: + 
atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE); + break; + default: + ASSERT3U(id, >=, 0); + ASSERT3U(id, <, blake3_supp_impls_cnt); + atomic_swap_32(&blake3_impl_chosen, id); + break; } } /* set implementation by name */ int -blake3_set_impl_name(const char *name) +blake3_impl_setname(const char *val) { - int i, cid; + uint32_t impl = IMPL_READ(blake3_impl_chosen); + size_t val_len; + int i, err = -EINVAL; - if (strcmp(name, "fastest") == 0) { - atomic_swap_32(&icp_blake3_impl, IMPL_FASTEST); - blake3_set_impl_id(IMPL_FASTEST); - return (0); - } else if (strcmp(name, "cycle") == 0) { - atomic_swap_32(&icp_blake3_impl, IMPL_CYCLE); - blake3_set_impl_id(IMPL_CYCLE); - return (0); - } + blake3_impl_init(); + val_len = strlen(val); + while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */ + val_len--; - for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) { - if (!blake3_impls[i]->is_supported()) continue; - if (strcmp(name, blake3_impls[i]->name) == 0) { - if (icp_blake3_impl == IMPL_PARAM) { - blake3_param_id = cid; - return (0); - } - blake3_selected_impl = blake3_impls[i]; - blake3_current_id = cid; - return (0); + /* check mandatory implementations */ + for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) { + const char *name = blake3_impl_selectors[i].name; + + if (val_len == strlen(name) && + strncmp(val, name, val_len) == 0) { + impl = blake3_impl_selectors[i].sel; + err = 0; + break; } - cid++; } - return (-EINVAL); -} + if (err != 0 && blake3_initialized) { + /* check all supported implementations */ + for (i = 0; i < blake3_supp_impls_cnt; i++) { + const char *name = blake3_supp_impls[i]->name; -/* setup implementation */ -void -blake3_setup_impl(void) -{ - switch (IMPL_READ(icp_blake3_impl)) { - case IMPL_PARAM: - blake3_set_impl_id(blake3_param_id); - atomic_swap_32(&icp_blake3_impl, IMPL_USER); - break; - case IMPL_FASTEST: - blake3_set_impl_id(IMPL_FASTEST); - break; - case IMPL_CYCLE: - blake3_set_impl_id(IMPL_CYCLE); - break; - 
default: - blake3_set_impl_id(blake3_current_id); - break; + if (val_len == strlen(name) && + strncmp(val, name, val_len) == 0) { + impl = i; + err = 0; + break; + } + } } + + if (err == 0) { + atomic_swap_32(&blake3_impl_chosen, impl); + } + + return (err); } -/* return selected implementation */ -const blake3_impl_ops_t * +const blake3_ops_t * blake3_impl_get_ops(void) { - /* each call to ops will cycle */ - if (icp_blake3_impl == IMPL_CYCLE) - blake3_set_impl_id(IMPL_CYCLE); + const blake3_ops_t *ops = NULL; + uint32_t impl = IMPL_READ(blake3_impl_chosen); - return (blake3_selected_impl); + blake3_impl_init(); + switch (impl) { + case IMPL_FASTEST: + ASSERT(blake3_initialized); + ops = &blake3_fastest_impl; + break; + case IMPL_CYCLE: + /* Cycle through supported implementations */ + ASSERT(blake3_initialized); + ASSERT3U(blake3_supp_impls_cnt, >, 0); + static uint32_t cycle_count = 0; + uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt; + ops = blake3_supp_impls[idx]; + break; + default: + ASSERT3U(blake3_supp_impls_cnt, >, 0); + ASSERT3U(impl, <, blake3_supp_impls_cnt); + ops = blake3_supp_impls[impl]; + break; + } + + ASSERT3P(ops, !=, NULL); + return (ops); } #if defined(_KERNEL) + void **blake3_per_cpu_ctx; void @@ -215,6 +254,9 @@ blake3_per_cpu_ctx_init(void) blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX), KM_SLEEP); } + + /* init once in kernel mode */ + blake3_impl_init(); } void @@ -227,58 +269,94 @@ blake3_per_cpu_ctx_fini(void) memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *)); kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *)); } -#endif -#if defined(_KERNEL) && defined(__linux__) -static int -icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp) -{ - char req_name[BLAKE3_IMPL_NAME_MAX]; - size_t i; +#define IMPL_FMT(impl, i) (((impl) == (i)) ? 
"[%s] " : "%s ") - /* sanitize input */ - i = strnlen(name, BLAKE3_IMPL_NAME_MAX); - if (i == 0 || i >= BLAKE3_IMPL_NAME_MAX) - return (-EINVAL); - - strlcpy(req_name, name, BLAKE3_IMPL_NAME_MAX); - while (i > 0 && isspace(req_name[i-1])) - i--; - req_name[i] = '\0'; - - atomic_swap_32(&icp_blake3_impl, IMPL_PARAM); - return (blake3_set_impl_name(req_name)); -} +#if defined(__linux__) static int -icp_blake3_impl_get(char *buffer, zfs_kernel_param_t *kp) +blake3_param_get(char *buffer, zfs_kernel_param_t *unused) { - int i, cid, cnt = 0; + const uint32_t impl = IMPL_READ(blake3_impl_chosen); char *fmt; + int cnt = 0; /* cycling */ - fmt = (icp_blake3_impl == IMPL_CYCLE) ? "[cycle] " : "cycle "; - cnt += sprintf(buffer + cnt, fmt); + fmt = IMPL_FMT(impl, IMPL_CYCLE); + cnt += sprintf(buffer + cnt, fmt, "cycle"); - /* fastest one */ - fmt = (icp_blake3_impl == IMPL_FASTEST) ? "[fastest] " : "fastest "; - cnt += sprintf(buffer + cnt, fmt); + /* list fastest */ + fmt = IMPL_FMT(impl, IMPL_FASTEST); + cnt += sprintf(buffer + cnt, fmt, "fastest"); - /* user selected */ - for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) { - if (!blake3_impls[i]->is_supported()) continue; - fmt = (icp_blake3_impl == IMPL_USER && - cid == blake3_current_id) ? 
"[%s] " : "%s "; - cnt += sprintf(buffer + cnt, fmt, blake3_impls[i]->name); - cid++; + /* list all supported implementations */ + for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) { + fmt = IMPL_FMT(impl, i); + cnt += sprintf(buffer + cnt, fmt, + blake3_supp_impls[i]->name); } - buffer[cnt] = 0; - return (cnt); } -module_param_call(icp_blake3_impl, icp_blake3_impl_set, icp_blake3_impl_get, - NULL, 0644); -MODULE_PARM_DESC(icp_blake3_impl, "Select BLAKE3 implementation."); +static int +blake3_param_set(const char *val, zfs_kernel_param_t *unused) +{ + (void) unused; + return (blake3_impl_setname(val)); +} + +#elif defined(__FreeBSD__) + +#include + +static int +blake3_param(ZFS_MODULE_PARAM_ARGS) +{ + int err; + + if (req->newptr == NULL) { + const uint32_t impl = IMPL_READ(blake3_impl_chosen); + const int init_buflen = 64; + const char *fmt; + struct sbuf *s; + + s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req); + + /* cycling */ + fmt = IMPL_FMT(impl, IMPL_CYCLE); + (void) sbuf_printf(s, fmt, "cycle"); + + /* list fastest */ + fmt = IMPL_FMT(impl, IMPL_FASTEST); + (void) sbuf_printf(s, fmt, "fastest"); + + /* list all supported implementations */ + for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) { + fmt = IMPL_FMT(impl, i); + (void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name); + } + + err = sbuf_finish(s); + sbuf_delete(s); + + return (err); + } + + char buf[16]; + + err = sysctl_handle_string(oidp, buf, sizeof (buf), req); + if (err) { + return (err); + } + + return (-blake3_impl_setname(buf)); +} +#endif + +#undef IMPL_FMT + +ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl, + blake3_param_set, blake3_param_get, ZMOD_RW, \ + "Select BLAKE3 implementation."); #endif diff --git a/module/icp/algs/blake3/blake3_impl.h b/module/icp/algs/blake3/blake3_impl.h index 5254061c7..eef74eaa9 100644 --- a/module/icp/algs/blake3/blake3_impl.h +++ b/module/icp/algs/blake3/blake3_impl.h @@ -62,31 +62,31 @@ typedef struct blake3_impl_ops { 
blake3_is_supported_f is_supported; int degree; const char *name; -} blake3_impl_ops_t; +} blake3_ops_t; /* Return selected BLAKE3 implementation ops */ -extern const blake3_impl_ops_t *blake3_impl_get_ops(void); +extern const blake3_ops_t *blake3_impl_get_ops(void); -extern const blake3_impl_ops_t blake3_generic_impl; +extern const blake3_ops_t blake3_generic_impl; #if defined(__aarch64__) || \ (defined(__x86_64) && defined(HAVE_SSE2)) || \ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) -extern const blake3_impl_ops_t blake3_sse2_impl; +extern const blake3_ops_t blake3_sse2_impl; #endif #if defined(__aarch64__) || \ (defined(__x86_64) && defined(HAVE_SSE4_1)) || \ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) -extern const blake3_impl_ops_t blake3_sse41_impl; +extern const blake3_ops_t blake3_sse41_impl; #endif #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) -extern const blake3_impl_ops_t blake3_avx2_impl; +extern const blake3_ops_t blake3_avx2_impl; #endif #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) -extern const blake3_impl_ops_t blake3_avx512_impl; +extern const blake3_ops_t blake3_avx512_impl; #endif #if defined(__x86_64) diff --git a/module/icp/algs/blake3/blake3_x86-64.c b/module/icp/algs/blake3/blake3_x86-64.c index aecd29edb..8139789fd 100644 --- a/module/icp/algs/blake3/blake3_x86-64.c +++ b/module/icp/algs/blake3/blake3_x86-64.c @@ -81,7 +81,7 @@ static boolean_t blake3_is_sse2_supported(void) #endif } -const blake3_impl_ops_t blake3_sse2_impl = { +const blake3_ops_t blake3_sse2_impl = { .compress_in_place = blake3_compress_in_place_sse2, .compress_xof = blake3_compress_xof_sse2, .hash_many = blake3_hash_many_sse2, @@ -147,7 +147,7 @@ static boolean_t blake3_is_sse41_supported(void) #endif } -const blake3_impl_ops_t blake3_sse41_impl = { +const blake3_ops_t blake3_sse41_impl = { .compress_in_place = blake3_compress_in_place_sse41, .compress_xof = blake3_compress_xof_sse41, .hash_many = 
blake3_hash_many_sse41, @@ -179,7 +179,7 @@ static boolean_t blake3_is_avx2_supported(void) zfs_avx2_available()); } -const blake3_impl_ops_t blake3_avx2_impl = { +const blake3_ops_t blake3_avx2_impl = { .compress_in_place = blake3_compress_in_place_sse41, .compress_xof = blake3_compress_xof_sse41, .hash_many = blake3_hash_many_avx2, @@ -237,7 +237,7 @@ static boolean_t blake3_is_avx512_supported(void) zfs_avx512vl_available()); } -const blake3_impl_ops_t blake3_avx512_impl = { +const blake3_ops_t blake3_avx512_impl = { .compress_in_place = blake3_compress_in_place_avx512, .compress_xof = blake3_compress_xof_avx512, .hash_many = blake3_hash_many_avx512, diff --git a/module/zfs/zfs_chksum.c b/module/zfs/zfs_chksum.c index b9dc907af..74b4cb8d2 100644 --- a/module/zfs/zfs_chksum.c +++ b/module/zfs/zfs_chksum.c @@ -244,12 +244,13 @@ chksum_benchmark(void) #endif chksum_stat_t *cs; - int cbid = 0, id; + int cbid = 0; uint64_t max = 0; + uint32_t id, id_save; /* space for the benchmark times */ chksum_stat_cnt = 4; - chksum_stat_cnt += blake3_get_impl_count(); + chksum_stat_cnt += blake3_impl_getcnt(); chksum_stat_data = (chksum_stat_t *)kmem_zalloc( sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP); @@ -290,20 +291,24 @@ chksum_benchmark(void) chksum_benchit(cs); /* blake3 */ - for (id = 0; id < blake3_get_impl_count(); id++) { - blake3_set_impl_id(id); + id_save = blake3_impl_getid(); + for (id = 0; id < blake3_impl_getcnt(); id++) { + blake3_impl_setid(id); cs = &chksum_stat_data[cbid++]; cs->init = abd_checksum_blake3_tmpl_init; cs->func = abd_checksum_blake3_native; cs->free = abd_checksum_blake3_tmpl_free; cs->name = "blake3"; - cs->impl = blake3_get_impl_name(); + cs->impl = blake3_impl_getname(); chksum_benchit(cs); if (cs->bs256k > max) { max = cs->bs256k; - blake3_set_impl_fastest(id); + blake3_impl_set_fastest(id); } } + + /* restore initial value */ + blake3_impl_setid(id_save); } void @@ -329,9 +334,6 @@ chksum_init(void) chksum_kstat_addr); 
kstat_install(chksum_kstat); } - - /* setup implementations */ - blake3_setup_impl(); } void diff --git a/tests/zfs-tests/cmd/checksum/blake3_test.c b/tests/zfs-tests/cmd/checksum/blake3_test.c index d57d0e047..648e1faaa 100644 --- a/tests/zfs-tests/cmd/checksum/blake3_test.c +++ b/tests/zfs-tests/cmd/checksum/blake3_test.c @@ -497,9 +497,9 @@ main(int argc, char *argv[]) } (void) printf("Running algorithm correctness tests:\n"); - for (id = 0; id < blake3_get_impl_count(); id++) { - blake3_set_impl_id(id); - const char *name = blake3_get_impl_name(); + for (id = 0; id < blake3_impl_getcnt(); id++) { + blake3_impl_setid(id); + const char *name = blake3_impl_getname(); dprintf("Result for BLAKE3-%s:\n", name); for (i = 0; TestArray[i].hash; i++) { blake3_test_t *cur = &TestArray[i]; @@ -565,9 +565,9 @@ main(int argc, char *argv[]) } while (0) printf("Running performance tests (hashing 1024 MiB of data):\n"); - for (id = 0; id < blake3_get_impl_count(); id++) { - blake3_set_impl_id(id); - const char *name = blake3_get_impl_name(); + for (id = 0; id < blake3_impl_getcnt(); id++) { + blake3_impl_setid(id); + const char *name = blake3_impl_getname(); BLAKE3_PERF_TEST(name, 256); }