Fix BLAKE3 tunable and module loading on Linux and FreeBSD

Apply options to BLAKE3 similar to those already used for zfs_fletcher_4_impl.

The zfs module parameter on Linux changes from icp_blake3_impl to
zfs_blake3_impl.

You can check and set it on Linux via sysfs like this:
```
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle [fastest] generic sse2 sse41 avx2

[bash]# echo sse2 > /sys/module/zfs/parameters/zfs_blake3_impl
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic [sse2] sse41 avx2
```

The parameter can now also be set at module load time via modprobe:
```
[bash]# modprobe zfs zfs_blake3_impl=sse41
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic sse2 [sse41] avx2
```

On FreeBSD the BLAKE3 implementation can be set via sysctl like this:
```
[bsd]# sysctl vfs.zfs.blake3_impl
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2
[bsd]# sysctl vfs.zfs.blake3_impl=sse2
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2 \
  -> cycle fastest generic [sse2] sse41 avx2
```

This commit also changes some BLAKE3 internals:
- blake3_impl_ops_t was renamed to blake3_ops_t
- all functions are named blake3_impl_NAME() now

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Co-authored-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13725
This commit is contained in:
Tino Reichardt 2022-08-03 18:36:41 +02:00 committed by Brian Behlendorf
parent 7dee043af5
commit 75e8b5ad84
10 changed files with 273 additions and 197 deletions

View File

@ -6413,7 +6413,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
void *res2 = &zc_res2;
/* BLAKE3_KEY_LEN = 32 */
VERIFY0(blake3_set_impl_name("generic"));
VERIFY0(blake3_impl_setname("generic"));
templ = abd_checksum_blake3_tmpl_init(&salt);
Blake3_InitKeyed(&ctx, salt_ptr);
Blake3_Update(&ctx, buf, size);
@ -6422,7 +6422,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
ZIO_CHECKSUM_BSWAP(&zc_ref2);
abd_checksum_blake3_tmpl_free(templ);
VERIFY0(blake3_set_impl_name("cycle"));
VERIFY0(blake3_impl_setname("cycle"));
while (run_count-- > 0) {
/* Test current implementation */

View File

@ -31,10 +31,6 @@
#include <sys/sysctl.h>
#define EXPORT_SYMBOL(x)
#define module_param(a, b, c)
#define MODULE_PARM_DESC(a, b)
#define ZMOD_RW CTLFLAG_RWTUN
#define ZMOD_RD CTLFLAG_RDTUN
@ -98,6 +94,9 @@
#define fletcher_4_param_set_args(var) \
CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
#define blake3_param_set_args(var) \
CTLTYPE_STRING, NULL, 0, blake3_param, "A"
#include <sys/kernel.h>
#define module_init(fn) \
static void \

View File

@ -72,7 +72,7 @@ typedef struct {
*/
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
/* const blake3_impl_ops_t *ops */
/* const blake3_ops_t *ops */
const void *ops;
} BLAKE3_CTX;
@ -97,26 +97,23 @@ extern void **blake3_per_cpu_ctx;
extern void blake3_per_cpu_ctx_init(void);
extern void blake3_per_cpu_ctx_fini(void);
/* return number of supported implementations */
extern int blake3_get_impl_count(void);
/* get count of supported implementations */
extern uint32_t blake3_impl_getcnt(void);
/* return id of selected implementation */
extern int blake3_get_impl_id(void);
/* get id of selected implementation */
extern uint32_t blake3_impl_getid(void);
/* return name of selected implementation */
extern const char *blake3_get_impl_name(void);
/* get name of selected implementation */
extern const char *blake3_impl_getname(void);
/* setup id as fastest implementation */
extern void blake3_set_impl_fastest(uint32_t id);
extern void blake3_impl_set_fastest(uint32_t id);
/* set implementation by id */
extern void blake3_set_impl_id(uint32_t id);
extern void blake3_impl_setid(uint32_t id);
/* set implementation by name */
extern int blake3_set_impl_name(const char *name);
/* set startup implementation */
extern void blake3_setup_impl(void);
extern int blake3_impl_setname(const char *name);
#ifdef __cplusplus
}

View File

@ -129,7 +129,7 @@ static output_t make_output(const uint32_t input_cv[8],
* bytes. For that reason, chaining values in the CV stack are represented as
* bytes.
*/
static void output_chaining_value(const blake3_impl_ops_t *ops,
static void output_chaining_value(const blake3_ops_t *ops,
const output_t *ctx, uint8_t cv[32])
{
uint32_t cv_words[8];
@ -139,7 +139,7 @@ static void output_chaining_value(const blake3_impl_ops_t *ops,
store_cv_words(cv, cv_words);
}
static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
static void output_root_bytes(const blake3_ops_t *ops, const output_t *ctx,
uint64_t seek, uint8_t *out, size_t out_len)
{
uint64_t output_block_counter = seek / 64;
@ -163,7 +163,7 @@ static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
}
}
static void chunk_state_update(const blake3_impl_ops_t *ops,
static void chunk_state_update(const blake3_ops_t *ops,
blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len)
{
if (ctx->buf_len > 0) {
@ -230,7 +230,7 @@ static size_t left_len(size_t content_len)
* number of chunks hashed. These chunks are never the root and never empty;
* those cases use a different codepath.
*/
static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
static size_t compress_chunks_parallel(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
{
@ -274,7 +274,7 @@ static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
* return it as an additional output.) These parents are never the root and
* never empty; those cases use a different codepath.
*/
static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
static size_t compress_parents_parallel(const blake3_ops_t *ops,
const uint8_t *child_chaining_values, size_t num_chaining_values,
const uint32_t key[8], uint8_t flags, uint8_t *out)
{
@ -320,7 +320,7 @@ static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
* of implementing this special rule? Because we don't want to limit SIMD or
* multi-threading parallelism for that update().
*/
static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
static size_t blake3_compress_subtree_wide(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
{
@ -406,7 +406,7 @@ static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
* As with compress_subtree_wide(), this function is not used on inputs of 1
* chunk or less. That's a different codepath.
*/
static void compress_subtree_to_parent_node(const blake3_impl_ops_t *ops,
static void compress_subtree_to_parent_node(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN])
{

View File

@ -192,7 +192,7 @@ static inline boolean_t blake3_is_generic_supported(void)
return (B_TRUE);
}
const blake3_impl_ops_t blake3_generic_impl = {
const blake3_ops_t blake3_generic_impl = {
.compress_in_place = blake3_compress_in_place_generic,
.compress_xof = blake3_compress_xof_generic,
.hash_many = blake3_hash_many_generic,

View File

@ -28,7 +28,7 @@
#include "blake3_impl.h"
static const blake3_impl_ops_t *const blake3_impls[] = {
static const blake3_ops_t *const blake3_impls[] = {
&blake3_generic_impl,
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE2)) || \
@ -48,160 +48,199 @@ static const blake3_impl_ops_t *const blake3_impls[] = {
#endif
};
/* this pointer holds current ops for implementation */
static const blake3_impl_ops_t *blake3_selected_impl = &blake3_generic_impl;
/* special implementation selections */
/* Select BLAKE3 implementation */
#define IMPL_FASTEST (UINT32_MAX)
#define IMPL_CYCLE (UINT32_MAX-1)
#define IMPL_USER (UINT32_MAX-2)
#define IMPL_PARAM (UINT32_MAX-3)
#define IMPL_CYCLE (UINT32_MAX - 1)
#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
static uint32_t icp_blake3_impl = IMPL_FASTEST;
#define BLAKE3_IMPL_NAME_MAX 16
/* Indicate that benchmark has been done */
static boolean_t blake3_initialized = B_FALSE;
/* id of fastest implementation */
static uint32_t blake3_fastest_id = 0;
/*
 * Implementation that contains the fastest methods.
 *
 * The initializer only provides a placeholder name; the whole structure is
 * overwritten with memcpy() by blake3_impl_init() (copy of blake3_impls[0])
 * and by blake3_impl_set_fastest() (copy of the selected supported entry).
 */
static blake3_ops_t blake3_fastest_impl = {
.name = "fastest"
};
/* currently used id */
static uint32_t blake3_current_id = 0;
/* Hold all supported implementations */
static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)];
static uint32_t blake3_supp_impls_cnt = 0;
/* id of module parameter (-1 == unused) */
static int blake3_param_id = -1;
/* Currently selected implementation */
static uint32_t blake3_impl_chosen = IMPL_FASTEST;
/* return number of supported implementations */
int
blake3_get_impl_count(void)
/*
 * Special selectors that blake3_impl_setname() checks by name before it
 * looks at the list of supported implementations.
 */
static struct blake3_impl_selector {
	const char *name;	/* name as given by the user */
	uint32_t sel;		/* matching IMPL_* selector value */
} blake3_impl_selectors[] = {
	{ .name = "cycle",   .sel = IMPL_CYCLE },
	{ .name = "fastest", .sel = IMPL_FASTEST }
};
/* check the supported implementations */
static void blake3_impl_init(void)
{
static int impls = 0;
int i;
int i, c;
if (impls)
return (impls);
/* init only once */
if (likely(blake3_initialized))
return;
for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) {
if (!blake3_impls[i]->is_supported()) continue;
impls++;
/* move supported implementations into blake3_supp_impls */
for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) {
const blake3_ops_t *impl = blake3_impls[i];
if (impl->is_supported && impl->is_supported())
blake3_supp_impls[c++] = impl;
}
blake3_supp_impls_cnt = c;
return (impls);
/* first init generic impl, may be changed via set_fastest() */
memcpy(&blake3_fastest_impl, blake3_impls[0],
sizeof (blake3_fastest_impl));
blake3_initialized = B_TRUE;
}
/* return id of selected implementation */
int
blake3_get_impl_id(void)
/* get number of supported implementations */
uint32_t
blake3_impl_getcnt(void)
{
return (blake3_current_id);
blake3_impl_init();
return (blake3_supp_impls_cnt);
}
/* return name of selected implementation */
/*
 * Get the id of the selected implementation.
 *
 * The raw selector value is returned, so the result may be one of the
 * special values IMPL_FASTEST or IMPL_CYCLE instead of an index into the
 * list of supported implementations.
 */
uint32_t
blake3_impl_getid(void)
{
	const uint32_t chosen = IMPL_READ(blake3_impl_chosen);

	return (chosen);
}
/* get name of selected implementation */
const char *
blake3_get_impl_name(void)
blake3_impl_getname(void)
{
return (blake3_selected_impl->name);
uint32_t impl = IMPL_READ(blake3_impl_chosen);
blake3_impl_init();
switch (impl) {
case IMPL_FASTEST:
return ("fastest");
case IMPL_CYCLE:
return ("cycle");
default:
return (blake3_supp_impls[impl]->name);
}
}
/* setup id as fastest implementation */
void
blake3_set_impl_fastest(uint32_t id)
blake3_impl_set_fastest(uint32_t id)
{
blake3_fastest_id = id;
/* setup fastest impl */
memcpy(&blake3_fastest_impl, blake3_supp_impls[id],
sizeof (blake3_fastest_impl));
}
/* set implementation by id */
void
blake3_set_impl_id(uint32_t id)
blake3_impl_setid(uint32_t id)
{
int i, cid;
/* select fastest */
if (id == IMPL_FASTEST)
id = blake3_fastest_id;
/* select next or first */
if (id == IMPL_CYCLE)
id = (++blake3_current_id) % blake3_get_impl_count();
/* 0..N for the real impl */
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
if (!blake3_impls[i]->is_supported()) continue;
if (cid == id) {
blake3_current_id = cid;
blake3_selected_impl = blake3_impls[i];
return;
}
cid++;
blake3_impl_init();
switch (id) {
case IMPL_FASTEST:
atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST);
break;
case IMPL_CYCLE:
atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE);
break;
default:
ASSERT3U(id, >=, 0);
ASSERT3U(id, <, blake3_supp_impls_cnt);
atomic_swap_32(&blake3_impl_chosen, id);
break;
}
}
/* set implementation by name */
int
blake3_set_impl_name(const char *name)
blake3_impl_setname(const char *val)
{
int i, cid;
uint32_t impl = IMPL_READ(blake3_impl_chosen);
size_t val_len;
int i, err = -EINVAL;
if (strcmp(name, "fastest") == 0) {
atomic_swap_32(&icp_blake3_impl, IMPL_FASTEST);
blake3_set_impl_id(IMPL_FASTEST);
return (0);
} else if (strcmp(name, "cycle") == 0) {
atomic_swap_32(&icp_blake3_impl, IMPL_CYCLE);
blake3_set_impl_id(IMPL_CYCLE);
return (0);
blake3_impl_init();
val_len = strlen(val);
while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */
val_len--;
/* check mandatory implementations */
for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) {
const char *name = blake3_impl_selectors[i].name;
if (val_len == strlen(name) &&
strncmp(val, name, val_len) == 0) {
impl = blake3_impl_selectors[i].sel;
err = 0;
break;
}
}
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
if (!blake3_impls[i]->is_supported()) continue;
if (strcmp(name, blake3_impls[i]->name) == 0) {
if (icp_blake3_impl == IMPL_PARAM) {
blake3_param_id = cid;
return (0);
if (err != 0 && blake3_initialized) {
/* check all supported implementations */
for (i = 0; i < blake3_supp_impls_cnt; i++) {
const char *name = blake3_supp_impls[i]->name;
if (val_len == strlen(name) &&
strncmp(val, name, val_len) == 0) {
impl = i;
err = 0;
break;
}
blake3_selected_impl = blake3_impls[i];
blake3_current_id = cid;
return (0);
}
cid++;
}
return (-EINVAL);
if (err == 0) {
atomic_swap_32(&blake3_impl_chosen, impl);
}
return (err);
}
/* setup implementation */
void
blake3_setup_impl(void)
{
switch (IMPL_READ(icp_blake3_impl)) {
case IMPL_PARAM:
blake3_set_impl_id(blake3_param_id);
atomic_swap_32(&icp_blake3_impl, IMPL_USER);
break;
case IMPL_FASTEST:
blake3_set_impl_id(IMPL_FASTEST);
break;
case IMPL_CYCLE:
blake3_set_impl_id(IMPL_CYCLE);
break;
default:
blake3_set_impl_id(blake3_current_id);
break;
}
}
/* return selected implementation */
const blake3_impl_ops_t *
const blake3_ops_t *
blake3_impl_get_ops(void)
{
/* each call to ops will cycle */
if (icp_blake3_impl == IMPL_CYCLE)
blake3_set_impl_id(IMPL_CYCLE);
const blake3_ops_t *ops = NULL;
uint32_t impl = IMPL_READ(blake3_impl_chosen);
return (blake3_selected_impl);
blake3_impl_init();
switch (impl) {
case IMPL_FASTEST:
ASSERT(blake3_initialized);
ops = &blake3_fastest_impl;
break;
case IMPL_CYCLE:
/* Cycle through supported implementations */
ASSERT(blake3_initialized);
ASSERT3U(blake3_supp_impls_cnt, >, 0);
static uint32_t cycle_count = 0;
uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt;
ops = blake3_supp_impls[idx];
break;
default:
ASSERT3U(blake3_supp_impls_cnt, >, 0);
ASSERT3U(impl, <, blake3_supp_impls_cnt);
ops = blake3_supp_impls[impl];
break;
}
ASSERT3P(ops, !=, NULL);
return (ops);
}
#if defined(_KERNEL)
void **blake3_per_cpu_ctx;
void
@ -215,6 +254,9 @@ blake3_per_cpu_ctx_init(void)
blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
KM_SLEEP);
}
/* init once in kernel mode */
blake3_impl_init();
}
void
@ -227,58 +269,94 @@ blake3_per_cpu_ctx_fini(void)
memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
}
#endif
#if defined(_KERNEL) && defined(__linux__)
static int
icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp)
{
char req_name[BLAKE3_IMPL_NAME_MAX];
size_t i;
#define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ")
/* sanitize input */
i = strnlen(name, BLAKE3_IMPL_NAME_MAX);
if (i == 0 || i >= BLAKE3_IMPL_NAME_MAX)
return (-EINVAL);
strlcpy(req_name, name, BLAKE3_IMPL_NAME_MAX);
while (i > 0 && isspace(req_name[i-1]))
i--;
req_name[i] = '\0';
atomic_swap_32(&icp_blake3_impl, IMPL_PARAM);
return (blake3_set_impl_name(req_name));
}
#if defined(__linux__)
static int
icp_blake3_impl_get(char *buffer, zfs_kernel_param_t *kp)
blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
{
int i, cid, cnt = 0;
const uint32_t impl = IMPL_READ(blake3_impl_chosen);
char *fmt;
int cnt = 0;
/* cycling */
fmt = (icp_blake3_impl == IMPL_CYCLE) ? "[cycle] " : "cycle ";
cnt += sprintf(buffer + cnt, fmt);
fmt = IMPL_FMT(impl, IMPL_CYCLE);
cnt += sprintf(buffer + cnt, fmt, "cycle");
/* fastest one */
fmt = (icp_blake3_impl == IMPL_FASTEST) ? "[fastest] " : "fastest ";
cnt += sprintf(buffer + cnt, fmt);
/* list fastest */
fmt = IMPL_FMT(impl, IMPL_FASTEST);
cnt += sprintf(buffer + cnt, fmt, "fastest");
/* user selected */
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
if (!blake3_impls[i]->is_supported()) continue;
fmt = (icp_blake3_impl == IMPL_USER &&
cid == blake3_current_id) ? "[%s] " : "%s ";
cnt += sprintf(buffer + cnt, fmt, blake3_impls[i]->name);
cid++;
/* list all supported implementations */
for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
fmt = IMPL_FMT(impl, i);
cnt += sprintf(buffer + cnt, fmt,
blake3_supp_impls[i]->name);
}
buffer[cnt] = 0;
return (cnt);
}
module_param_call(icp_blake3_impl, icp_blake3_impl_set, icp_blake3_impl_get,
NULL, 0644);
MODULE_PARM_DESC(icp_blake3_impl, "Select BLAKE3 implementation.");
/*
 * Linux module parameter "set" callback.
 *
 * Passes the user supplied string on to blake3_impl_setname() and returns
 * its result unchanged. The kernel parameter descriptor is not used.
 */
static int
blake3_param_set(const char *val, zfs_kernel_param_t *unused)
{
	int rc;

	(void) unused;
	rc = blake3_impl_setname(val);

	return (rc);
}
#elif defined(__FreeBSD__)
#include <sys/sbuf.h>
/*
 * FreeBSD sysctl handler for the BLAKE3 implementation tunable.
 *
 * Read request (req->newptr == NULL): print "cycle", "fastest" and every
 * supported implementation name, with the currently chosen entry wrapped in
 * square brackets.
 * Write request: fetch the requested name and select it through
 * blake3_impl_setname().
 *
 * Returns 0 on success, otherwise the error reported by sbuf_finish() or
 * sysctl_handle_string(), or the negated result of blake3_impl_setname().
 */
static int
blake3_param(ZFS_MODULE_PARAM_ARGS)
{
	int err;

	if (req->newptr == NULL) {
		const uint32_t impl = IMPL_READ(blake3_impl_chosen);
		const int init_buflen = 64;
		const char *fmt;
		struct sbuf *s;

		s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);

		/* cycling */
		fmt = IMPL_FMT(impl, IMPL_CYCLE);
		(void) sbuf_printf(s, fmt, "cycle");

		/* list fastest */
		fmt = IMPL_FMT(impl, IMPL_FASTEST);
		(void) sbuf_printf(s, fmt, "fastest");

		/* list all supported implementations */
		for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
			fmt = IMPL_FMT(impl, i);
			(void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name);
		}

		err = sbuf_finish(s);
		sbuf_delete(s);

		return (err);
	}

	/*
	 * Start from an empty, fully zeroed string: the buffer is handed to
	 * sysctl_handle_string() as the current value, so it must not contain
	 * indeterminate stack bytes.
	 */
	char buf[16] = "";

	err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
	if (err) {
		return (err);
	}

	/* blake3_impl_setname() returns 0 or -EINVAL; hand back a positive code */
	return (-blake3_impl_setname(buf));
}
#endif
#undef IMPL_FMT
/*
 * Register the BLAKE3 implementation tunable (zfs_blake3_impl module
 * parameter on Linux, vfs.zfs.blake3_impl sysctl on FreeBSD).
 */
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl,
    blake3_param_set, blake3_param_get, ZMOD_RW,
    "Select BLAKE3 implementation.");
#endif

View File

@ -62,31 +62,31 @@ typedef struct blake3_impl_ops {
blake3_is_supported_f is_supported;
int degree;
const char *name;
} blake3_impl_ops_t;
} blake3_ops_t;
/* Return selected BLAKE3 implementation ops */
extern const blake3_impl_ops_t *blake3_impl_get_ops(void);
extern const blake3_ops_t *blake3_impl_get_ops(void);
extern const blake3_impl_ops_t blake3_generic_impl;
extern const blake3_ops_t blake3_generic_impl;
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE2)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern const blake3_impl_ops_t blake3_sse2_impl;
extern const blake3_ops_t blake3_sse2_impl;
#endif
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern const blake3_impl_ops_t blake3_sse41_impl;
extern const blake3_ops_t blake3_sse41_impl;
#endif
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
extern const blake3_impl_ops_t blake3_avx2_impl;
extern const blake3_ops_t blake3_avx2_impl;
#endif
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
extern const blake3_impl_ops_t blake3_avx512_impl;
extern const blake3_ops_t blake3_avx512_impl;
#endif
#if defined(__x86_64)

View File

@ -81,7 +81,7 @@ static boolean_t blake3_is_sse2_supported(void)
#endif
}
const blake3_impl_ops_t blake3_sse2_impl = {
const blake3_ops_t blake3_sse2_impl = {
.compress_in_place = blake3_compress_in_place_sse2,
.compress_xof = blake3_compress_xof_sse2,
.hash_many = blake3_hash_many_sse2,
@ -147,7 +147,7 @@ static boolean_t blake3_is_sse41_supported(void)
#endif
}
const blake3_impl_ops_t blake3_sse41_impl = {
const blake3_ops_t blake3_sse41_impl = {
.compress_in_place = blake3_compress_in_place_sse41,
.compress_xof = blake3_compress_xof_sse41,
.hash_many = blake3_hash_many_sse41,
@ -179,7 +179,7 @@ static boolean_t blake3_is_avx2_supported(void)
zfs_avx2_available());
}
const blake3_impl_ops_t blake3_avx2_impl = {
const blake3_ops_t blake3_avx2_impl = {
.compress_in_place = blake3_compress_in_place_sse41,
.compress_xof = blake3_compress_xof_sse41,
.hash_many = blake3_hash_many_avx2,
@ -237,7 +237,7 @@ static boolean_t blake3_is_avx512_supported(void)
zfs_avx512vl_available());
}
const blake3_impl_ops_t blake3_avx512_impl = {
const blake3_ops_t blake3_avx512_impl = {
.compress_in_place = blake3_compress_in_place_avx512,
.compress_xof = blake3_compress_xof_avx512,
.hash_many = blake3_hash_many_avx512,

View File

@ -244,12 +244,13 @@ chksum_benchmark(void)
#endif
chksum_stat_t *cs;
int cbid = 0, id;
int cbid = 0;
uint64_t max = 0;
uint32_t id, id_save;
/* space for the benchmark times */
chksum_stat_cnt = 4;
chksum_stat_cnt += blake3_get_impl_count();
chksum_stat_cnt += blake3_impl_getcnt();
chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
@ -290,20 +291,24 @@ chksum_benchmark(void)
chksum_benchit(cs);
/* blake3 */
for (id = 0; id < blake3_get_impl_count(); id++) {
blake3_set_impl_id(id);
id_save = blake3_impl_getid();
for (id = 0; id < blake3_impl_getcnt(); id++) {
blake3_impl_setid(id);
cs = &chksum_stat_data[cbid++];
cs->init = abd_checksum_blake3_tmpl_init;
cs->func = abd_checksum_blake3_native;
cs->free = abd_checksum_blake3_tmpl_free;
cs->name = "blake3";
cs->impl = blake3_get_impl_name();
cs->impl = blake3_impl_getname();
chksum_benchit(cs);
if (cs->bs256k > max) {
max = cs->bs256k;
blake3_set_impl_fastest(id);
blake3_impl_set_fastest(id);
}
}
/* restore initial value */
blake3_impl_setid(id_save);
}
void
@ -329,9 +334,6 @@ chksum_init(void)
chksum_kstat_addr);
kstat_install(chksum_kstat);
}
/* setup implementations */
blake3_setup_impl();
}
void

View File

@ -497,9 +497,9 @@ main(int argc, char *argv[])
}
(void) printf("Running algorithm correctness tests:\n");
for (id = 0; id < blake3_get_impl_count(); id++) {
blake3_set_impl_id(id);
const char *name = blake3_get_impl_name();
for (id = 0; id < blake3_impl_getcnt(); id++) {
blake3_impl_setid(id);
const char *name = blake3_impl_getname();
dprintf("Result for BLAKE3-%s:\n", name);
for (i = 0; TestArray[i].hash; i++) {
blake3_test_t *cur = &TestArray[i];
@ -565,9 +565,9 @@ main(int argc, char *argv[])
} while (0)
printf("Running performance tests (hashing 1024 MiB of data):\n");
for (id = 0; id < blake3_get_impl_count(); id++) {
blake3_set_impl_id(id);
const char *name = blake3_get_impl_name();
for (id = 0; id < blake3_impl_getcnt(); id++) {
blake3_impl_setid(id);
const char *name = blake3_impl_getname();
BLAKE3_PERF_TEST(name, 256);
}