Fix BLAKE3 tuneable and module loading on Linux and FreeBSD

Apply similar options to BLAKE3 as it is done for zfs_fletcher_4_impl.

The zfs module parameter on Linux changes from icp_blake3_impl to
zfs_blake3_impl.

You can check and set it on Linux via sysfs like this:
```
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle [fastest] generic sse2 sse41 avx2

[bash]# echo sse2 > /sys/module/zfs/parameters/zfs_blake3_impl
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic [sse2] sse41 avx2
```

The modprobe module parameters may also be used now:
```
[bash]# modprobe zfs zfs_blake3_impl=sse41
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic sse2 [sse41] avx2
```

On FreeBSD the BLAKE3 implementation can be set via sysctl like this:
```
[bsd]# sysctl vfs.zfs.blake3_impl
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2
[bsd]# sysctl vfs.zfs.blake3_impl=sse2
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2 \
  -> cycle fastest generic [sse2] sse41 avx2
```

This commit changes also some Blake3 internals like these:
- blake3_impl_ops_t was renamed to blake3_ops_t
- all functions are named blake3_impl_NAME() now

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Co-authored-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13725
This commit is contained in:
Tino Reichardt
2022-08-03 18:36:41 +02:00
committed by Brian Behlendorf
parent 7dee043af5
commit 75e8b5ad84
10 changed files with 273 additions and 197 deletions
+7 -7
View File
@@ -129,7 +129,7 @@ static output_t make_output(const uint32_t input_cv[8],
* bytes. For that reason, chaining values in the CV stack are represented as
* bytes.
*/
static void output_chaining_value(const blake3_impl_ops_t *ops,
static void output_chaining_value(const blake3_ops_t *ops,
const output_t *ctx, uint8_t cv[32])
{
uint32_t cv_words[8];
@@ -139,7 +139,7 @@ static void output_chaining_value(const blake3_impl_ops_t *ops,
store_cv_words(cv, cv_words);
}
static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
static void output_root_bytes(const blake3_ops_t *ops, const output_t *ctx,
uint64_t seek, uint8_t *out, size_t out_len)
{
uint64_t output_block_counter = seek / 64;
@@ -163,7 +163,7 @@ static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
}
}
static void chunk_state_update(const blake3_impl_ops_t *ops,
static void chunk_state_update(const blake3_ops_t *ops,
blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len)
{
if (ctx->buf_len > 0) {
@@ -230,7 +230,7 @@ static size_t left_len(size_t content_len)
* number of chunks hashed. These chunks are never the root and never empty;
* those cases use a different codepath.
*/
static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
static size_t compress_chunks_parallel(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
{
@@ -274,7 +274,7 @@ static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
* return it as an additional output.) These parents are never the root and
* never empty; those cases use a different codepath.
*/
static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
static size_t compress_parents_parallel(const blake3_ops_t *ops,
const uint8_t *child_chaining_values, size_t num_chaining_values,
const uint32_t key[8], uint8_t flags, uint8_t *out)
{
@@ -320,7 +320,7 @@ static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
* of implementing this special rule? Because we don't want to limit SIMD or
* multi-threading parallelism for that update().
*/
static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
static size_t blake3_compress_subtree_wide(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
{
@@ -406,7 +406,7 @@ static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
* As with compress_subtree_wide(), this function is not used on inputs of 1
* chunk or less. That's a different codepath.
*/
static void compress_subtree_to_parent_node(const blake3_impl_ops_t *ops,
static void compress_subtree_to_parent_node(const blake3_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN])
{