mirror_zfs/module/icp/algs/blake3/blake3_x86-64.c
Tino Reichardt 75e8b5ad84 Fix BLAKE3 tuneable and module loading on Linux and FreeBSD
Apply similar options to BLAKE3 as it is done for zfs_fletcher_4_impl.

The zfs module parameter on Linux changes from icp_blake3_impl to
zfs_blake3_impl.

You can check and set it on Linux via sysfs like this:
```
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle [fastest] generic sse2 sse41 avx2

[bash]# echo sse2 > /sys/module/zfs/parameters/zfs_blake3_impl
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic [sse2] sse41 avx2
```

The modprobe module parameters may also be used now:
```
[bash]# modprobe zfs zfs_blake3_impl=sse41
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic sse2 [sse41] avx2
```

On FreeBSD the BLAKE3 implementation can be set via sysctl like this:
```
[bsd]# sysctl vfs.zfs.blake3_impl
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2
[bsd]# sysctl vfs.zfs.blake3_impl=sse2
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2 \
  -> cycle fastest generic [sse2] sse41 avx2
```

This commit changes also some Blake3 internals like these:
- blake3_impl_ops_t was renamed to blake3_ops_t
- all functions are named blake3_impl_NAME() now

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Co-authored-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13725
2022-09-16 14:25:53 -07:00

249 lines
8.4 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#include "blake3_impl.h"
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE2)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags);
extern void zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]);
extern void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
static void blake3_compress_in_place_sse2(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags) {
kfpu_begin();
zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
flags);
kfpu_end();
}
static void blake3_compress_xof_sse2(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]) {
kfpu_begin();
zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
out);
kfpu_end();
}
static void blake3_hash_many_sse2(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
kfpu_begin();
zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
kfpu_end();
}
static boolean_t blake3_is_sse2_supported(void)
{
#if defined(__x86_64)
return (kfpu_allowed() && zfs_sse2_available());
#elif defined(__PPC64__) && defined(__linux__)
return (kfpu_allowed() && zfs_vsx_available());
#else
return (kfpu_allowed());
#endif
}
const blake3_ops_t blake3_sse2_impl = {
.compress_in_place = blake3_compress_in_place_sse2,
.compress_xof = blake3_compress_xof_sse2,
.hash_many = blake3_hash_many_sse2,
.is_supported = blake3_is_sse2_supported,
.degree = 4,
.name = "sse2"
};
#endif
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE2)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern void zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags);
extern void zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]);
extern void zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
static void blake3_compress_in_place_sse41(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags) {
kfpu_begin();
zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
flags);
kfpu_end();
}
static void blake3_compress_xof_sse41(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]) {
kfpu_begin();
zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
out);
kfpu_end();
}
static void blake3_hash_many_sse41(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
kfpu_begin();
zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
kfpu_end();
}
static boolean_t blake3_is_sse41_supported(void)
{
#if defined(__x86_64)
return (kfpu_allowed() && zfs_sse4_1_available());
#elif defined(__PPC64__) && defined(__linux__)
return (kfpu_allowed() && zfs_vsx_available());
#else
return (kfpu_allowed());
#endif
}
const blake3_ops_t blake3_sse41_impl = {
.compress_in_place = blake3_compress_in_place_sse41,
.compress_xof = blake3_compress_xof_sse41,
.hash_many = blake3_hash_many_sse41,
.is_supported = blake3_is_sse41_supported,
.degree = 4,
.name = "sse41"
};
#endif
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
extern void zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
static void blake3_hash_many_avx2(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
kfpu_begin();
zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
kfpu_end();
}
static boolean_t blake3_is_avx2_supported(void)
{
return (kfpu_allowed() && zfs_sse4_1_available() &&
zfs_avx2_available());
}
const blake3_ops_t blake3_avx2_impl = {
.compress_in_place = blake3_compress_in_place_sse41,
.compress_xof = blake3_compress_xof_sse41,
.hash_many = blake3_hash_many_avx2,
.is_supported = blake3_is_avx2_supported,
.degree = 8,
.name = "avx2"
};
#endif
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
extern void zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags);
extern void zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]);
extern void zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
static void blake3_compress_in_place_avx512(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags) {
kfpu_begin();
zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
flags);
kfpu_end();
}
static void blake3_compress_xof_avx512(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]) {
kfpu_begin();
zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
out);
kfpu_end();
}
static void blake3_hash_many_avx512(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
kfpu_begin();
zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
kfpu_end();
}
static boolean_t blake3_is_avx512_supported(void)
{
return (kfpu_allowed() && zfs_avx512f_available() &&
zfs_avx512vl_available());
}
const blake3_ops_t blake3_avx512_impl = {
.compress_in_place = blake3_compress_in_place_avx512,
.compress_xof = blake3_compress_xof_avx512,
.hash_many = blake3_hash_many_avx512,
.is_supported = blake3_is_avx512_supported,
.degree = 16,
.name = "avx512"
};
#endif