mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 19:28:53 +03:00
Fletcher4 implementation using avx512f instruction set
The algorithm runs 8 parallel sums, consuming 8x uint32_t elements per loop iteration. Size alignment of the main fletcher4 methods is adjusted accordingly. The new implementation is called 'avx512f'.

Note: the byteswap method can be implemented more efficiently when avx512bw hardware becomes available. Currently, it is ~2x slower than the native method.

The table shows results of the full (native) fletcher4 calculation for different buffer sizes:

fletcher4    4KB    16KB   64KB   128KB  256KB  1MB    16MB
--------------------------------------------------------------------
[scalar]     1213   1228   1231   1231   1225   1200   1160
[sse2]       2374   2442   2459   2456   2462   2250   2220
[avx2]       4288   4753   4871   4893   4900   4050   3882
[avx512f]    5975   8445   9196   9221   9262   6307   5620

Signed-off-by: Gvozden Neskovic <neskovic@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #4952
This commit is contained in:
committed by
Brian Behlendorf
parent
32ffaa3de5
commit
70b258fc96
@@ -158,6 +158,9 @@ static const fletcher_4_ops_t *fletcher_4_algos[] = {
|
||||
#if defined(HAVE_AVX) && defined(HAVE_AVX2)
|
||||
&fletcher_4_avx2_ops,
|
||||
#endif
|
||||
#if defined(__x86_64) && defined(HAVE_AVX512F)
|
||||
&fletcher_4_avx512f_ops,
|
||||
#endif
|
||||
};
|
||||
|
||||
static enum fletcher_selector {
|
||||
@@ -171,6 +174,9 @@ static enum fletcher_selector {
|
||||
#endif
|
||||
#if defined(HAVE_AVX) && defined(HAVE_AVX2)
|
||||
FLETCHER_AVX2,
|
||||
#endif
|
||||
#if defined(__x86_64) && defined(HAVE_AVX512F)
|
||||
FLETCHER_AVX512F,
|
||||
#endif
|
||||
FLETCHER_CYCLE
|
||||
} fletcher_4_impl_chosen = FLETCHER_SCALAR;
|
||||
@@ -190,6 +196,9 @@ static struct fletcher_4_impl_selector {
|
||||
#if defined(HAVE_AVX) && defined(HAVE_AVX2)
|
||||
[ FLETCHER_AVX2 ] = { "avx2", &fletcher_4_avx2_ops },
|
||||
#endif
|
||||
#if defined(__x86_64) && defined(HAVE_AVX512F)
|
||||
[ FLETCHER_AVX512F ] = { "avx512f", &fletcher_4_avx512f_ops },
|
||||
#endif
|
||||
#if !defined(_KERNEL)
|
||||
[ FLETCHER_CYCLE ] = { "cycle", &fletcher_4_scalar_ops }
|
||||
#endif
|
||||
@@ -354,7 +363,7 @@ fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
|
||||
{
|
||||
const fletcher_4_ops_t *ops;
|
||||
|
||||
-if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t)))
|
||||
if (IS_P2ALIGNED(size, 8 * sizeof (uint32_t)))
|
||||
ops = fletcher_4_impl_get();
|
||||
else
|
||||
ops = &fletcher_4_scalar_ops;
|
||||
@@ -370,7 +379,7 @@ fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
|
||||
{
|
||||
const fletcher_4_ops_t *ops;
|
||||
|
||||
-if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t)))
|
||||
if (IS_P2ALIGNED(size, 8 * sizeof (uint32_t)))
|
||||
ops = fletcher_4_impl_get();
|
||||
else
|
||||
ops = &fletcher_4_scalar_ops;
|
||||
|
||||
Reference in New Issue
Block a user