Use fletcher_4 routines natively with abd_iterate_func()

This patch adds the necessary infrastructure for ABD to make use
of the vectorized fletcher 4 routines.

- export ABD compatible interface from fletcher_4
- add ABD fletcher_4 tests for data and metadata ABD types.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Original-patch-by: Gvozden Neskovic <neskovic@gmail.com>
Signed-off-by: David Quigley <david.quigley@intel.com>
Closes #5589
This commit is contained in:
David Quigley 2017-02-01 10:34:22 -07:00 committed by Brian Behlendorf
parent 544b8053db
commit 2fe36b0bfb
4 changed files with 167 additions and 8 deletions

View File

@ -5680,6 +5680,7 @@ ztest_fletcher(ztest_ds_t *zd, uint64_t id)
while (gethrtime() <= end) { while (gethrtime() <= end) {
int run_count = 100; int run_count = 100;
void *buf; void *buf;
struct abd *abd_data, *abd_meta;
uint32_t size; uint32_t size;
int *ptr; int *ptr;
int i; int i;
@ -5687,11 +5688,17 @@ ztest_fletcher(ztest_ds_t *zd, uint64_t id)
zio_cksum_t zc_ref_byteswap; zio_cksum_t zc_ref_byteswap;
size = ztest_random_blocksize(); size = ztest_random_blocksize();
buf = umem_alloc(size, UMEM_NOFAIL); buf = umem_alloc(size, UMEM_NOFAIL);
abd_data = abd_alloc(size, B_FALSE);
abd_meta = abd_alloc(size, B_TRUE);
for (i = 0, ptr = buf; i < size / sizeof (*ptr); i++, ptr++) for (i = 0, ptr = buf; i < size / sizeof (*ptr); i++, ptr++)
*ptr = ztest_random(UINT_MAX); *ptr = ztest_random(UINT_MAX);
abd_copy_from_buf_off(abd_data, buf, 0, size);
abd_copy_from_buf_off(abd_meta, buf, 0, size);
VERIFY0(fletcher_4_impl_set("scalar")); VERIFY0(fletcher_4_impl_set("scalar"));
fletcher_4_native(buf, size, NULL, &zc_ref); fletcher_4_native(buf, size, NULL, &zc_ref);
fletcher_4_byteswap(buf, size, NULL, &zc_ref_byteswap); fletcher_4_byteswap(buf, size, NULL, &zc_ref_byteswap);
@ -5707,9 +5714,30 @@ ztest_fletcher(ztest_ds_t *zd, uint64_t id)
VERIFY0(bcmp(&zc, &zc_ref, sizeof (zc))); VERIFY0(bcmp(&zc, &zc_ref, sizeof (zc)));
VERIFY0(bcmp(&zc_byteswap, &zc_ref_byteswap, VERIFY0(bcmp(&zc_byteswap, &zc_ref_byteswap,
sizeof (zc_byteswap))); sizeof (zc_byteswap)));
/* Test ABD - data */
abd_fletcher_4_byteswap(abd_data, size, NULL,
&zc_byteswap);
abd_fletcher_4_native(abd_data, size, NULL, &zc);
VERIFY0(bcmp(&zc, &zc_ref, sizeof (zc)));
VERIFY0(bcmp(&zc_byteswap, &zc_ref_byteswap,
sizeof (zc_byteswap)));
/* Test ABD - metadata */
abd_fletcher_4_byteswap(abd_meta, size, NULL,
&zc_byteswap);
abd_fletcher_4_native(abd_meta, size, NULL, &zc);
VERIFY0(bcmp(&zc, &zc_ref, sizeof (zc)));
VERIFY0(bcmp(&zc_byteswap, &zc_ref_byteswap,
sizeof (zc_byteswap)));
} }
umem_free(buf, size); umem_free(buf, size);
abd_free(abd_data);
abd_free(abd_meta);
} }
} }

View File

@ -29,6 +29,7 @@
#include <sys/zio.h> #include <sys/zio.h>
#include <zfeature_common.h> #include <zfeature_common.h>
#include <zfs_fletcher.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -57,6 +58,28 @@ typedef enum zio_checksum_flags {
ZCHECKSUM_FLAG_NOPWRITE = (1 << 5) ZCHECKSUM_FLAG_NOPWRITE = (1 << 5)
} zio_checksum_flags_t; } zio_checksum_flags_t;
/*
 * Byte order in which an ABD checksum is computed.  The fletcher_4 ABD
 * adapter uses it to choose between the native and the byteswap entry
 * points of the selected implementation.
 */
typedef enum {
ZIO_CHECKSUM_NATIVE,
ZIO_CHECKSUM_BYTESWAP
} zio_byteorder_t;
/*
 * State carried through one ABD checksum computation.  It is handed to the
 * init/fini callbacks directly and to the iterator callback as its opaque
 * private argument.
 */
typedef struct zio_abd_checksum_data {
/* selects the native or the byteswap variant of the checksum */
zio_byteorder_t acd_byteorder;
/* working context passed to the fletcher_4 init/compute/fini ops */
fletcher_4_ctx_t *acd_ctx;
/* location of the resulting checksum */
zio_cksum_t *acd_zcp;
/* checksum-private data; the fletcher_4 adapter keeps its ops pointer here */
void *acd_private;
} zio_abd_checksum_data_t;
/*
 * Callback types for checksumming an ABD.  The init and fini callbacks
 * receive the checksum state; the iter callback receives a buffer pointer,
 * its length in bytes and the checksum state as an opaque pointer.
 */
typedef void zio_abd_checksum_init_t(zio_abd_checksum_data_t *);
typedef void zio_abd_checksum_fini_t(zio_abd_checksum_data_t *);
typedef int zio_abd_checksum_iter_t(void *, size_t, void *);
/*
 * Table of the three callbacks that together implement one checksum over
 * an ABD: init, then iter for each buffer, then fini.
 */
typedef const struct zio_abd_checksum_func {
zio_abd_checksum_init_t *acf_init;
zio_abd_checksum_fini_t *acf_fini;
zio_abd_checksum_iter_t *acf_iter;
} zio_abd_checksum_func_t;
/* /*
* Information about each checksum function. * Information about each checksum function.
*/ */
@ -99,6 +122,10 @@ extern zio_checksum_t abd_checksum_edonr_byteswap;
extern zio_checksum_tmpl_init_t abd_checksum_edonr_tmpl_init; extern zio_checksum_tmpl_init_t abd_checksum_edonr_tmpl_init;
extern zio_checksum_tmpl_free_t abd_checksum_edonr_tmpl_free; extern zio_checksum_tmpl_free_t abd_checksum_edonr_tmpl_free;
extern zio_abd_checksum_func_t fletcher_4_abd_ops;
extern zio_checksum_t abd_fletcher_4_native;
extern zio_checksum_t abd_fletcher_4_byteswap;
extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum, extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum,
void *, uint64_t, uint64_t, zio_bad_cksum_t *); void *, uint64_t, uint64_t, zio_bad_cksum_t *);
extern void zio_checksum_compute(zio_t *, enum zio_checksum, extern void zio_checksum_compute(zio_t *, enum zio_checksum,

View File

@ -141,6 +141,7 @@
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
#include <zfs_fletcher.h> #include <zfs_fletcher.h>
/*
 * Granularity, in bytes, to which a buffer size is rounded down (P2ALIGN)
 * before it is handed to the selected fletcher_4 implementation.  In the
 * ABD iterator any shorter remainder is handled by the scalar code.
 */
#define FLETCHER_MIN_SIMD_SIZE 64
static void fletcher_4_scalar_init(fletcher_4_ctx_t *ctx); static void fletcher_4_scalar_init(fletcher_4_ctx_t *ctx);
static void fletcher_4_scalar_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp); static void fletcher_4_scalar_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp);
@ -456,7 +457,7 @@ void
fletcher_4_native(const void *buf, uint64_t size, fletcher_4_native(const void *buf, uint64_t size,
const void *ctx_template, zio_cksum_t *zcp) const void *ctx_template, zio_cksum_t *zcp)
{ {
const uint64_t p2size = P2ALIGN(size, 64); const uint64_t p2size = P2ALIGN(size, FLETCHER_MIN_SIMD_SIZE);
ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t))); ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
@ -498,7 +499,7 @@ void
fletcher_4_byteswap(const void *buf, uint64_t size, fletcher_4_byteswap(const void *buf, uint64_t size,
const void *ctx_template, zio_cksum_t *zcp) const void *ctx_template, zio_cksum_t *zcp)
{ {
const uint64_t p2size = P2ALIGN(size, 64); const uint64_t p2size = P2ALIGN(size, FLETCHER_MIN_SIMD_SIZE);
ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t))); ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
@ -778,6 +779,87 @@ fletcher_4_fini(void)
} }
} }
/* ABD adapters */
/*
 * acf_init callback: pick the currently selected fletcher_4 implementation,
 * remember it in the checksum state and initialise the working context for
 * the requested byte order.
 */
static void
abd_fletcher_4_init(zio_abd_checksum_data_t *cdp)
{
	const fletcher_4_ops_t *impl = fletcher_4_impl_get();

	/* The iter and fini callbacks read the implementation back from here. */
	cdp->acd_private = (void *) impl;

	if (cdp->acd_byteorder != ZIO_CHECKSUM_NATIVE) {
		impl->init_byteswap(cdp->acd_ctx);
		return;
	}

	impl->init_native(cdp->acd_ctx);
}
/*
 * acf_fini callback: let the implementation recorded in acd_private turn
 * the working context into the final checksum at acd_zcp, using the entry
 * point that matches the requested byte order.
 */
static void
abd_fletcher_4_fini(zio_abd_checksum_data_t *cdp)
{
	/*
	 * The ops table was stored through a pointer-to-const in
	 * abd_fletcher_4_init(); keep it const here as it is only read.
	 */
	const fletcher_4_ops_t *ops = cdp->acd_private;

	ASSERT(ops);

	if (cdp->acd_byteorder == ZIO_CHECKSUM_NATIVE)
		ops->fini_native(cdp->acd_ctx, cdp->acd_zcp);
	else
		ops->fini_byteswap(cdp->acd_ctx, cdp->acd_zcp);
}
/*
 * Handle a trailing fragment that is shorter than FLETCHER_MIN_SIMD_SIZE.
 *
 * The running state is first finalised into *acd_zcp with the current
 * implementation, the checksum state is switched over to the scalar ops,
 * and the fragment is then passed to the incremental routine together with
 * that checksum.
 *
 *	native	B_TRUE for native byte order, B_FALSE for byteswap
 *	data	start of the fragment
 *	size	length of the fragment in bytes
 *	cdp	checksum state of the ongoing ABD iteration
 */
static void
abd_fletcher_4_simd2scalar(boolean_t native, void *data, size_t size,
    zio_abd_checksum_data_t *cdp)
{
	zio_cksum_t *zcp = cdp->acd_zcp;

	ASSERT3U(size, <, FLETCHER_MIN_SIMD_SIZE);

	abd_fletcher_4_fini(cdp);
	cdp->acd_private = (void *)&fletcher_4_scalar_ops;

	if (native)
		fletcher_4_incremental_native(data, size, zcp);
	else
		fletcher_4_incremental_byteswap(data, size, zcp);

	/*
	 * From now on the scalar ops are in charge: every later compute
	 * call and the final fini operate on acd_ctx, not on zcp.  Seed the
	 * scalar state with the checksum accumulated so far so that it is
	 * carried forward instead of being replaced by whatever the
	 * previous implementation left in the context.
	 * NOTE(review): relies on fletcher_4_ctx_t having a zio_cksum_t
	 * member named `scalar` that fletcher_4_scalar_ops works on --
	 * confirm against zfs_fletcher.h.
	 */
	cdp->acd_ctx->scalar = *zcp;
}
static int
abd_fletcher_4_iter(void *data, size_t size, void *private)
{
zio_abd_checksum_data_t *cdp = (zio_abd_checksum_data_t *)private;
fletcher_4_ctx_t *ctx = cdp->acd_ctx;
fletcher_4_ops_t *ops = (fletcher_4_ops_t *)cdp->acd_private;
boolean_t native = cdp->acd_byteorder == ZIO_CHECKSUM_NATIVE;
uint64_t asize = P2ALIGN(size, FLETCHER_MIN_SIMD_SIZE);
ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
if (asize > 0) {
if (native)
ops->compute_native(ctx, data, asize);
else
ops->compute_byteswap(ctx, data, asize);
size -= asize;
data = (char *)data + asize;
}
if (size > 0) {
ASSERT3U(size, <, FLETCHER_MIN_SIMD_SIZE);
/* At this point we have to switch to scalar impl */
abd_fletcher_4_simd2scalar(native, data, size, cdp);
}
return (0);
}
/*
 * fletcher_4 callbacks for checksumming an ABD: acf_init selects the
 * implementation, acf_iter is invoked per buffer, acf_fini produces the
 * final checksum.
 */
zio_abd_checksum_func_t fletcher_4_abd_ops = {
.acf_init = abd_fletcher_4_init,
.acf_fini = abd_fletcher_4_fini,
.acf_iter = abd_fletcher_4_iter
};
#if defined(_KERNEL) && defined(HAVE_SPL) #if defined(_KERNEL) && defined(HAVE_SPL)
#include <linux/mod_compat.h> #include <linux/mod_compat.h>
@ -829,4 +911,5 @@ EXPORT_SYMBOL(fletcher_4_native_varsize);
EXPORT_SYMBOL(fletcher_4_byteswap); EXPORT_SYMBOL(fletcher_4_byteswap);
EXPORT_SYMBOL(fletcher_4_incremental_native); EXPORT_SYMBOL(fletcher_4_incremental_native);
EXPORT_SYMBOL(fletcher_4_incremental_byteswap); EXPORT_SYMBOL(fletcher_4_incremental_byteswap);
EXPORT_SYMBOL(fletcher_4_abd_ops);
#endif #endif

View File

@ -119,14 +119,29 @@ abd_fletcher_2_byteswap(abd_t *abd, uint64_t size,
fletcher_2_incremental_byteswap, zcp); fletcher_2_incremental_byteswap, zcp);
} }
/*
 * Run one complete fletcher_4 checksum over the first `size` bytes of
 * `abd`: initialise the state in `acdp`, feed every buffer of the ABD to
 * the iterator callback, then finalise the checksum.
 */
static inline void
abd_fletcher_4_impl(abd_t *abd, uint64_t size, zio_abd_checksum_data_t *acdp)
{
	zio_abd_checksum_func_t *ops = &fletcher_4_abd_ops;

	ops->acf_init(acdp);
	/* The iterator's return value is deliberately not inspected. */
	(void) abd_iterate_func(abd, 0, size, ops->acf_iter, acdp);
	ops->acf_fini(acdp);
}
/*ARGSUSED*/ /*ARGSUSED*/
void void
abd_fletcher_4_native(abd_t *abd, uint64_t size, abd_fletcher_4_native(abd_t *abd, uint64_t size,
const void *ctx_template, zio_cksum_t *zcp) const void *ctx_template, zio_cksum_t *zcp)
{ {
fletcher_init(zcp); fletcher_4_ctx_t ctx;
(void) abd_iterate_func(abd, 0, size,
fletcher_4_incremental_native, zcp); zio_abd_checksum_data_t acd = {
.acd_byteorder = ZIO_CHECKSUM_NATIVE,
.acd_zcp = zcp,
.acd_ctx = &ctx
};
abd_fletcher_4_impl(abd, size, &acd);
} }
/*ARGSUSED*/ /*ARGSUSED*/
@ -134,9 +149,15 @@ void
abd_fletcher_4_byteswap(abd_t *abd, uint64_t size, abd_fletcher_4_byteswap(abd_t *abd, uint64_t size,
const void *ctx_template, zio_cksum_t *zcp) const void *ctx_template, zio_cksum_t *zcp)
{ {
fletcher_init(zcp); fletcher_4_ctx_t ctx;
(void) abd_iterate_func(abd, 0, size,
fletcher_4_incremental_byteswap, zcp); zio_abd_checksum_data_t acd = {
.acd_byteorder = ZIO_CHECKSUM_BYTESWAP,
.acd_zcp = zcp,
.acd_ctx = &ctx
};
abd_fletcher_4_impl(abd, size, &acd);
} }
zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {