From 9029278dde70a85f720e7a75f4c6f69dcd25ee0f Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Mon, 3 Jul 2023 23:28:46 +1000 Subject: [PATCH] ddt: rework ops interface in terms of keys and values Store objects store keys and values, so have them take those types and nothing more. This way, they don't need to be concerned about the "kind" of entry being operated on; the dispatch layer can take care of the appropriate conversions. This adds a "contains" op to see if a particular entry exists without loading it, which makes a couple of things easier to do; in particular, it allows us to avoid an allocation in ddt_class_contains(). Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Sponsored-by: Klara, Inc. Sponsored-by: iXsystems, Inc. Closes #15887 --- include/sys/ddt_impl.h | 20 ++++++++------ module/Makefile.bsd | 1 + module/zfs/ddt.c | 48 ++++++++++++++++++++------------- module/zfs/ddt_zap.c | 61 +++++++++++++++++++++++++----------------- 4 files changed, 78 insertions(+), 52 deletions(-) diff --git a/include/sys/ddt_impl.h b/include/sys/ddt_impl.h index 09937c714..d66936588 100644 --- a/include/sys/ddt_impl.h +++ b/include/sys/ddt_impl.h @@ -41,15 +41,19 @@ typedef struct { int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx, boolean_t prehash); int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx); - int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde); + int (*ddt_op_lookup)(objset_t *os, uint64_t object, + const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize); + int (*ddt_op_contains)(objset_t *os, uint64_t object, + const ddt_key_t *ddk); void (*ddt_op_prefetch)(objset_t *os, uint64_t object, - ddt_entry_t *dde); - int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde, + const ddt_key_t *ddk); + int (*ddt_op_update)(objset_t *os, uint64_t object, + const ddt_key_t *ddk, const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx); - int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde, - dmu_tx_t *tx); - int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde, - uint64_t *walk); + int (*ddt_op_remove)(objset_t *os, uint64_t object, + const ddt_key_t *ddk, dmu_tx_t *tx); + int (*ddt_op_walk)(objset_t *os, uint64_t object, uint64_t *walk, + ddt_key_t *ddk, ddt_phys_t *phys, size_t psize); int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count); } ddt_ops_t; @@ -62,7 +66,7 @@ extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg); * outside of the DDT implementation proper, and if you do, consider moving * them up. */ -#define DDT_NAMELEN 107 +#define DDT_NAMELEN 110 extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde); diff --git a/module/Makefile.bsd b/module/Makefile.bsd index 1b0110d3a..e9ad69fc5 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -421,6 +421,7 @@ CFLAGS.gcc+= -Wno-pointer-to-int-cast CFLAGS.abd.c= -Wno-cast-qual CFLAGS.ddt.c= -Wno-cast-qual +CFLAGS.ddt_zap.c= -Wno-cast-qual CFLAGS.dmu.c= -Wno-cast-qual CFLAGS.dmu_traverse.c= -Wno-cast-qual CFLAGS.dnode.c= ${NO_WUNUSED_BUT_SET_VARIABLE} diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c index 1b7063998..df79de74e 100644 --- a/module/zfs/ddt.c +++ b/module/zfs/ddt.c @@ -186,18 +186,30 @@ ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class, return (SET_ERROR(ENOENT)); return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os, - ddt->ddt_object[type][class], dde)); + ddt->ddt_object[type][class], &dde->dde_key, + dde->dde_phys, sizeof (dde->dde_phys))); +} + +static int +ddt_object_contains(ddt_t *ddt, ddt_type_t type, ddt_class_t class, + const ddt_key_t *ddk) +{ + if (!ddt_object_exists(ddt, type, class)) + return (SET_ERROR(ENOENT)); + + return (ddt_ops[type]->ddt_op_contains(ddt->ddt_os, + ddt->ddt_object[type][class], ddk)); } static void ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class, - ddt_entry_t *dde) + const ddt_key_t *ddk) { if (!ddt_object_exists(ddt, type, class)) return; ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os, - ddt->ddt_object[type][class], dde); + ddt->ddt_object[type][class], ddk); } static int @@ -207,17 +219,18 @@ ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class, ASSERT(ddt_object_exists(ddt, type, class)); return (ddt_ops[type]->ddt_op_update(ddt->ddt_os, - ddt->ddt_object[type][class], dde, tx)); + ddt->ddt_object[type][class], &dde->dde_key, dde->dde_phys, + sizeof (dde->dde_phys), tx)); } static int ddt_object_remove(ddt_t *ddt, ddt_type_t type, ddt_class_t class, - ddt_entry_t *dde, dmu_tx_t *tx) + const ddt_key_t *ddk, dmu_tx_t *tx) { ASSERT(ddt_object_exists(ddt, type, class)); return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os, - ddt->ddt_object[type][class], dde, tx)); + ddt->ddt_object[type][class], ddk, tx)); } int @@ -227,7 +240,8 @@ ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class, ASSERT(ddt_object_exists(ddt, type, class)); return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os, - ddt->ddt_object[type][class], dde, walk)); + ddt->ddt_object[type][class], walk, &dde->dde_key, + dde->dde_phys, sizeof (dde->dde_phys))); } int @@ -523,7 +537,7 @@ void ddt_prefetch(spa_t *spa, const blkptr_t *bp) { ddt_t *ddt; - ddt_entry_t dde; + ddt_key_t ddk; if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp)) return; @@ -534,11 +548,11 @@ ddt_prefetch(spa_t *spa, const blkptr_t *bp) * Thus no locking is required as the DDT can't disappear on us. */ ddt = ddt_select(spa, bp); - ddt_key_fill(&dde.dde_key, bp); + ddt_key_fill(&ddk, bp); for (ddt_type_t type = 0; type < DDT_TYPES; type++) { for (ddt_class_t class = 0; class < DDT_CLASSES; class++) { - ddt_object_prefetch(ddt, type, class, &dde); + ddt_object_prefetch(ddt, type, class, &ddk); } } } @@ -660,7 +674,7 @@ boolean_t ddt_class_contains(spa_t *spa, ddt_class_t max_class, const blkptr_t *bp) { ddt_t *ddt; - ddt_entry_t *dde; + ddt_key_t ddk; if (!BP_GET_DEDUP(bp)) return (B_FALSE); @@ -669,20 +683,16 @@ ddt_class_contains(spa_t *spa, ddt_class_t max_class, const blkptr_t *bp) return (B_TRUE); ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)]; - dde = kmem_cache_alloc(ddt_entry_cache, KM_SLEEP); - ddt_key_fill(&(dde->dde_key), bp); + ddt_key_fill(&ddk, bp); for (ddt_type_t type = 0; type < DDT_TYPES; type++) { for (ddt_class_t class = 0; class <= max_class; class++) { - if (ddt_object_lookup(ddt, type, class, dde) == 0) { - kmem_cache_free(ddt_entry_cache, dde); + if (ddt_object_contains(ddt, type, class, &ddk) == 0) return (B_TRUE); - } } } - kmem_cache_free(ddt_entry_cache, dde); return (B_FALSE); } @@ -833,9 +843,9 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) if (otype != DDT_TYPES && (otype != ntype || oclass != nclass || total_refcnt == 0)) { - VERIFY0(ddt_object_remove(ddt, otype, oclass, dde, tx)); + VERIFY0(ddt_object_remove(ddt, otype, oclass, ddk, tx)); ASSERT3U( - ddt_object_lookup(ddt, otype, oclass, dde), ==, ENOENT); + ddt_object_contains(ddt, otype, oclass, ddk), ==, ENOENT); } if (total_refcnt != 0) { diff --git a/module/zfs/ddt_zap.c b/module/zfs/ddt_zap.c index 56312881f..741554de3 100644 --- a/module/zfs/ddt_zap.c +++ b/module/zfs/ddt_zap.c @@ -42,7 +42,7 @@ static unsigned int ddt_zap_default_ibs = 15; #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) static size_t -ddt_zap_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len) +ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len) { uchar_t *version = dst++; int cpfunc = ZIO_COMPRESS_ZLE; @@ -51,7 +51,8 @@ ddt_zap_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len) ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */ - c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level); + c_len = ci->ci_compress((void *)src, dst, s_len, d_len - 1, + ci->ci_level); if (c_len == s_len) { cpfunc = ZIO_COMPRESS_OFF; @@ -93,8 +94,10 @@ ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash) *objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP, ddt_zap_default_bs, ddt_zap_default_ibs, DMU_OT_NONE, 0, tx); + if (*objectp == 0) + return (SET_ERROR(ENOTSUP)); - return (*objectp == 0 ? SET_ERROR(ENOTSUP) : 0); + return (0); } static int @@ -104,51 +107,57 @@ ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx) } static int -ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde) +ddt_zap_lookup(objset_t *os, uint64_t object, + const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize) { uchar_t *cbuf; uint64_t one, csize; int error; - error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key, + error = zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS, &one, &csize); if (error) return (error); ASSERT3U(one, ==, 1); - ASSERT3U(csize, <=, (sizeof (dde->dde_phys) + 1)); + ASSERT3U(csize, <=, psize + 1); cbuf = kmem_alloc(csize, KM_SLEEP); - error = zap_lookup_uint64(os, object, (uint64_t *)&dde->dde_key, + error = zap_lookup_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS, 1, csize, cbuf); if (error == 0) - ddt_zap_decompress(cbuf, dde->dde_phys, csize, - sizeof (dde->dde_phys)); + ddt_zap_decompress(cbuf, phys, csize, psize); kmem_free(cbuf, csize); return (error); } -static void -ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde) +static int +ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk) { - (void) zap_prefetch_uint64(os, object, (uint64_t *)&dde->dde_key, - DDT_KEY_WORDS); + return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS, + NULL, NULL)); +} + +static void +ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk) +{ + (void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS); } static int -ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx) +ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk, + const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx) { - const size_t cbuf_size = sizeof (dde->dde_phys) + 1; + const size_t cbuf_size = psize + 1; uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP); - uint64_t csize = ddt_zap_compress(dde->dde_phys, cbuf, - sizeof (dde->dde_phys), cbuf_size); + uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size); - int error = zap_update_uint64(os, object, (uint64_t *)&dde->dde_key, + int error = zap_update_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS, 1, csize, cbuf, tx); kmem_free(cbuf, cbuf_size); @@ -157,14 +166,16 @@ ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx) } static int -ddt_zap_remove(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx) +ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk, + dmu_tx_t *tx) { - return (zap_remove_uint64(os, object, (uint64_t *)&dde->dde_key, + return (zap_remove_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS, tx)); } static int -ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk) +ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk, + ddt_phys_t *phys, size_t psize) { zap_cursor_t zc; zap_attribute_t za; @@ -186,7 +197,7 @@ ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk) uint64_t csize = za.za_num_integers; ASSERT3U(za.za_integer_length, ==, 1); - ASSERT3U(csize, <=, sizeof (dde->dde_phys) + 1); + ASSERT3U(csize, <=, psize + 1); uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP); @@ -194,9 +205,8 @@ ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk) DDT_KEY_WORDS, 1, csize, cbuf); ASSERT0(error); if (error == 0) { - ddt_zap_decompress(cbuf, dde->dde_phys, csize, - sizeof (dde->dde_phys)); - dde->dde_key = *(ddt_key_t *)za.za_name; + ddt_zap_decompress(cbuf, phys, csize, psize); + *ddk = *(ddt_key_t *)za.za_name; } kmem_free(cbuf, csize); @@ -219,6 +229,7 @@ const ddt_ops_t ddt_zap_ops = { ddt_zap_create, ddt_zap_destroy, ddt_zap_lookup, + ddt_zap_contains, ddt_zap_prefetch, ddt_zap_update, ddt_zap_remove,