DDT: Switch to using ZAP _by_dnode() interfaces

As was previously done for BRT, avoid holding and releasing the DDT ZAP
dnodes on every access.  Instead, hold each dnode for the whole lifetime
of its DDT object rather than releasing it after each operation.

While at it, add _by_dnode() interfaces for zap_length_uint64()
and zap_count(), both of which are heavily used by the DDT code.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com>
Closes #18047
This commit is contained in:
Alexander Motin 2025-12-15 12:49:14 -05:00 committed by GitHub
parent 46d6f1fe56
commit ff5414406f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 132 additions and 63 deletions

View File

@ -326,6 +326,7 @@ typedef struct {
/* per-type/per-class entry store objects */
uint64_t ddt_object[DDT_TYPES][DDT_CLASSES];
dnode_t *ddt_object_dnode[DDT_TYPES][DDT_CLASSES];
/* object ids for stored, logged and per-type/per-class stats */
uint64_t ddt_stat_object;

View File

@ -163,21 +163,18 @@ typedef struct {
int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
boolean_t prehash);
int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
int (*ddt_op_lookup)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, void *phys, size_t psize);
int (*ddt_op_contains)(objset_t *os, uint64_t object,
const ddt_key_t *ddk);
void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
const ddt_key_t *ddk);
void (*ddt_op_prefetch_all)(objset_t *os, uint64_t object);
int (*ddt_op_update)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, const void *phys, size_t psize,
int (*ddt_op_lookup)(dnode_t *dn, const ddt_key_t *ddk,
void *phys, size_t psize);
int (*ddt_op_contains)(dnode_t *dn, const ddt_key_t *ddk);
void (*ddt_op_prefetch)(dnode_t *dn, const ddt_key_t *ddk);
void (*ddt_op_prefetch_all)(dnode_t *dn);
int (*ddt_op_update)(dnode_t *dn, const ddt_key_t *ddk,
const void *phys, size_t psize, dmu_tx_t *tx);
int (*ddt_op_remove)(dnode_t *dn, const ddt_key_t *ddk,
dmu_tx_t *tx);
int (*ddt_op_remove)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, dmu_tx_t *tx);
int (*ddt_op_walk)(objset_t *os, uint64_t object, uint64_t *walk,
ddt_key_t *ddk, void *phys, size_t psize);
int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
int (*ddt_op_walk)(dnode_t *dn, uint64_t *walk, ddt_key_t *ddk,
void *phys, size_t psize);
int (*ddt_op_count)(dnode_t *dn, uint64_t *count);
} ddt_ops_t;
extern const ddt_ops_t ddt_zap_ops;

View File

@ -288,6 +288,8 @@ int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
uint64_t *integer_size, uint64_t *num_integers);
int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, uint64_t *integer_size, uint64_t *num_integers);
int zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
int key_numints, uint64_t *integer_size, uint64_t *num_integers);
/*
* Remove the specified attribute.
@ -309,6 +311,7 @@ int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
* object.
*/
int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
int zap_count_by_dnode(dnode_t *dn, uint64_t *count);
/*
* Returns (in name) the name of the entry whose (value & mask)

View File

@ -407,6 +407,9 @@ ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
VERIFY0(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash));
ASSERT3U(*objectp, !=, 0);
VERIFY0(dnode_hold(os, *objectp, ddt,
&ddt->ddt_object_dnode[type][class]));
ASSERT3U(ddt->ddt_version, !=, DDT_VERSION_UNCONFIGURED);
VERIFY0(zap_add(os, ddt->ddt_dir_object, name, sizeof (uint64_t), 1,
@ -437,6 +440,10 @@ ddt_object_destroy(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
VERIFY0(count);
VERIFY0(zap_remove(os, ddt->ddt_dir_object, name, tx));
VERIFY0(zap_remove(os, spa->spa_ddt_stat_object, name, tx));
if (ddt->ddt_object_dnode[type][class] != NULL) {
dnode_rele(ddt->ddt_object_dnode[type][class], ddt);
ddt->ddt_object_dnode[type][class] = NULL;
}
VERIFY0(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx));
memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t));
@ -468,28 +475,38 @@ ddt_object_load(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
if (error != 0)
return (error);
error = dnode_hold(ddt->ddt_os, ddt->ddt_object[type][class], ddt,
&ddt->ddt_object_dnode[type][class]);
if (error != 0)
return (error);
error = zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t),
&ddt->ddt_histogram[type][class]);
if (error != 0)
return (error);
goto error;
/*
* Seed the cached statistics.
*/
error = ddt_object_info(ddt, type, class, &doi);
if (error)
return (error);
goto error;
error = ddt_object_count(ddt, type, class, &count);
if (error)
return (error);
goto error;
ddo->ddo_count = count;
ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9;
ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
return (0);
error:
dnode_rele(ddt->ddt_object_dnode[type][class], ddt);
ddt->ddt_object_dnode[type][class] = NULL;
return (error);
}
static void
@ -528,11 +545,11 @@ static int
ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_entry_t *dde)
{
if (!ddt_object_exists(ddt, type, class))
dnode_t *dn = ddt->ddt_object_dnode[type][class];
if (dn == NULL)
return (SET_ERROR(ENOENT));
return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os,
ddt->ddt_object[type][class], &dde->dde_key,
return (ddt_ops[type]->ddt_op_lookup(dn, &dde->dde_key,
dde->dde_phys, DDT_PHYS_SIZE(ddt)));
}
@ -540,42 +557,42 @@ static int
ddt_object_contains(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
const ddt_key_t *ddk)
{
if (!ddt_object_exists(ddt, type, class))
dnode_t *dn = ddt->ddt_object_dnode[type][class];
if (dn == NULL)
return (SET_ERROR(ENOENT));
return (ddt_ops[type]->ddt_op_contains(ddt->ddt_os,
ddt->ddt_object[type][class], ddk));
return (ddt_ops[type]->ddt_op_contains(dn, ddk));
}
static void
ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
const ddt_key_t *ddk)
{
if (!ddt_object_exists(ddt, type, class))
dnode_t *dn = ddt->ddt_object_dnode[type][class];
if (dn == NULL)
return;
ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os,
ddt->ddt_object[type][class], ddk);
ddt_ops[type]->ddt_op_prefetch(dn, ddk);
}
static void
ddt_object_prefetch_all(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
{
if (!ddt_object_exists(ddt, type, class))
dnode_t *dn = ddt->ddt_object_dnode[type][class];
if (dn == NULL)
return;
ddt_ops[type]->ddt_op_prefetch_all(ddt->ddt_os,
ddt->ddt_object[type][class]);
ddt_ops[type]->ddt_op_prefetch_all(dn);
}
static int
ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
const ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx)
{
ASSERT(ddt_object_exists(ddt, type, class));
dnode_t *dn = ddt->ddt_object_dnode[type][class];
ASSERT(dn != NULL);
return (ddt_ops[type]->ddt_op_update(ddt->ddt_os,
ddt->ddt_object[type][class], &ddlwe->ddlwe_key,
return (ddt_ops[type]->ddt_op_update(dn, &ddlwe->ddlwe_key,
&ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt), tx));
}
@ -583,20 +600,20 @@ static int
ddt_object_remove(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
const ddt_key_t *ddk, dmu_tx_t *tx)
{
ASSERT(ddt_object_exists(ddt, type, class));
dnode_t *dn = ddt->ddt_object_dnode[type][class];
ASSERT(dn != NULL);
return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os,
ddt->ddt_object[type][class], ddk, tx));
return (ddt_ops[type]->ddt_op_remove(dn, ddk, tx));
}
int
ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
uint64_t *walk, ddt_lightweight_entry_t *ddlwe)
{
ASSERT(ddt_object_exists(ddt, type, class));
dnode_t *dn = ddt->ddt_object_dnode[type][class];
ASSERT(dn != NULL);
int error = ddt_ops[type]->ddt_op_walk(ddt->ddt_os,
ddt->ddt_object[type][class], walk, &ddlwe->ddlwe_key,
int error = ddt_ops[type]->ddt_op_walk(dn, walk, &ddlwe->ddlwe_key,
&ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt));
if (error == 0) {
ddlwe->ddlwe_type = type;
@ -610,10 +627,10 @@ int
ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
uint64_t *count)
{
ASSERT(ddt_object_exists(ddt, type, class));
dnode_t *dn = ddt->ddt_object_dnode[type][class];
ASSERT(dn != NULL);
return (ddt_ops[type]->ddt_op_count(ddt->ddt_os,
ddt->ddt_object[type][class], count));
return (ddt_ops[type]->ddt_op_count(dn, count));
}
int
@ -1718,6 +1735,15 @@ ddt_table_free(ddt_t *ddt)
wmsum_fini(&ddt->ddt_kstat_dds_lookup_stored_miss);
ddt_log_free(ddt);
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
for (ddt_class_t class = 0; class < DDT_CLASSES; class++) {
if (ddt->ddt_object_dnode[type][class] != NULL) {
dnode_rele(ddt->ddt_object_dnode[type][class],
ddt);
ddt->ddt_object_dnode[type][class] = NULL;
}
}
}
ASSERT0(avl_numnodes(&ddt->ddt_tree));
ASSERT0(avl_numnodes(&ddt->ddt_repair_tree));
avl_destroy(&ddt->ddt_tree);

View File

@ -33,6 +33,7 @@
#include <sys/ddt_impl.h>
#include <sys/zap.h>
#include <sys/dmu_tx.h>
#include <sys/dnode.h>
#include <sys/zio_compress.h>
static unsigned int ddt_zap_default_bs = 15;
@ -120,14 +121,13 @@ ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
}
static int
ddt_zap_lookup(objset_t *os, uint64_t object,
const ddt_key_t *ddk, void *phys, size_t psize)
ddt_zap_lookup(dnode_t *dn, const ddt_key_t *ddk, void *phys, size_t psize)
{
uchar_t *cbuf;
uint64_t one, csize;
int error;
error = zap_length_uint64(os, object, (uint64_t *)ddk,
error = zap_length_uint64_by_dnode(dn, (uint64_t *)ddk,
DDT_KEY_WORDS, &one, &csize);
if (error)
return (error);
@ -137,7 +137,7 @@ ddt_zap_lookup(objset_t *os, uint64_t object,
cbuf = kmem_alloc(csize, KM_SLEEP);
error = zap_lookup_uint64(os, object, (uint64_t *)ddk,
error = zap_lookup_uint64_by_dnode(dn, (uint64_t *)ddk,
DDT_KEY_WORDS, 1, csize, cbuf);
if (error == 0)
ddt_zap_decompress(cbuf, phys, csize, psize);
@ -148,26 +148,27 @@ ddt_zap_lookup(objset_t *os, uint64_t object,
}
static int
ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk)
ddt_zap_contains(dnode_t *dn, const ddt_key_t *ddk)
{
return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS,
NULL, NULL));
return (zap_length_uint64_by_dnode(dn, (uint64_t *)ddk,
DDT_KEY_WORDS, NULL, NULL));
}
static void
ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)
ddt_zap_prefetch(dnode_t *dn, const ddt_key_t *ddk)
{
(void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);
(void) zap_prefetch_uint64_by_dnode(dn, (uint64_t *)ddk,
DDT_KEY_WORDS);
}
static void
ddt_zap_prefetch_all(objset_t *os, uint64_t object)
ddt_zap_prefetch_all(dnode_t *dn)
{
(void) zap_prefetch_object(os, object);
(void) zap_prefetch_object(dn->dn_objset, dn->dn_object);
}
static int
ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
ddt_zap_update(dnode_t *dn, const ddt_key_t *ddk,
const void *phys, size_t psize, dmu_tx_t *tx)
{
const size_t cbuf_size = psize + 1;
@ -176,7 +177,7 @@ ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size);
int error = zap_update_uint64(os, object, (uint64_t *)ddk,
int error = zap_update_uint64_by_dnode(dn, (uint64_t *)ddk,
DDT_KEY_WORDS, 1, csize, cbuf, tx);
kmem_free(cbuf, cbuf_size);
@ -185,15 +186,14 @@ ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
}
static int
ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk,
dmu_tx_t *tx)
ddt_zap_remove(dnode_t *dn, const ddt_key_t *ddk, dmu_tx_t *tx)
{
return (zap_remove_uint64(os, object, (uint64_t *)ddk,
return (zap_remove_uint64_by_dnode(dn, (uint64_t *)ddk,
DDT_KEY_WORDS, tx));
}
static int
ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
ddt_zap_walk(dnode_t *dn, uint64_t *walk, ddt_key_t *ddk,
void *phys, size_t psize)
{
zap_cursor_t zc;
@ -209,9 +209,10 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
* scrub I/Os for each ZAP block that we read in, so
* reading the ZAP is unlikely to be the bottleneck.
*/
zap_cursor_init_noprefetch(&zc, os, object);
zap_cursor_init_noprefetch(&zc, dn->dn_objset, dn->dn_object);
} else {
zap_cursor_init_serialized(&zc, os, object, *walk);
zap_cursor_init_serialized(&zc, dn->dn_objset, dn->dn_object,
*walk);
}
if ((error = zap_cursor_retrieve(&zc, za)) == 0) {
uint64_t csize = za->za_num_integers;
@ -221,7 +222,7 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP);
error = zap_lookup_uint64(os, object, (uint64_t *)za->za_name,
error = zap_lookup_uint64_by_dnode(dn, (uint64_t *)za->za_name,
DDT_KEY_WORDS, 1, csize, cbuf);
ASSERT0(error);
if (error == 0) {
@ -240,9 +241,9 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
}
static int
ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count)
ddt_zap_count(dnode_t *dn, uint64_t *count)
{
return (zap_count(os, object, count));
return (zap_count_by_dnode(dn, count));
}
const ddt_ops_t ddt_zap_ops = {

View File

@ -1049,6 +1049,24 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
return (err);
}
/*
 * Store in *count the number of entries in the ZAP referenced by the
 * caller-supplied dnode.  The ZAP is locked with RW_READER through
 * zap_lockdir_by_dnode() for the duration of the query and unlocked
 * before returning.
 *
 * Returns the error from zap_lockdir_by_dnode() if locking fails (in
 * which case *count is left untouched), otherwise the result of the
 * count itself (0 for a microzap).
 */
int
zap_count_by_dnode(dnode_t *dn, uint64_t *count)
{
	zap_t *zap;
	int err;

	err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
	    FTAG, &zap);
	if (err != 0)
		return (err);

	if (zap->zap_ismicro) {
		/* A microzap keeps its entry count directly in the zap_t. */
		*count = zap->zap_m.zap_num_entries;
	} else {
		err = fzap_count(zap, count);
	}

	zap_unlockdir(zap, FTAG);
	return (err);
}
/*
* zn may be NULL; if not specified, it will be computed if needed.
* See also the comment above zap_entry_normalization_conflict().
@ -1395,6 +1413,27 @@ zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
return (err);
}
/*
 * Query the value length of the entry identified by a uint64-array key
 * (key_numints words long) in the ZAP referenced by the caller-supplied
 * dnode.  integer_size and num_integers are handed straight through to
 * fzap_length(); callers in this change also pass NULL for both when
 * only the entry's existence matters.
 *
 * Returns the error from zap_lockdir_by_dnode() if locking fails,
 * ENOTSUP if zap_name_alloc_uint64() cannot build a name for this key,
 * otherwise whatever fzap_length() returns.
 */
int
zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
    int key_numints, uint64_t *integer_size, uint64_t *num_integers)
{
	zap_t *zap;
	zap_name_t *zn;
	int err;

	err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
	    FTAG, &zap);
	if (err != 0)
		return (err);

	zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn != NULL) {
		err = fzap_length(zn, integer_size, num_integers);
		zap_name_free(zn);
	} else {
		err = SET_ERROR(ENOTSUP);
	}

	/* Single unlock point shared by the success and failure paths. */
	zap_unlockdir(zap, FTAG);
	return (err);
}
static void
mzap_addent(zap_name_t *zn, uint64_t value)
{
@ -2016,12 +2055,14 @@ EXPORT_SYMBOL(zap_update_uint64);
EXPORT_SYMBOL(zap_update_uint64_by_dnode);
EXPORT_SYMBOL(zap_length);
EXPORT_SYMBOL(zap_length_uint64);
EXPORT_SYMBOL(zap_length_uint64_by_dnode);
EXPORT_SYMBOL(zap_remove);
EXPORT_SYMBOL(zap_remove_by_dnode);
EXPORT_SYMBOL(zap_remove_norm);
EXPORT_SYMBOL(zap_remove_uint64);
EXPORT_SYMBOL(zap_remove_uint64_by_dnode);
EXPORT_SYMBOL(zap_count);
EXPORT_SYMBOL(zap_count_by_dnode);
EXPORT_SYMBOL(zap_value_search);
EXPORT_SYMBOL(zap_join);
EXPORT_SYMBOL(zap_join_increment);