DDT: Add locking for table ZAP destruction

Similar to BRT, DDT ZAP can be destroyed by sync context when it
becomes empty.  Respectively similar to BRT introduce RW-lock to
protect open context methods from the destruction.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com>
Closes #18115
This commit is contained in:
Alexander Motin 2026-01-13 18:07:15 -05:00 committed by GitHub
parent 1051c3d211
commit 765929cb4e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 79 additions and 23 deletions

View File

@ -284,6 +284,9 @@ typedef struct {
avl_tree_t ddt_tree; /* "live" (changed) entries this txg */
avl_tree_t ddt_repair_tree; /* entries being repaired */
/* Protects ddt_object[] and ddt_object_dnode[]. */
krwlock_t ddt_objects_lock ____cacheline_aligned;
/*
* Log trees are stable during I/O, and only modified during sync
* with exclusive access.

View File

@ -426,7 +426,6 @@ ddt_object_destroy(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
{
spa_t *spa = ddt->ddt_spa;
objset_t *os = ddt->ddt_os;
uint64_t *objectp = &ddt->ddt_object[type][class];
uint64_t count;
char name[DDT_NAMELEN];
@ -434,20 +433,24 @@ ddt_object_destroy(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_object_name(ddt, type, class, name);
ASSERT3U(*objectp, !=, 0);
ASSERT(ddt->ddt_object[type][class] != 0);
ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class]));
VERIFY0(ddt_object_count(ddt, type, class, &count));
VERIFY0(count);
VERIFY0(zap_remove(os, ddt->ddt_dir_object, name, tx));
VERIFY0(zap_remove(os, spa->spa_ddt_stat_object, name, tx));
if (ddt->ddt_object_dnode[type][class] != NULL) {
dnode_rele(ddt->ddt_object_dnode[type][class], ddt);
ddt->ddt_object_dnode[type][class] = NULL;
}
VERIFY0(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx));
memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t));
*objectp = 0;
uint64_t object = ddt->ddt_object[type][class];
dnode_t *dn = ddt->ddt_object_dnode[type][class];
rw_enter(&ddt->ddt_objects_lock, RW_WRITER);
ddt->ddt_object[type][class] = 0;
ddt->ddt_object_dnode[type][class] = NULL;
rw_exit(&ddt->ddt_objects_lock);
if (dn != NULL)
dnode_rele(dn, ddt);
VERIFY0(ddt_ops[type]->ddt_op_destroy(os, object, tx));
memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t));
}
static int
@ -553,6 +556,20 @@ ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
dde->dde_phys, DDT_PHYS_SIZE(ddt)));
}
/*
* Like ddt_object_lookup(), but for open context where we need protection
* against concurrent object destruction by sync context.
*/
static int
ddt_object_lookup_open(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_entry_t *dde)
{
rw_enter(&ddt->ddt_objects_lock, RW_READER);
int error = ddt_object_lookup(ddt, type, class, dde);
rw_exit(&ddt->ddt_objects_lock);
return (error);
}
static int
ddt_object_contains(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
const ddt_key_t *ddk)
@ -568,21 +585,33 @@ static void
ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
const ddt_key_t *ddk)
{
dnode_t *dn = ddt->ddt_object_dnode[type][class];
if (dn == NULL)
return;
/*
* Called from open context, so protect against concurrent
* object destruction by sync context.
*/
rw_enter(&ddt->ddt_objects_lock, RW_READER);
ddt_ops[type]->ddt_op_prefetch(dn, ddk);
dnode_t *dn = ddt->ddt_object_dnode[type][class];
if (dn != NULL)
ddt_ops[type]->ddt_op_prefetch(dn, ddk);
rw_exit(&ddt->ddt_objects_lock);
}
static void
ddt_object_prefetch_all(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
{
dnode_t *dn = ddt->ddt_object_dnode[type][class];
if (dn == NULL)
return;
/*
* Called from open context, so protect against concurrent
* object destruction by sync context.
*/
rw_enter(&ddt->ddt_objects_lock, RW_READER);
ddt_ops[type]->ddt_op_prefetch_all(dn);
dnode_t *dn = ddt->ddt_object_dnode[type][class];
if (dn != NULL)
ddt_ops[type]->ddt_op_prefetch_all(dn);
rw_exit(&ddt->ddt_objects_lock);
}
static int
@ -610,16 +639,26 @@ int
ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
uint64_t *walk, ddt_lightweight_entry_t *ddlwe)
{
/*
* Can be called from open context, so protect against concurrent
* object destruction by sync context.
*/
rw_enter(&ddt->ddt_objects_lock, RW_READER);
dnode_t *dn = ddt->ddt_object_dnode[type][class];
ASSERT(dn != NULL);
if (dn == NULL) {
rw_exit(&ddt->ddt_objects_lock);
return (SET_ERROR(ENOENT));
}
int error = ddt_ops[type]->ddt_op_walk(dn, walk, &ddlwe->ddlwe_key,
&ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt));
if (error == 0) {
ddlwe->ddlwe_type = type;
ddlwe->ddlwe_class = class;
return (0);
}
rw_exit(&ddt->ddt_objects_lock);
return (error);
}
@ -627,10 +666,22 @@ int
ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
uint64_t *count)
{
dnode_t *dn = ddt->ddt_object_dnode[type][class];
ASSERT(dn != NULL);
/*
* Can be called from open context, so protect against concurrent
* object destruction by sync context.
*/
rw_enter(&ddt->ddt_objects_lock, RW_READER);
return (ddt_ops[type]->ddt_op_count(dn, count));
dnode_t *dn = ddt->ddt_object_dnode[type][class];
if (dn == NULL) {
rw_exit(&ddt->ddt_objects_lock);
return (SET_ERROR(ENOENT));
}
int error = ddt_ops[type]->ddt_op_count(dn, count);
rw_exit(&ddt->ddt_objects_lock);
return (error);
}
int
@ -1698,6 +1749,7 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c)
sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node));
avl_create(&ddt->ddt_repair_tree, ddt_key_compare,
sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node));
rw_init(&ddt->ddt_objects_lock, NULL, RW_DEFAULT, NULL);
ddt->ddt_checksum = c;
ddt->ddt_spa = spa;
@ -1744,6 +1796,7 @@ ddt_table_free(ddt_t *ddt)
}
}
}
rw_destroy(&ddt->ddt_objects_lock);
ASSERT0(avl_numnodes(&ddt->ddt_tree));
ASSERT0(avl_numnodes(&ddt->ddt_repair_tree));
avl_destroy(&ddt->ddt_tree);
@ -1876,7 +1929,7 @@ ddt_repair_start(ddt_t *ddt, const blkptr_t *bp)
* there's definitely only one copy, so don't even try.
*/
if (class != DDT_CLASS_UNIQUE &&
ddt_object_lookup(ddt, type, class, dde) == 0)
ddt_object_lookup_open(ddt, type, class, dde) == 0)
return (dde);
}
}