BRT: Rework structures and locks to be per-vdev

While block cloning operation from the beginning was made per-vdev,
before this change most of its data were protected by two pool-
wide locks.  It created lots of lock contention in many workload.

This change makes most of block cloning data structures per-vdev,
which allows to lock them separately.  The only pool-wide lock now
it spa_brt_lock, protecting array of per-vdev pointers and in most
cases taken as reader.  Also this splits per-vdev locks into three
different ones: bv_pending_lock protects the AVL-tree of pending
operations in open context, bv_mos_entries_lock protects BRT ZAP
object from while being prefetched, and bv_lock protects the rest
of per-vdev context during TXG commit process.  There should be
no functional difference aside of some optimizations.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Pawel Jakub Dawidek <pjd@FreeBSD.org>
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #16740
This commit is contained in:
Alexander Motin
2024-11-10 17:29:25 -05:00
committed by Brian Behlendorf
parent 1917c26944
commit 409aad3f33
6 changed files with 401 additions and 533 deletions
+13 -21
View File
@@ -2119,9 +2119,6 @@ dump_brt(spa_t *spa)
return;
}
brt_t *brt = spa->spa_brt;
VERIFY(brt);
char count[32], used[32], saved[32];
zdb_nicebytes(brt_get_used(spa), used, sizeof (used));
zdb_nicebytes(brt_get_saved(spa), saved, sizeof (saved));
@@ -2132,11 +2129,8 @@ dump_brt(spa_t *spa)
if (dump_opt['T'] < 2)
return;
for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
if (brtvd == NULL)
continue;
for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
if (!brtvd->bv_initiated) {
printf("BRT: vdev %" PRIu64 ": empty\n", vdevid);
continue;
@@ -2160,20 +2154,21 @@ dump_brt(spa_t *spa)
if (!do_histo)
printf("\n%-16s %-10s\n", "DVA", "REFCNT");
for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
if (brtvd == NULL || !brtvd->bv_initiated)
for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
if (!brtvd->bv_initiated)
continue;
uint64_t counts[64] = {};
zap_cursor_t zc;
zap_attribute_t *za = zap_attribute_alloc();
for (zap_cursor_init(&zc, brt->brt_mos, brtvd->bv_mos_entries);
for (zap_cursor_init(&zc, spa->spa_meta_objset,
brtvd->bv_mos_entries);
zap_cursor_retrieve(&zc, za) == 0;
zap_cursor_advance(&zc)) {
uint64_t refcnt;
VERIFY0(zap_lookup_uint64(brt->brt_mos,
VERIFY0(zap_lookup_uint64(spa->spa_meta_objset,
brtvd->bv_mos_entries,
(const uint64_t *)za->za_name, 1,
za->za_integer_length, za->za_num_integers,
@@ -8227,14 +8222,11 @@ dump_mos_leaks(spa_t *spa)
}
}
if (spa->spa_brt != NULL) {
brt_t *brt = spa->spa_brt;
for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
if (brtvd != NULL && brtvd->bv_initiated) {
mos_obj_refd(brtvd->bv_mos_brtvdev);
mos_obj_refd(brtvd->bv_mos_entries);
}
for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
if (brtvd->bv_initiated) {
mos_obj_refd(brtvd->bv_mos_brtvdev);
mos_obj_refd(brtvd->bv_mos_entries);
}
}