From d06a1d9ac30d8ce6a64ac825f7f41db901a8d836 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 19 Feb 2026 13:36:35 -0500 Subject: [PATCH] Fix available space accounting for special/dedup (#18222) Currently, spa_dspace (base to calculate dataset AVAIL) only includes the normal allocation class capacity, but dd_used_bytes tracks space allocated across all classes. Since we don't want to report free space of other classes as available (we can't promise new allocations will be able to use it), report only allocated space, similar to how we report space saved by dedup and block cloning. Since we need deflated space here, make allocation classes track deflated allocated space also. While here, make mc_deferred also deflated, matching its use contexts. Also while here, use atomic_load() to read the allocation class stats. Reviewed-by: Rob Norris Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Closes #18190 Closes #18222 --- include/sys/metaslab.h | 4 +- include/sys/metaslab_impl.h | 8 ++-- module/zfs/metaslab.c | 72 ++++++++++++++++++++++------------- module/zfs/spa.c | 2 +- module/zfs/spa_log_spacemap.c | 5 +-- module/zfs/spa_misc.c | 5 ++- 6 files changed, 60 insertions(+), 36 deletions(-) diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index 36cbe06ba..0f711fe6f 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -117,12 +117,12 @@ boolean_t metaslab_class_throttle_unreserve(metaslab_class_t *, int, int, void metaslab_class_evict_old(metaslab_class_t *, uint64_t); const char *metaslab_class_get_name(metaslab_class_t *); uint64_t metaslab_class_get_alloc(metaslab_class_t *); +uint64_t metaslab_class_get_dalloc(metaslab_class_t *); uint64_t metaslab_class_get_space(metaslab_class_t *); uint64_t metaslab_class_get_dspace(metaslab_class_t *); uint64_t metaslab_class_get_deferred(metaslab_class_t *); -void metaslab_space_update(vdev_t *, metaslab_class_t *, - int64_t, int64_t, int64_t); +void 
metaslab_space_update(metaslab_group_t *, int64_t, int64_t, int64_t); metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *); void metaslab_group_destroy(metaslab_group_t *); diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index 6ce995d0a..faeb96fe9 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -199,10 +199,12 @@ struct metaslab_class { uint64_t mc_alloc_groups; /* # of allocatable groups */ - uint64_t mc_alloc; /* total allocated space */ - uint64_t mc_deferred; /* total deferred frees */ + uint64_t mc_alloc; /* allocated space */ + uint64_t mc_dalloc; /* deflated allocated space */ + uint64_t mc_deferred; /* deferred frees */ + uint64_t mc_ddeferred; /* deflated deferred frees */ uint64_t mc_space; /* total space (alloc + free) */ - uint64_t mc_dspace; /* total deflated space */ + uint64_t mc_dspace; /* deflated total space */ uint64_t mc_histogram[ZFS_RANGE_TREE_HISTOGRAM_SIZE]; /* diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 3f649ffb4..0c359928a 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -457,7 +457,9 @@ metaslab_class_destroy(metaslab_class_t *mc) spa_t *spa = mc->mc_spa; ASSERT0(mc->mc_alloc); + ASSERT0(mc->mc_dalloc); ASSERT0(mc->mc_deferred); + ASSERT0(mc->mc_ddeferred); ASSERT0(mc->mc_space); ASSERT0(mc->mc_dspace); @@ -573,8 +575,10 @@ metaslab_class_balance(metaslab_class_t *mc, boolean_t onsync) * relative to average. Bigger vdevs should get more to * fill up at the same time as smaller ones. 
*/ - if (mc->mc_space > 0 && vs->vs_space > 0) { - ratio = vs->vs_space / (mc->mc_space / (mc->mc_groups * + uint64_t mc_space = atomic_load_64(&mc->mc_space); + uint64_t vs_space = atomic_load_64(&vs->vs_space); + if (mc_space > 0 && vs_space > 0) { + ratio = vs_space / (mc_space / (mc->mc_groups * 256) + 1); mg_aliquot = mg_aliquot * ratio / 256; } @@ -595,18 +599,20 @@ metaslab_class_balance(metaslab_class_t *mc, boolean_t onsync) * queue depth, stronger enforcing the free space balance. */ if (metaslab_bias_enabled && - mc->mc_space > 0 && vs->vs_space > 0) { - uint64_t vs_free = vs->vs_space > vs->vs_alloc ? - vs->vs_space - vs->vs_alloc : 0; - uint64_t mc_free = mc->mc_space > mc->mc_alloc ? - mc->mc_space - mc->mc_alloc : 0; + mc_space > 0 && vs_space > 0) { + uint64_t mc_alloc = atomic_load_64(&mc->mc_alloc); + uint64_t vs_alloc = atomic_load_64(&vs->vs_alloc); + uint64_t vs_free = vs_space > vs_alloc ? + vs_space - vs_alloc : 0; + uint64_t mc_free = mc_space > mc_alloc ? + mc_space - mc_alloc : 0; /* * vs_fr is 16 bit fixed-point free space fraction. * mc_fr is 8 bit fixed-point free space fraction. * ratio as their quotient is 8 bit fixed-point. */ - uint_t vs_fr = vs_free / (vs->vs_space / 65536 + 1); - uint_t mc_fr = mc_free / (mc->mc_space / 256 + 1); + uint_t vs_fr = vs_free / (vs_space / 65536 + 1); + uint_t mc_fr = mc_free / (mc_space / 256 + 1); ratio = vs_fr / (mc_fr + 1); mg->mg_aliquot = mg_aliquot * ratio / 256; /* From 2.5x at 25% full to 1x at 75%. 
*/ @@ -693,10 +699,13 @@ rotate: static void metaslab_class_space_update(metaslab_class_t *mc, int64_t alloc_delta, - int64_t defer_delta, int64_t space_delta, int64_t dspace_delta) + int64_t dalloc_delta, int64_t deferred_delta, int64_t ddeferred_delta, + int64_t space_delta, int64_t dspace_delta) { atomic_add_64(&mc->mc_alloc, alloc_delta); - atomic_add_64(&mc->mc_deferred, defer_delta); + atomic_add_64(&mc->mc_dalloc, dalloc_delta); + atomic_add_64(&mc->mc_deferred, deferred_delta); + atomic_add_64(&mc->mc_ddeferred, ddeferred_delta); atomic_add_64(&mc->mc_space, space_delta); atomic_add_64(&mc->mc_dspace, dspace_delta); } @@ -710,25 +719,34 @@ metaslab_class_get_name(metaslab_class_t *mc) uint64_t metaslab_class_get_alloc(metaslab_class_t *mc) { - return (mc->mc_alloc); + return (atomic_load_64(&mc->mc_alloc)); +} + +uint64_t +metaslab_class_get_dalloc(metaslab_class_t *mc) +{ + return (spa_deflate(mc->mc_spa) ? atomic_load_64(&mc->mc_dalloc) : + atomic_load_64(&mc->mc_alloc)); } uint64_t metaslab_class_get_deferred(metaslab_class_t *mc) { - return (mc->mc_deferred); + return (spa_deflate(mc->mc_spa) ? atomic_load_64(&mc->mc_ddeferred) : + atomic_load_64(&mc->mc_deferred)); } uint64_t metaslab_class_get_space(metaslab_class_t *mc) { - return (mc->mc_space); + return (atomic_load_64(&mc->mc_space)); } uint64_t metaslab_class_get_dspace(metaslab_class_t *mc) { - return (spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space); + return (spa_deflate(mc->mc_spa) ? 
atomic_load_64(&mc->mc_dspace) : + atomic_load_64(&mc->mc_space)); } void @@ -2841,16 +2859,21 @@ metaslab_set_selected_txg(metaslab_t *msp, uint64_t txg) } void -metaslab_space_update(vdev_t *vd, metaslab_class_t *mc, int64_t alloc_delta, +metaslab_space_update(metaslab_group_t *mg, int64_t alloc_delta, int64_t defer_delta, int64_t space_delta) { + vdev_t *vd = mg->mg_vd; + int64_t dalloc_delta = vdev_deflated_space(vd, alloc_delta); + int64_t ddefer_delta = vdev_deflated_space(vd, defer_delta); + int64_t dspace_delta = vdev_deflated_space(vd, space_delta); + vdev_space_update(vd, alloc_delta, defer_delta, space_delta); ASSERT3P(vd->vdev_spa->spa_root_vdev, ==, vd->vdev_parent); ASSERT(vd->vdev_ms_count != 0); - metaslab_class_space_update(mc, alloc_delta, defer_delta, space_delta, - vdev_deflated_space(vd, space_delta)); + metaslab_class_space_update(mg->mg_class, alloc_delta, dalloc_delta, + defer_delta, ddefer_delta, space_delta, dspace_delta); } int @@ -2962,8 +2985,7 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object, */ if (txg <= TXG_INITIAL) { metaslab_sync_done(ms, 0); - metaslab_space_update(vd, mg->mg_class, - metaslab_allocated_space(ms), 0, 0); + metaslab_space_update(mg, metaslab_allocated_space(ms), 0, 0); } if (txg != 0) { @@ -3025,9 +3047,8 @@ metaslab_fini(metaslab_t *msp) * subtracted. 
*/ if (!msp->ms_new) { - metaslab_space_update(vd, mg->mg_class, - -metaslab_allocated_space(msp), 0, -msp->ms_size); - + metaslab_space_update(mg, -metaslab_allocated_space(msp), 0, + -msp->ms_size); } space_map_close(msp->ms_sm); msp->ms_sm = NULL; @@ -4537,7 +4558,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) if (msp->ms_new) { /* this is a new metaslab, add its capacity to the vdev */ - metaslab_space_update(vd, mg->mg_class, 0, 0, msp->ms_size); + metaslab_space_update(mg, 0, 0, msp->ms_size); /* there should be no allocations nor frees at this point */ VERIFY0(msp->ms_allocated_this_txg); @@ -4566,8 +4587,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) } else { defer_delta -= zfs_range_tree_space(*defer_tree); } - metaslab_space_update(vd, mg->mg_class, alloc_delta + defer_delta, - defer_delta, 0); + metaslab_space_update(mg, alloc_delta + defer_delta, defer_delta, 0); if (spa_syncing_log_sm(spa) == NULL) { /* diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 7d0846812..a90509974 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -7120,6 +7120,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa->spa_removing_phys.sr_removing_vdev = -1; spa->spa_removing_phys.sr_prev_indirect_vdev = -1; spa->spa_indirect_vdevs_loaded = B_TRUE; + spa->spa_deflate = (version >= SPA_VERSION_RAIDZ_DEFLATE); /* * Create "The Godfather" zio to hold all async IOs @@ -7249,7 +7250,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, /* Newly created pools with the right version are always deflated. 
*/ if (version >= SPA_VERSION_RAIDZ_DEFLATE) { - spa->spa_deflate = TRUE; if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { diff --git a/module/zfs/spa_log_spacemap.c b/module/zfs/spa_log_spacemap.c index 8adcba374..32ef0aaeb 100644 --- a/module/zfs/spa_log_spacemap.c +++ b/module/zfs/spa_log_spacemap.c @@ -1255,10 +1255,9 @@ out: zfs_range_tree_space(m->ms_unflushed_allocs) - zfs_range_tree_space(m->ms_unflushed_frees); - vdev_t *vd = m->ms_group->mg_vd; - metaslab_space_update(vd, m->ms_group->mg_class, + metaslab_space_update(m->ms_group, zfs_range_tree_space(m->ms_unflushed_allocs), 0, 0); - metaslab_space_update(vd, m->ms_group->mg_class, + metaslab_space_update(m->ms_group, -zfs_range_tree_space(m->ms_unflushed_frees), 0, 0); ASSERT0(m->ms_weight & METASLAB_ACTIVE_MASK); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 2ad149212..eaaa429ed 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -2031,7 +2031,10 @@ spa_update_dspace(spa_t *spa) ASSERT3U(spa->spa_rdspace, >=, spa->spa_nonallocating_dspace); spa->spa_rdspace -= spa->spa_nonallocating_dspace; } - spa->spa_dspace = spa->spa_rdspace + ddt_get_dedup_dspace(spa) + + spa->spa_dspace = spa->spa_rdspace + + metaslab_class_get_dalloc(spa_special_class(spa)) + + metaslab_class_get_dalloc(spa_dedup_class(spa)) + + ddt_get_dedup_dspace(spa) + brt_get_dspace(spa); }