metaslab: don't pass whole zio to throttle reserve APIs

They only need a couple of fields, and passing the whole thing just
invites fiddling around inside it, like modifying flags, which then
makes it much harder to understand the zio state from inside zio.c.

We move the flag update to just after a successful throttle in zio.c.

Also rename ZIO_FLAG_IO_ALLOCATING to ZIO_FLAG_ALLOC_THROTTLED. This
better describes what it means, and makes it look less like
IO_IS_ALLOCATING(), which means something different.
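
The caller-side shape of the change, condensed from the
zio_io_to_allocate() hunk below (a sketch of the new pattern, not a
seventh changed file):

	boolean_t more;
	if (metaslab_class_throttle_reserve(zio->io_metaslab_class,
	    zio->io_allocator, zio->io_prop.zp_copies, zio->io_size,
	    B_FALSE, &more)) {
		/* Reservation placed; the caller now owns the flag. */
		zio->io_flags |= ZIO_FLAG_ALLOC_THROTTLED;
	}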

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #17508
Rob Norris, 2025-07-05 13:22:22 +10:00 (committed via GitHub)
parent 92d3b4ee2c, commit 6af8db61b1
6 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h

@@ -110,9 +110,10 @@ void metaslab_class_balance(metaslab_class_t *mc, boolean_t onsync);
 void metaslab_class_histogram_verify(metaslab_class_t *);
 uint64_t metaslab_class_fragmentation(metaslab_class_t *);
 uint64_t metaslab_class_expandable_space(metaslab_class_t *);
-boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int, zio_t *,
-    boolean_t, boolean_t *);
-boolean_t metaslab_class_throttle_unreserve(metaslab_class_t *, int, zio_t *);
+boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int, int,
+    uint64_t, boolean_t, boolean_t *);
+boolean_t metaslab_class_throttle_unreserve(metaslab_class_t *, int, int,
+    uint64_t);
 void metaslab_class_evict_old(metaslab_class_t *, uint64_t);
 const char *metaslab_class_get_name(metaslab_class_t *);
 uint64_t metaslab_class_get_alloc(metaslab_class_t *);
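
For reference, the new parameters map onto fields previously read from
the zio (an annotated form of the prototype above, using the parameter
names from the metaslab.c definition below):

	boolean_t metaslab_class_throttle_reserve(metaslab_class_t *mc,
	    int allocator,	/* was zio->io_allocator */
	    int copies,		/* was the old "slots" argument */
	    uint64_t io_size,	/* was zio->io_size */
	    boolean_t must, boolean_t *more);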

diff --git a/include/sys/zio.h b/include/sys/zio.h

@@ -196,7 +196,7 @@ typedef uint64_t zio_flag_t;
 #define	ZIO_FLAG_DONT_RETRY	(1ULL << 10)
 #define	ZIO_FLAG_NODATA		(1ULL << 12)
 #define	ZIO_FLAG_INDUCE_DAMAGE	(1ULL << 13)
-#define	ZIO_FLAG_IO_ALLOCATING	(1ULL << 14)
+#define	ZIO_FLAG_ALLOC_THROTTLED	(1ULL << 14)

 #define	ZIO_FLAG_DDT_INHERIT	(ZIO_FLAG_IO_RETRY - 1)
 #define	ZIO_FLAG_GANG_INHERIT	(ZIO_FLAG_IO_RETRY - 1)
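
Note the flag keeps its bit (1ULL << 14 == 0x00004000); only the name
changes, which is why the zpool-events.8 and zfs_valstr tables below are
one-line renames.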

diff --git a/man/man8/zpool-events.8 b/man/man8/zpool-events.8

@@ -28,7 +28,7 @@
 .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 .\" Copyright (c) 2024, 2025, Klara, Inc.
 .\"
-.Dd May 27, 2025
+.Dd July 3, 2025
 .Dt ZPOOL-EVENTS 8
 .Os
 .
@@ -465,7 +465,7 @@ ZIO_FLAG_DONT_RETRY:0x00000400
 ZIO_FLAG_NODATA:0x00001000
 ZIO_FLAG_INDUCE_DAMAGE:0x00002000
-ZIO_FLAG_IO_ALLOCATING:0x00004000
+ZIO_FLAG_ALLOC_THROTTLED:0x00004000
 ZIO_FLAG_IO_RETRY:0x00008000
 ZIO_FLAG_PROBE:0x00010000
 ZIO_FLAG_TRYHARD:0x00020000

diff --git a/module/zcommon/zfs_valstr.c b/module/zcommon/zfs_valstr.c

@@ -203,7 +203,7 @@ _VALSTR_BITFIELD_IMPL(zio_flag,
 	{ '?', "??", "[UNUSED 11]" },
 	{ '.', "ND", "NODATA" },
 	{ '.', "ID", "INDUCE_DAMAGE" },
-	{ '.', "AL", "IO_ALLOCATING" },
+	{ '.', "AT", "ALLOC_THROTTLED" },
 	{ '.', "RE", "IO_RETRY" },
 	{ '.', "PR", "PROBE" },
 	{ '.', "TH", "TRYHARD" },

diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c

@@ -5757,21 +5757,21 @@ metaslab_free_dva(spa_t *spa, const dva_t *dva, boolean_t checkpoint)
 }

 /*
- * Reserve some allocation slots. The reservation system must be called
- * before we call into the allocator. If there aren't any available slots
- * then the I/O will be throttled until an I/O completes and its slots are
- * freed up. The function returns true if it was successful in placing
- * the reservation.
+ * Reserve some space for a future allocation. The reservation system must be
+ * called before we call into the allocator. If there aren't enough space
+ * available, the calling I/O will be throttled until another I/O completes and
+ * its reservation is released. The function returns true if it was successful
+ * in placing the reservation.
  */
 boolean_t
-metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, zio_t *zio,
-    boolean_t must, boolean_t *more)
+metaslab_class_throttle_reserve(metaslab_class_t *mc, int allocator,
+    int copies, uint64_t io_size, boolean_t must, boolean_t *more)
 {
-	metaslab_class_allocator_t *mca = &mc->mc_allocator[zio->io_allocator];
+	metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];

 	ASSERT(mc->mc_alloc_throttle_enabled);
-	if (mc->mc_alloc_io_size < zio->io_size) {
-		mc->mc_alloc_io_size = zio->io_size;
+	if (mc->mc_alloc_io_size < io_size) {
+		mc->mc_alloc_io_size = io_size;
 		metaslab_class_balance(mc, B_FALSE);
 	}
 	if (must || mca->mca_reserved <= mc->mc_alloc_max) {
@@ -5782,10 +5782,9 @@ metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, zio_t *zio,
 		 * worst that can happen is few more I/Os get to allocation
 		 * earlier, that is not a problem.
 		 */
-		int64_t delta = slots * zio->io_size;
+		int64_t delta = copies * io_size;
 		*more = (atomic_add_64_nv(&mca->mca_reserved, delta) <=
 		    mc->mc_alloc_max);
-		zio->io_flags |= ZIO_FLAG_IO_ALLOCATING;
 		return (B_TRUE);
 	}
 	*more = B_FALSE;
@@ -5793,13 +5792,13 @@ metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, zio_t *zio,
 }

 boolean_t
-metaslab_class_throttle_unreserve(metaslab_class_t *mc, int slots,
-    zio_t *zio)
+metaslab_class_throttle_unreserve(metaslab_class_t *mc, int allocator,
+    int copies, uint64_t io_size)
 {
-	metaslab_class_allocator_t *mca = &mc->mc_allocator[zio->io_allocator];
+	metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];

 	ASSERT(mc->mc_alloc_throttle_enabled);
-	int64_t delta = slots * zio->io_size;
+	int64_t delta = copies * io_size;
 	return (atomic_add_64_nv(&mca->mca_reserved, -delta) <=
 	    mc->mc_alloc_max);
 }
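
The accounting itself is unchanged: a reservation is copies * io_size
bytes added to mca_reserved, and the return values report whether the
allocator can take more. A minimal user-space model of the arithmetic
(illustrative only; C11 atomics stand in for atomic_add_64_nv(), and
the names are borrowed from the kernel code):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	typedef struct {
		_Atomic uint64_t mca_reserved;	/* bytes reserved */
	} mca_model_t;

	static bool
	throttle_reserve_model(mca_model_t *mca, uint64_t alloc_max,
	    int copies, uint64_t io_size, bool must, bool *more)
	{
		if (must || atomic_load(&mca->mca_reserved) <= alloc_max) {
			uint64_t delta = (uint64_t)copies * io_size;
			/*
			 * atomic_add_64_nv() returns the new value;
			 * fetch_add returns the old, so add delta back.
			 */
			*more = (atomic_fetch_add(&mca->mca_reserved,
			    delta) + delta <= alloc_max);
			return (true);
		}
		*more = false;
		return (false);
	}

	static bool
	throttle_unreserve_model(mca_model_t *mca, uint64_t alloc_max,
	    int copies, uint64_t io_size)
	{
		uint64_t delta = (uint64_t)copies * io_size;
		return (atomic_fetch_sub(&mca->mca_reserved, delta) -
		    delta <= alloc_max);
	}

So a zio with zp_copies = 2 and io_size = 128 KiB holds a 256 KiB
reservation, and must release exactly the same amount later, which is
why ZIO_FLAG_ALLOC_THROTTLED now simply records "this zio holds a
reservation", set by the caller only after a successful reserve.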

diff --git a/module/zfs/zio.c b/module/zfs/zio.c

@@ -1679,7 +1679,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
 	 * If this is a retried I/O then we ignore it since we will
 	 * have already processed the original allocating I/O.
 	 */
-	if (flags & ZIO_FLAG_IO_ALLOCATING &&
+	if (flags & ZIO_FLAG_ALLOC_THROTTLED &&
 	    (vd != vd->vdev_top || (flags & ZIO_FLAG_IO_RETRY))) {
 		ASSERT(pio->io_metaslab_class != NULL);
 		ASSERT(pio->io_metaslab_class->mc_alloc_throttle_enabled);
@@ -1689,7 +1689,7 @@
 		ASSERT(!(pio->io_flags & ZIO_FLAG_IO_REWRITE) ||
 		    pio->io_child_type == ZIO_CHILD_GANG);

-		flags &= ~ZIO_FLAG_IO_ALLOCATING;
+		flags &= ~ZIO_FLAG_ALLOC_THROTTLED;
 	}

 	zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, size,
@@ -3151,7 +3151,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
 	ASSERT(ZIO_HAS_ALLOCATOR(pio));
 	int flags = METASLAB_GANG_HEADER;
-	if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+	if (pio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
 		ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
 		ASSERT(has_data);
@@ -3186,10 +3186,11 @@
 	    ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);

 	zio_gang_inherit_allocator(pio, zio);
-	if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+	if (pio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
 		boolean_t more;
-		VERIFY(metaslab_class_throttle_reserve(mc, gbh_copies,
-		    zio, B_TRUE, &more));
+		VERIFY(metaslab_class_throttle_reserve(mc, zio->io_allocator,
+		    gbh_copies, zio->io_size, B_TRUE, &more));
+		zio->io_flags |= ZIO_FLAG_ALLOC_THROTTLED;
 	}

 	/*
@@ -4072,9 +4073,11 @@ zio_io_to_allocate(metaslab_class_allocator_t *mca, boolean_t *more)
 	 * reserve then we throttle.
 	 */
 	if (!metaslab_class_throttle_reserve(zio->io_metaslab_class,
-	    zio->io_prop.zp_copies, zio, B_FALSE, more)) {
+	    zio->io_allocator, zio->io_prop.zp_copies, zio->io_size,
+	    B_FALSE, more)) {
 		return (NULL);
 	}
+	zio->io_flags |= ZIO_FLAG_ALLOC_THROTTLED;

 	avl_remove(&mca->mca_tree, zio);
 	ASSERT3U(zio->io_stage, <, ZIO_STAGE_DVA_ALLOCATE);
@@ -4230,13 +4233,14 @@ again:
 	 * If we are holding old class reservation, drop it.
 	 * Dispatch the next ZIO(s) there if some are waiting.
 	 */
-	if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+	if (zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
 		if (metaslab_class_throttle_unreserve(mc,
-		    zio->io_prop.zp_copies, zio)) {
+		    zio->io_allocator, zio->io_prop.zp_copies,
+		    zio->io_size)) {
 			zio_allocate_dispatch(zio->io_metaslab_class,
 			    zio->io_allocator);
 		}
-		zio->io_flags &= ~ZIO_FLAG_IO_ALLOCATING;
+		zio->io_flags &= ~ZIO_FLAG_ALLOC_THROTTLED;
 	}

 	if (zfs_flags & ZFS_DEBUG_METASLAB_ALLOC) {
@@ -5196,7 +5200,7 @@ zio_ready(zio_t *zio)
 	if (zio->io_error != 0) {
 		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;

-		if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+		if (zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED) {
 			ASSERT(IO_IS_ALLOCATING(zio));
 			ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
 			ASSERT(zio->io_metaslab_class != NULL);
@@ -5207,8 +5211,8 @@ zio_ready(zio_t *zio)
 			 * issue the next I/O to allocate.
 			 */
 			if (metaslab_class_throttle_unreserve(
-			    zio->io_metaslab_class, zio->io_prop.zp_copies,
-			    zio)) {
+			    zio->io_metaslab_class, zio->io_allocator,
+			    zio->io_prop.zp_copies, zio->io_size)) {
 				zio_allocate_dispatch(zio->io_metaslab_class,
 				    zio->io_allocator);
 			}
@@ -5267,7 +5271,7 @@ zio_dva_throttle_done(zio_t *zio)
 	ASSERT3P(vd, ==, vd->vdev_top);
 	ASSERT(zio_injection_enabled || !(zio->io_flags & ZIO_FLAG_IO_RETRY));
 	ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR));
-	ASSERT(zio->io_flags & ZIO_FLAG_IO_ALLOCATING);
+	ASSERT(zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED);

 	/*
 	 * Parents of gang children can have two flavors -- ones that allocated
@@ -5291,7 +5295,8 @@
 	metaslab_group_alloc_decrement(zio->io_spa, vd->vdev_id,
 	    pio->io_allocator, flags, pio->io_size, tag);

-	if (metaslab_class_throttle_unreserve(zio->io_metaslab_class, 1, pio)) {
+	if (metaslab_class_throttle_unreserve(pio->io_metaslab_class,
+	    pio->io_allocator, 1, pio->io_size)) {
 		zio_allocate_dispatch(zio->io_metaslab_class,
 		    pio->io_allocator);
 	}
@@ -5322,7 +5327,7 @@ zio_done(zio_t *zio)
 	 * write. We must do this since the allocation is performed
 	 * by the logical I/O but the actual write is done by child I/Os.
 	 */
-	if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING &&
+	if (zio->io_flags & ZIO_FLAG_ALLOC_THROTTLED &&
 	    zio->io_child_type == ZIO_CHILD_VDEV)
 		zio_dva_throttle_done(zio);
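
Taken together, the reserve/flag lifecycle after this change:

	zio_io_to_allocate()      reserve; on success, set ZIO_FLAG_ALLOC_THROTTLED
	zio_write_gang_block()    reserve for the gang header (must = B_TRUE);
	                          set the flag on the gang child
	class change ("again:")   unreserve from the old class; clear the flag
	zio_ready() (error path)  unreserve; dispatch the next waiting zio
	zio_dva_throttle_done()   unreserve one copy for the parent; dispatch next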