Implement allocation size ranges and use for gang leaves (#17111)

When forced to resort to ganging, ZFS currently allocates three child
blocks, each one third of the size of the original. This is true
regardless of whether larger allocations could be made, which would
allow us to have fewer gang leaves. Allocating larger children when
possible improves performance when fragmentation is high enough to
require ganging, but not so high that every free range is only just big
enough to hold a third of the recordsize. It is also useful for
improving the behavior of a future change to allow larger gang headers.

We add the ability for the allocation codepath to allocate a range of
sizes instead of a single fixed size. We then use this to pre-allocate
the DVAs for the gang children. If those allocations fail, we fall back
to the normal write path, which will likely re-gang.

Signed-off-by: Paul Dagnelie <paul.dagnelie@klarasystems.com>
Co-authored-by: Paul Dagnelie <paul.dagnelie@klarasystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
This commit is contained in:
Paul Dagnelie
2025-05-02 15:32:18 -07:00
committed by GitHub
parent a7de203c86
commit 246e5883bb
24 changed files with 392 additions and 107 deletions
+7 -1
View File
@@ -41,7 +41,7 @@ extern "C" {
/*
 * Table of metaslab operations: a name string and an allocation
 * function pointer.
 *
 * NOTE(review): this listing is a rendered diff hunk with the +/- markers
 * stripped, which is why msop_alloc appears twice. The page header reports
 * a single deleted line for this file; the two-argument form is presumably
 * that removed line and the four-argument form its replacement -- confirm
 * against the header itself.
 */
typedef struct metaslab_ops {
const char *msop_name;
uint64_t (*msop_alloc)(metaslab_t *, uint64_t);
uint64_t (*msop_alloc)(metaslab_t *, uint64_t, uint64_t, uint64_t *);
} metaslab_ops_t;
@@ -82,6 +82,9 @@ uint64_t metaslab_largest_allocatable(metaslab_t *);
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t, blkptr_t *, int,
uint64_t, blkptr_t *, int, zio_alloc_list_t *, int, const void *);
/*
 * Same parameter list as metaslab_alloc() above, with one additional
 * uint64_t after the first uint64_t and a trailing uint64_t * argument.
 *
 * NOTE(review): going by the commit description (allocating a range of
 * sizes rather than one fixed size), the extra uint64_t is presumably the
 * upper bound of the range and the uint64_t * receives the size actually
 * allocated -- confirm against the function definition.
 */
int metaslab_alloc_range(spa_t *, metaslab_class_t *, uint64_t, uint64_t,
blkptr_t *, int, uint64_t, blkptr_t *, int, zio_alloc_list_t *,
int, const void *, uint64_t *);
int metaslab_alloc_dva(spa_t *, metaslab_class_t *, uint64_t,
dva_t *, int, dva_t *, uint64_t, int, zio_alloc_list_t *, int);
void metaslab_free(spa_t *, const blkptr_t *, uint64_t, boolean_t);
@@ -95,6 +98,7 @@ void metaslab_check_free(spa_t *, const blkptr_t *);
void metaslab_stat_init(void);
void metaslab_stat_fini(void);
/*
 * Takes two allocation-trace lists.
 *
 * NOTE(review): presumably transfers the entries of one list to the other;
 * the prototype does not show which argument is the source -- confirm
 * against the function definition.
 */
void metaslab_trace_move(zio_alloc_list_t *, zio_alloc_list_t *);
void metaslab_trace_init(zio_alloc_list_t *);
void metaslab_trace_fini(zio_alloc_list_t *);
@@ -127,6 +131,8 @@ uint64_t metaslab_group_get_space(metaslab_group_t *);
void metaslab_group_histogram_verify(metaslab_group_t *);
uint64_t metaslab_group_fragmentation(metaslab_group_t *);
void metaslab_group_histogram_remove(metaslab_group_t *, metaslab_t *);
/*
 * NOTE(review): judging by the name and the blkptr_t * argument, this
 * presumably performs the allocation-count increment for every DVA of the
 * given block pointer, as the counterpart of
 * metaslab_group_alloc_decrement() below -- confirm against the function
 * definition.
 */
void metaslab_group_alloc_increment_all(spa_t *, blkptr_t *, int, int,
uint64_t, const void *);
void metaslab_group_alloc_decrement(spa_t *, uint64_t, int, int, uint64_t,
const void *);
void metaslab_recalculate_weight_and_sort(metaslab_t *);