From e037327bfee02717fc8f53fd848a09f370e14499 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Thu, 20 Jul 2023 22:23:52 +0500 Subject: [PATCH] spa_min_alloc should be GCD, not min Since spa_min_alloc may not be a power of 2, unlike ashifts, in the case of DRAID, we should not select the minimal value among several vdevs. Rounding to a multiple of it is unlikely to work for other vdevs. Instead, using the greatest common divisor produces smaller yet more reasonable results. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ameer Hamza Closes #15067 --- include/sys/spa_impl.h | 1 + module/zfs/spa_misc.c | 1 + module/zfs/vdev.c | 36 ++++++++++++++++++++++++++++++++---- module/zfs/zio.c | 22 +++++++++++++++++----- 4 files changed, 51 insertions(+), 9 deletions(-) diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 44afa7632..588c72f6e 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -250,6 +250,7 @@ struct spa { uint64_t spa_min_ashift; /* of vdevs in normal class */ uint64_t spa_max_ashift; /* of vdevs in normal class */ uint64_t spa_min_alloc; /* of vdevs in normal class */ + uint64_t spa_gcd_alloc; /* of vdevs in normal class */ uint64_t spa_config_guid; /* config pool guid */ uint64_t spa_load_guid; /* spa_load initialized guid */ uint64_t spa_last_synced_guid; /* last synced guid */ diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 06f640769..3b355e0de 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -772,6 +772,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa->spa_min_ashift = INT_MAX; spa->spa_max_ashift = 0; spa->spa_min_alloc = INT_MAX; + spa->spa_gcd_alloc = INT_MAX; /* Reset cached value */ spa->spa_dedup_dspace = ~0ULL; diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index b6f8c0ab3..f3812b843 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1399,6 +1399,36 @@ vdev_remove_parent(vdev_t *cvd) vdev_free(mvd); } +/* + * Choose GCD for spa_gcd_alloc. + */ +static uint64_t +vdev_gcd(uint64_t a, uint64_t b) +{ + while (b != 0) { + uint64_t t = b; + b = a % b; + a = t; + } + return (a); +} + +/* + * Set spa_min_alloc and spa_gcd_alloc. + */ +static void +vdev_spa_set_alloc(spa_t *spa, uint64_t min_alloc) +{ + if (min_alloc < spa->spa_min_alloc) + spa->spa_min_alloc = min_alloc; + if (spa->spa_gcd_alloc == INT_MAX) { + spa->spa_gcd_alloc = min_alloc; + } else { + spa->spa_gcd_alloc = vdev_gcd(min_alloc, + spa->spa_gcd_alloc); + } +} + void vdev_metaslab_group_create(vdev_t *vd) { @@ -1451,8 +1481,7 @@ vdev_metaslab_group_create(vdev_t *vd) spa->spa_min_ashift = vd->vdev_ashift; uint64_t min_alloc = vdev_get_min_alloc(vd); - if (min_alloc < spa->spa_min_alloc) - spa->spa_min_alloc = min_alloc; + vdev_spa_set_alloc(spa, min_alloc); } } } @@ -2213,8 +2242,7 @@ vdev_open(vdev_t *vd) if (vd->vdev_top == vd && vd->vdev_ashift != 0 && vd->vdev_islog == 0 && vd->vdev_aux == NULL) { uint64_t min_alloc = vdev_get_min_alloc(vd); - if (min_alloc < spa->spa_min_alloc) - spa->spa_min_alloc = min_alloc; + vdev_spa_set_alloc(spa, min_alloc); } /* diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 10279fde8..3f5e6a08d 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1596,6 +1596,19 @@ zio_shrink(zio_t *zio, uint64_t size) } } +/* + * Round provided allocation size up to a value that can be allocated + * by at least some vdev(s) in the pool with minimum or no additional + * padding and without extra space usage on others + */ +static uint64_t +zio_roundup_alloc_size(spa_t *spa, uint64_t size) +{ + if (size > spa->spa_min_alloc) + return (roundup(size, spa->spa_gcd_alloc)); + return (spa->spa_min_alloc); +} + /* * ========================================================================== * Prepare to read and write logical blocks @@ -1802,9 +1815,8 @@ zio_write_compress(zio_t *zio) * in that we charge for the padding used to fill out * the last sector. */ - ASSERT3U(spa->spa_min_alloc, >=, SPA_MINBLOCKSHIFT); - size_t rounded = (size_t)roundup(psize, - spa->spa_min_alloc); + size_t rounded = (size_t)zio_roundup_alloc_size(spa, + psize); if (rounded >= lsize) { compress = ZIO_COMPRESS_OFF; zio_buf_free(cbuf, lsize); @@ -1847,8 +1859,8 @@ zio_write_compress(zio_t *zio) * take this codepath because it will change the on-disk block * and decryption will fail. */ - size_t rounded = MIN((size_t)roundup(psize, - spa->spa_min_alloc), lsize); + size_t rounded = MIN((size_t)zio_roundup_alloc_size(spa, psize), + lsize); if (rounded != psize) { abd_t *cdata = abd_alloc_linear(rounded, B_TRUE);