spa_min_alloc should be GCD, not min

Unlike ashifts, spa_min_alloc may not be a power of 2 (as in the case
of DRAID), so we should not simply select the minimal value among
several vdevs: rounding to a multiple of that minimum is unlikely to
work for the other vdevs. Using the greatest common divisor instead
produces smaller yet more reasonable results.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #15067
This commit is contained in:
Ameer Hamza 2023-07-20 22:23:52 +05:00 committed by Brian Behlendorf
parent 1a2e486d25
commit e037327bfe
4 changed files with 51 additions and 9 deletions

View File

@ -250,6 +250,7 @@ struct spa {
uint64_t spa_min_ashift; /* of vdevs in normal class */ uint64_t spa_min_ashift; /* of vdevs in normal class */
uint64_t spa_max_ashift; /* of vdevs in normal class */ uint64_t spa_max_ashift; /* of vdevs in normal class */
uint64_t spa_min_alloc; /* of vdevs in normal class */ uint64_t spa_min_alloc; /* of vdevs in normal class */
uint64_t spa_gcd_alloc; /* of vdevs in normal class */
uint64_t spa_config_guid; /* config pool guid */ uint64_t spa_config_guid; /* config pool guid */
uint64_t spa_load_guid; /* spa_load initialized guid */ uint64_t spa_load_guid; /* spa_load initialized guid */
uint64_t spa_last_synced_guid; /* last synced guid */ uint64_t spa_last_synced_guid; /* last synced guid */

View File

@ -772,6 +772,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa->spa_min_ashift = INT_MAX; spa->spa_min_ashift = INT_MAX;
spa->spa_max_ashift = 0; spa->spa_max_ashift = 0;
spa->spa_min_alloc = INT_MAX; spa->spa_min_alloc = INT_MAX;
spa->spa_gcd_alloc = INT_MAX;
/* Reset cached value */ /* Reset cached value */
spa->spa_dedup_dspace = ~0ULL; spa->spa_dedup_dspace = ~0ULL;

View File

@ -1399,6 +1399,36 @@ vdev_remove_parent(vdev_t *cvd)
vdev_free(mvd); vdev_free(mvd);
} }
/*
 * Return the greatest common divisor of a and b, computed with the
 * iterative Euclidean algorithm.  If either argument is zero the other
 * argument is returned.  Used to maintain spa_gcd_alloc.
 */
static uint64_t
vdev_gcd(uint64_t a, uint64_t b)
{
	while (b != 0) {
		/* (a, b) -> (b, a mod b) until the remainder vanishes. */
		uint64_t rem = a % b;

		a = b;
		b = rem;
	}

	return (a);
}
/*
 * Fold one vdev's minimum allocation size into the pool-wide values:
 * spa_min_alloc tracks the smallest size seen so far, and spa_gcd_alloc
 * tracks the greatest common divisor of all sizes seen so far.
 */
static void
vdev_spa_set_alloc(spa_t *spa, uint64_t min_alloc)
{
	if (spa->spa_min_alloc > min_alloc)
		spa->spa_min_alloc = min_alloc;

	/*
	 * INT_MAX means no size has been recorded yet, so the first size
	 * is taken as-is; after that it is combined with the running GCD.
	 */
	spa->spa_gcd_alloc = (spa->spa_gcd_alloc != INT_MAX) ?
	    vdev_gcd(min_alloc, spa->spa_gcd_alloc) : min_alloc;
}
void void
vdev_metaslab_group_create(vdev_t *vd) vdev_metaslab_group_create(vdev_t *vd)
{ {
@ -1451,8 +1481,7 @@ vdev_metaslab_group_create(vdev_t *vd)
spa->spa_min_ashift = vd->vdev_ashift; spa->spa_min_ashift = vd->vdev_ashift;
uint64_t min_alloc = vdev_get_min_alloc(vd); uint64_t min_alloc = vdev_get_min_alloc(vd);
if (min_alloc < spa->spa_min_alloc) vdev_spa_set_alloc(spa, min_alloc);
spa->spa_min_alloc = min_alloc;
} }
} }
} }
@ -2213,8 +2242,7 @@ vdev_open(vdev_t *vd)
if (vd->vdev_top == vd && vd->vdev_ashift != 0 && if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
vd->vdev_islog == 0 && vd->vdev_aux == NULL) { vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
uint64_t min_alloc = vdev_get_min_alloc(vd); uint64_t min_alloc = vdev_get_min_alloc(vd);
if (min_alloc < spa->spa_min_alloc) vdev_spa_set_alloc(spa, min_alloc);
spa->spa_min_alloc = min_alloc;
} }
/* /*

View File

@ -1596,6 +1596,19 @@ zio_shrink(zio_t *zio, uint64_t size)
} }
} }
/*
 * Round the provided allocation size up to a value that can be allocated
 * by at least some vdev(s) in the pool with minimum or no additional
 * padding and without extra space usage on others.
 *
 * Sizes not larger than spa_min_alloc become spa_min_alloc; anything
 * larger is rounded up with roundup() using spa_gcd_alloc.
 */
static uint64_t
zio_roundup_alloc_size(spa_t *spa, uint64_t size)
{
	if (size <= spa->spa_min_alloc)
		return (spa->spa_min_alloc);

	return (roundup(size, spa->spa_gcd_alloc));
}
/* /*
* ========================================================================== * ==========================================================================
* Prepare to read and write logical blocks * Prepare to read and write logical blocks
@ -1802,9 +1815,8 @@ zio_write_compress(zio_t *zio)
* in that we charge for the padding used to fill out * in that we charge for the padding used to fill out
* the last sector. * the last sector.
*/ */
ASSERT3U(spa->spa_min_alloc, >=, SPA_MINBLOCKSHIFT); size_t rounded = (size_t)zio_roundup_alloc_size(spa,
size_t rounded = (size_t)roundup(psize, psize);
spa->spa_min_alloc);
if (rounded >= lsize) { if (rounded >= lsize) {
compress = ZIO_COMPRESS_OFF; compress = ZIO_COMPRESS_OFF;
zio_buf_free(cbuf, lsize); zio_buf_free(cbuf, lsize);
@ -1847,8 +1859,8 @@ zio_write_compress(zio_t *zio)
* take this codepath because it will change the on-disk block * take this codepath because it will change the on-disk block
* and decryption will fail. * and decryption will fail.
*/ */
size_t rounded = MIN((size_t)roundup(psize, size_t rounded = MIN((size_t)zio_roundup_alloc_size(spa, psize),
spa->spa_min_alloc), lsize); lsize);
if (rounded != psize) { if (rounded != psize) {
abd_t *cdata = abd_alloc_linear(rounded, B_TRUE); abd_t *cdata = abd_alloc_linear(rounded, B_TRUE);