From 717213d431c57510bf8c994587fdf4f1e5d229cf Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Thu, 19 Jun 2025 06:25:58 -0700 Subject: [PATCH] Fix other nonrot bugs There are still a variety of bugs involving the vdev_nonrot property that will cause problems if you try to run the test suite with segment-based weighting disabled, and with other things in the weighting code. Parents' nonrot property needs to be updated when children are added. When vdevs are expanded and more metaslabs are added, the weights have to be recalculated (since the number of metaslabs is an input to the lba bias function). When opening, faulted or unopenable children should not be considered for whether a vdev is nonrot or not (since the nonrot property is determined during a successful open, this can cause false negatives). And draid spares need to have the nonrot property set correctly. Sponsored-by: Eshtek, creators of HexOS Sponsored-by: Klara, Inc. Reviewed-by: Allan Jude Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Reviewed-by: Rob Norris Signed-off-by: Paul Dagnelie Closes #17469 --- module/zfs/vdev.c | 18 ++++++++++++++++++ module/zfs/vdev_draid.c | 1 + 2 files changed, 19 insertions(+) diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 63f2f3794..01758b0c5 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -553,6 +553,7 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd) pvd->vdev_child = newchild; pvd->vdev_child[id] = cvd; + pvd->vdev_nonrot &= cvd->vdev_nonrot; cvd->vdev_top = (pvd->vdev_top ? 
pvd->vdev_top: cvd); ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL); @@ -1374,6 +1375,7 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops) mvd->vdev_physical_ashift = cvd->vdev_physical_ashift; mvd->vdev_state = cvd->vdev_state; mvd->vdev_crtxg = cvd->vdev_crtxg; + mvd->vdev_nonrot = cvd->vdev_nonrot; vdev_remove_child(pvd, cvd); vdev_add_child(pvd, mvd); @@ -1579,6 +1581,18 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg) vd->vdev_ms = mspp; vd->vdev_ms_count = newc; + /* + * Weighting algorithms can depend on the number of metaslabs in the + * vdev. In order to ensure that all weights are correct at all times, + * we need to recalculate here. + */ + for (uint64_t m = 0; m < oldc; m++) { + metaslab_t *msp = vd->vdev_ms[m]; + mutex_enter(&msp->ms_lock); + metaslab_recalculate_weight_and_sort(msp); + mutex_exit(&msp->ms_lock); + } + for (uint64_t m = oldc; m < newc; m++) { uint64_t object = 0; /* @@ -1960,6 +1974,10 @@ vdev_open_children_impl(vdev_t *vd, vdev_open_children_func_t *open_func) taskq_wait(tq); for (int c = 0; c < children; c++) { vdev_t *cvd = vd->vdev_child[c]; + + if (open_func(cvd) == B_FALSE || + cvd->vdev_state <= VDEV_STATE_FAULTED) + continue; vd->vdev_nonrot &= cvd->vdev_nonrot; } diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c index e0fafd0da..feec5fd3c 100644 --- a/module/zfs/vdev_draid.c +++ b/module/zfs/vdev_draid.c @@ -2484,6 +2484,7 @@ vdev_draid_spare_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, *max_psize = max_asize + VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; vds->vds_draid_vdev = tvd; + vd->vdev_nonrot = tvd->vdev_nonrot; return (0); }