mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-25 02:49:32 +03:00
Fix sequential resilver drive failure race condition
This patch handles a race condition triggered by the near-simultaneous failure of two drives, in which vdev_rebuild_thread misses the vdev_rebuild_reset_wanted signal. The flag is now checked again inside vdev_rebuild_complete_sync: if a reset is wanted, the sync task calls vdev_rebuild_reset_sync instead of marking the rebuild as complete.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Dipak Ghosh <dipak.ghosh@hpe.com>
Reviewed-by: Akash B <akash-b@hpe.com>
Signed-off-by: Samuel Wycliffe J <samwyc@hpe.com>
Closes #14041
Closes #14050
This commit is contained in:
parent
2a068a1394
commit
2be0a124af
@ -22,6 +22,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 2018, Intel Corporation.
|
* Copyright (c) 2018, Intel Corporation.
|
||||||
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
|
||||||
|
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sys/vdev_impl.h>
|
#include <sys/vdev_impl.h>
|
||||||
@ -134,6 +135,7 @@ static int zfs_rebuild_scrub_enabled = 1;
|
|||||||
* For vdev_rebuild_initiate_sync() and vdev_rebuild_reset_sync().
|
* For vdev_rebuild_initiate_sync() and vdev_rebuild_reset_sync().
|
||||||
*/
|
*/
|
||||||
static __attribute__((noreturn)) void vdev_rebuild_thread(void *arg);
|
static __attribute__((noreturn)) void vdev_rebuild_thread(void *arg);
|
||||||
|
static void vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clear the per-vdev rebuild bytes value for a vdev tree.
|
* Clear the per-vdev rebuild bytes value for a vdev tree.
|
||||||
@ -307,6 +309,17 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx)
|
|||||||
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
|
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
|
||||||
|
|
||||||
mutex_enter(&vd->vdev_rebuild_lock);
|
mutex_enter(&vd->vdev_rebuild_lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handle a second device failure if it occurs after all rebuild I/O
|
||||||
|
* has completed but before this sync task has been executed.
|
||||||
|
*/
|
||||||
|
if (vd->vdev_rebuild_reset_wanted) {
|
||||||
|
mutex_exit(&vd->vdev_rebuild_lock);
|
||||||
|
vdev_rebuild_reset_sync(arg, tx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
vrp->vrp_rebuild_state = VDEV_REBUILD_COMPLETE;
|
vrp->vrp_rebuild_state = VDEV_REBUILD_COMPLETE;
|
||||||
vrp->vrp_end_time = gethrestime_sec();
|
vrp->vrp_end_time = gethrestime_sec();
|
||||||
|
|
||||||
@ -760,7 +773,6 @@ vdev_rebuild_thread(void *arg)
|
|||||||
ASSERT(vd->vdev_rebuilding);
|
ASSERT(vd->vdev_rebuilding);
|
||||||
ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REBUILD));
|
ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REBUILD));
|
||||||
ASSERT3B(vd->vdev_rebuild_cancel_wanted, ==, B_FALSE);
|
ASSERT3B(vd->vdev_rebuild_cancel_wanted, ==, B_FALSE);
|
||||||
ASSERT3B(vd->vdev_rebuild_reset_wanted, ==, B_FALSE);
|
|
||||||
|
|
||||||
vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
|
vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
|
||||||
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
|
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
|
||||||
|
Loading…
Reference in New Issue
Block a user