mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 02:14:28 +03:00
Fix consistency of ztest_device_removal_active
ztest currently uses the boolean flag ztest_device_removal_active to protect some tests that may not run successfully if they occur at the same time as ztest_device_removal(). Unfortunately, in the event that ztest is in the middle of a device removal when it decides to issue a SIGKILL, the device removal will be automatically restarted (without setting the flag) when the pool is re-imported on the next run. This patch corrects this by ensuring that any in-progress removals are completed before running further tests after the re-import. This patch also makes a few small changes to prevent race conditions involving the creation and destruction of spa->spa_vdev_removal, since this field is not protected by any locks. Some checks that may run concurrently with setting / unsetting this field have been updated to check spa->spa_removing_phys.sr_state instead. The most significant change here is that spa_removal_get_stats() no longer accounts for in-flight work done, since that could result in a NULL pointer dereference. Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tom Caputi <tcaputi@datto.com> Closes #8105
This commit is contained in:
parent
c71c8c715b
commit
c40a1124e1
@ -3573,7 +3573,7 @@ ztest_device_removal(ztest_ds_t *zd, uint64_t id)
|
||||
*/
|
||||
txg_wait_synced(spa_get_dsl(spa), 0);
|
||||
|
||||
while (spa->spa_vdev_removal != NULL)
|
||||
while (spa->spa_removing_phys.sr_state == DSS_SCANNING)
|
||||
txg_wait_synced(spa_get_dsl(spa), 0);
|
||||
} else {
|
||||
mutex_exit(&ztest_vdev_lock);
|
||||
@ -6887,6 +6887,26 @@ ztest_run(ztest_shared_t *zs)
|
||||
}
|
||||
zs->zs_enospc_count = 0;
|
||||
|
||||
/*
|
||||
* If we were in the middle of ztest_device_removal() and were killed,
|
||||
* we need to ensure the removal and scrub complete before running
|
||||
* any tests that check ztest_device_removal_active. The removal will
|
||||
* be restarted automatically when the spa is opened, but we need to
|
||||
* initiate the scrub manually if it is not already in progress. Note
|
||||
* that we always run the scrub whenever an indirect vdev exists
|
||||
* because we have no way of knowing for sure if ztest_device_removal()
|
||||
* fully completed its scrub before the pool was reimported.
|
||||
*/
|
||||
if (spa->spa_removing_phys.sr_state == DSS_SCANNING ||
|
||||
spa->spa_removing_phys.sr_prev_indirect_vdev != -1) {
|
||||
while (spa->spa_removing_phys.sr_state == DSS_SCANNING)
|
||||
txg_wait_synced(spa_get_dsl(spa), 0);
|
||||
|
||||
(void) spa_scan(spa, POOL_SCAN_SCRUB);
|
||||
while (dsl_scan_scrubbing(spa_get_dsl(spa)))
|
||||
txg_wait_synced(spa_get_dsl(spa), 0);
|
||||
}
|
||||
|
||||
run_threads = umem_zalloc(ztest_opts.zo_threads * sizeof (kthread_t *),
|
||||
UMEM_NOFAIL);
|
||||
|
||||
|
@ -462,7 +462,7 @@ spa_checkpoint_check(void *arg, dmu_tx_t *tx)
|
||||
if (!spa_top_vdevs_spacemap_addressable(spa))
|
||||
return (SET_ERROR(ZFS_ERR_VDEV_TOO_BIG));
|
||||
|
||||
if (spa->spa_vdev_removal != NULL)
|
||||
if (spa->spa_removing_phys.sr_state == DSS_SCANNING)
|
||||
return (SET_ERROR(ZFS_ERR_DEVRM_IN_PROGRESS));
|
||||
|
||||
if (spa->spa_checkpoint_txg != 0)
|
||||
|
@ -672,7 +672,7 @@ spa_finish_removal(spa_t *spa, dsl_scan_state_t state, dmu_tx_t *tx)
|
||||
vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id);
|
||||
vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
|
||||
|
||||
if (srp->sr_prev_indirect_vdev != UINT64_MAX) {
|
||||
if (srp->sr_prev_indirect_vdev != -1) {
|
||||
vdev_t *pvd;
|
||||
pvd = vdev_lookup_top(spa,
|
||||
srp->sr_prev_indirect_vdev);
|
||||
@ -2145,13 +2145,6 @@ spa_removal_get_stats(spa_t *spa, pool_removal_stat_t *prs)
|
||||
prs->prs_to_copy = spa->spa_removing_phys.sr_to_copy;
|
||||
prs->prs_copied = spa->spa_removing_phys.sr_copied;
|
||||
|
||||
if (spa->spa_vdev_removal != NULL) {
|
||||
for (int i = 0; i < TXG_SIZE; i++) {
|
||||
prs->prs_copied +=
|
||||
spa->spa_vdev_removal->svr_bytes_done[i];
|
||||
}
|
||||
}
|
||||
|
||||
prs->prs_mapping_memory = 0;
|
||||
uint64_t indirect_vdev_id =
|
||||
spa->spa_removing_phys.sr_prev_indirect_vdev;
|
||||
|
Loading…
Reference in New Issue
Block a user