mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
Remove races from scrub / resilver tests
Currently, several tests in the ZFS Test Suite that attempt to test scrub and resilver behavior occasionally fail. A big reason for this is that these tests use a combination of zinject and zfs_scan_vdev_limit to attempt to slow these operations enough to verify their test commands. This method works most of the time, but provides no guarantees and leads to flaky behavior. This patch adds a new tunable, zfs_scan_suspend_progress, that ensures that scans make no progress, guaranteeing that tests can be run without racing.

This patch also changes zfs_remove_max_bytes_pause to match this new tunable. This provides some consistency between these two similar tunables and ensures that the tunable will not misbehave on 32-bit systems.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Giuseppe Di Natale <guss80@gmail.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #8111
This commit is contained in:
committed by
Brian Behlendorf
parent
00369f3338
commit
cef48f14da
@@ -169,6 +169,7 @@ int zfs_obsolete_min_time_ms = 500; /* min millisecs to obsolete per txg */
|
||||
int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */
|
||||
int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */
|
||||
int zfs_scan_checkpoint_intval = 7200; /* in seconds */
|
||||
int zfs_scan_suspend_progress = 0; /* set to prevent scans from progressing */
|
||||
int zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
|
||||
int zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */
|
||||
enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
|
||||
@@ -3356,6 +3357,27 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
if (spa->spa_syncing_txg < spa->spa_first_txg + SCAN_IMPORT_WAIT_TXGS)
|
||||
return;
|
||||
|
||||
/*
|
||||
* zfs_scan_suspend_progress can be set to disable scan progress.
|
||||
* We don't want to spin the txg_sync thread, so we add a delay
|
||||
* here to simulate the time spent doing a scan. This is mostly
|
||||
* useful for testing and debugging.
|
||||
*/
|
||||
if (zfs_scan_suspend_progress) {
|
||||
uint64_t scan_time_ns = gethrtime() - scn->scn_sync_start_time;
|
||||
int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
|
||||
zfs_resilver_min_time_ms : zfs_scrub_min_time_ms;
|
||||
|
||||
while (zfs_scan_suspend_progress &&
|
||||
!txg_sync_waiting(scn->scn_dp) &&
|
||||
!spa_shutting_down(scn->scn_dp->dp_spa) &&
|
||||
NSEC2MSEC(scan_time_ns) < mintime) {
|
||||
delay(hz);
|
||||
scan_time_ns = gethrtime() - scn->scn_sync_start_time;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* It is possible to switch from unsorted to sorted at any time,
|
||||
* but afterwards the scan will remain sorted unless reloaded from
|
||||
@@ -4070,6 +4092,10 @@ MODULE_PARM_DESC(zfs_free_min_time_ms, "Min millisecs to free per txg");
|
||||
module_param(zfs_resilver_min_time_ms, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_resilver_min_time_ms, "Min millisecs to resilver per txg");
|
||||
|
||||
module_param(zfs_scan_suspend_progress, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_scan_suspend_progress,
|
||||
"Set to prevent scans from progressing");
|
||||
|
||||
module_param(zfs_no_scrub_io, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_no_scrub_io, "Set to disable scrub I/O");
|
||||
|
||||
|
||||
@@ -121,7 +121,7 @@ int vdev_removal_max_span = 32 * 1024;
|
||||
* This is used by the test suite so that it can ensure that certain
|
||||
* actions happen while in the middle of a removal.
|
||||
*/
|
||||
unsigned long zfs_remove_max_bytes_pause = -1UL;
|
||||
int zfs_removal_suspend_progress = 0;
|
||||
|
||||
#define VDEV_REMOVAL_ZAP_OBJS "lzap"
|
||||
|
||||
@@ -1449,14 +1449,14 @@ spa_vdev_remove_thread(void *arg)
|
||||
|
||||
/*
|
||||
* This delay will pause the removal around the point
|
||||
* specified by zfs_remove_max_bytes_pause. We do this
|
||||
* specified by zfs_removal_suspend_progress. We do this
|
||||
* solely from the test suite or during debugging.
|
||||
*/
|
||||
uint64_t bytes_copied =
|
||||
spa->spa_removing_phys.sr_copied;
|
||||
for (int i = 0; i < TXG_SIZE; i++)
|
||||
bytes_copied += svr->svr_bytes_done[i];
|
||||
while (zfs_remove_max_bytes_pause <= bytes_copied &&
|
||||
while (zfs_removal_suspend_progress &&
|
||||
!svr->svr_thread_exit)
|
||||
delay(hz);
|
||||
|
||||
@@ -2178,8 +2178,8 @@ MODULE_PARM_DESC(vdev_removal_max_span,
|
||||
"Largest span of free chunks a remap segment can span");
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
module_param(zfs_remove_max_bytes_pause, ulong, 0644);
|
||||
MODULE_PARM_DESC(zfs_remove_max_bytes_pause,
|
||||
module_param(zfs_removal_suspend_progress, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_removal_suspend_progress,
|
||||
"Pause device removal after this many bytes are copied "
|
||||
"(debug use only - causes removal to hang)");
|
||||
/* END CSTYLED */
|
||||
|
||||
Reference in New Issue
Block a user