mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-02-05 06:43:31 +03:00
Fix random ztest_deadman_thread failures
The zloop test has been failing on the buildbot for the last few weeks with various failures in ztest_deadman_thread(). The cause is that this thread was not being stopped, as it should be, when ztest performs its pool import / export tests. This patch corrects that: the deadman thread now loops only while ztest_exiting is not set, and ztest_run() joins it together with the resume thread.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Serapheim Dimitropoulos <serapheim.dimitro@delphix.com>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #8010
This commit is contained in:
parent
e871a8f058
commit
9410257800
@ -6495,13 +6495,20 @@ ztest_deadman_thread(void *arg)
|
|||||||
{
|
{
|
||||||
ztest_shared_t *zs = arg;
|
ztest_shared_t *zs = arg;
|
||||||
spa_t *spa = ztest_spa;
|
spa_t *spa = ztest_spa;
|
||||||
hrtime_t delta, overdue, total = 0;
|
hrtime_t delay, overdue, last_run = gethrtime();
|
||||||
|
|
||||||
for (;;) {
|
delay = (zs->zs_thread_stop - zs->zs_thread_start) +
|
||||||
delta = zs->zs_thread_stop - zs->zs_thread_start +
|
MSEC2NSEC(zfs_deadman_synctime_ms);
|
||||||
MSEC2NSEC(zfs_deadman_synctime_ms);
|
|
||||||
|
|
||||||
(void) poll(NULL, 0, (int)NSEC2MSEC(delta));
|
while (!ztest_exiting) {
|
||||||
|
/*
|
||||||
|
* Wait for the delay timer while checking occasionally
|
||||||
|
* if we should stop.
|
||||||
|
*/
|
||||||
|
if (gethrtime() < last_run + delay) {
|
||||||
|
(void) poll(NULL, 0, 1000);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the pool is suspended then fail immediately. Otherwise,
|
* If the pool is suspended then fail immediately. Otherwise,
|
||||||
@ -6522,15 +6529,20 @@ ztest_deadman_thread(void *arg)
|
|||||||
* then it may be hung and is terminated.
|
* then it may be hung and is terminated.
|
||||||
*/
|
*/
|
||||||
overdue = zs->zs_proc_stop + MSEC2NSEC(zfs_deadman_synctime_ms);
|
overdue = zs->zs_proc_stop + MSEC2NSEC(zfs_deadman_synctime_ms);
|
||||||
total += zfs_deadman_synctime_ms / 1000;
|
|
||||||
if (gethrtime() > overdue) {
|
if (gethrtime() > overdue) {
|
||||||
fatal(0, "aborting test after %llu seconds because "
|
fatal(0, "aborting test after %llu seconds because "
|
||||||
"the process is overdue for termination.", total);
|
"the process is overdue for termination.",
|
||||||
|
(gethrtime() - zs->zs_proc_start) / NANOSEC);
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) printf("ztest has been running for %lld seconds\n",
|
(void) printf("ztest has been running for %lld seconds\n",
|
||||||
total);
|
(gethrtime() - zs->zs_proc_start) / NANOSEC);
|
||||||
|
|
||||||
|
last_run = gethrtime();
|
||||||
|
delay = MSEC2NSEC(zfs_deadman_checktime_ms);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
thread_exit();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -6724,7 +6736,7 @@ ztest_run(ztest_shared_t *zs)
|
|||||||
{
|
{
|
||||||
spa_t *spa;
|
spa_t *spa;
|
||||||
objset_t *os;
|
objset_t *os;
|
||||||
kthread_t *resume_thread;
|
kthread_t *resume_thread, *deadman_thread;
|
||||||
kthread_t **run_threads;
|
kthread_t **run_threads;
|
||||||
uint64_t object;
|
uint64_t object;
|
||||||
int error;
|
int error;
|
||||||
@ -6782,7 +6794,7 @@ ztest_run(ztest_shared_t *zs)
|
|||||||
/*
|
/*
|
||||||
* Create a deadman thread and set to panic if we hang.
|
* Create a deadman thread and set to panic if we hang.
|
||||||
*/
|
*/
|
||||||
(void) thread_create(NULL, 0, ztest_deadman_thread,
|
deadman_thread = thread_create(NULL, 0, ztest_deadman_thread,
|
||||||
zs, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri);
|
zs, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri);
|
||||||
|
|
||||||
spa->spa_deadman_failmode = ZIO_FAILURE_MODE_PANIC;
|
spa->spa_deadman_failmode = ZIO_FAILURE_MODE_PANIC;
|
||||||
@ -6849,9 +6861,10 @@ ztest_run(ztest_shared_t *zs)
|
|||||||
|
|
||||||
umem_free(run_threads, ztest_opts.zo_threads * sizeof (kthread_t *));
|
umem_free(run_threads, ztest_opts.zo_threads * sizeof (kthread_t *));
|
||||||
|
|
||||||
/* Kill the resume thread */
|
/* Kill the resume and deadman threads */
|
||||||
ztest_exiting = B_TRUE;
|
ztest_exiting = B_TRUE;
|
||||||
VERIFY0(thread_join(resume_thread));
|
VERIFY0(thread_join(resume_thread));
|
||||||
|
VERIFY0(thread_join(deadman_thread));
|
||||||
ztest_resume(spa);
|
ztest_resume(spa);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -7351,6 +7364,7 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
dprintf_setup(&argc, argv);
|
dprintf_setup(&argc, argv);
|
||||||
zfs_deadman_synctime_ms = 300000;
|
zfs_deadman_synctime_ms = 300000;
|
||||||
|
zfs_deadman_checktime_ms = 30000;
|
||||||
/*
|
/*
|
||||||
* As two-word space map entries may not come up often (especially
|
* As two-word space map entries may not come up often (especially
|
||||||
* if pool and vdev sizes are small) we want to force at least some
|
* if pool and vdev sizes are small) we want to force at least some
|
||||||
|
@ -312,7 +312,7 @@ unsigned long zfs_deadman_ziotime_ms = 300000ULL;
|
|||||||
* Check time in milliseconds. This defines the frequency at which we check
|
* Check time in milliseconds. This defines the frequency at which we check
|
||||||
* for hung I/O.
|
* for hung I/O.
|
||||||
*/
|
*/
|
||||||
unsigned long zfs_deadman_checktime_ms = 60000ULL;
|
unsigned long zfs_deadman_checktime_ms = 60000ULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* By default the deadman is enabled.
|
* By default the deadman is enabled.
|
||||||
|
Loading…
Reference in New Issue
Block a user