diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index cca583de9..8b9a4e8cc 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -765,9 +765,28 @@ Default value: \fB0\fR. \fBzfs_deadman_enabled\fR (int) .ad .RS 12n -Enable deadman timer. See description below. +When a pool sync operation takes longer than \fBzfs_deadman_synctime_ms\fR +milliseconds, a "slow spa_sync" message is logged to the debug log +(see \fBzfs_dbgmsg_enable\fR). If \fBzfs_deadman_enabled\fR is set, +all pending IO operations are also checked and if any haven't completed +within \fBzfs_deadman_synctime_ms\fR milliseconds, a "SLOW IO" message +is logged to the debug log and a "delay" system event with the details of +the hung IO is posted. .sp -Use \fB1\fR for yes (default) and \fB0\fR to disable. +Use \fB1\fR (default) to enable the slow IO check and \fB0\fR to disable. +.RE + +.sp +.ne 2 +.na +\fBzfs_deadman_checktime_ms\fR (ulong) +.ad +.RS 12n +Once a pool sync operation has taken longer than +\fBzfs_deadman_synctime_ms\fR milliseconds, continue to check for slow +operations every \fBzfs_deadman_checktime_ms\fR milliseconds. +.sp +Default value: \fB5,000\fR. .RE .sp @@ -776,12 +795,11 @@ Use \fB1\fR for yes (default) and \fB0\fR to disable. \fBzfs_deadman_synctime_ms\fR (ulong) .ad .RS 12n -Expiration time in milliseconds. This value has two meanings. First it is -used to determine when the spa_deadman() logic should fire. By default the -spa_deadman() will fire if spa_sync() has not completed in 1000 seconds. -Secondly, the value determines if an I/O is considered "hung". Any I/O that -has not completed in zfs_deadman_synctime_ms is considered "hung" resulting -in a zevent being logged. +Interval in milliseconds after which the deadman is triggered and also +the interval after which an IO operation is considered to be "hung" +if \fBzfs_deadman_enabled\fR is set. + +See \fBzfs_deadman_enabled\fR. 
.sp Default value: \fB1,000,000\fR. .RE diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index fa9bdd7b8..c39c137e6 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -297,6 +297,12 @@ int zfs_free_leak_on_eio = B_FALSE; */ unsigned long zfs_deadman_synctime_ms = 1000000ULL; +/* + * Check time in milliseconds. This is the interval at which spa_deadman() + * re-dispatches itself to keep checking for hung I/O once it has fired. + */ +unsigned long zfs_deadman_checktime_ms = 5000ULL; + /* * By default the deadman is enabled. */ @@ -524,6 +530,10 @@ spa_deadman(void *arg) { spa_t *spa = arg; + /* Disable the deadman if the pool is suspended. */ + if (spa_suspended(spa)) + return; + zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu", (gethrtime() - spa->spa_sync_starttime) / NANOSEC, ++spa->spa_deadman_calls); @@ -532,7 +542,7 @@ spa_deadman(void *arg) spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq, spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() + - NSEC_TO_TICK(spa->spa_deadman_synctime)); + MSEC_TO_TICK(zfs_deadman_checktime_ms)); } /* @@ -2114,6 +2124,10 @@ MODULE_PARM_DESC(zfs_free_leak_on_eio, module_param(zfs_deadman_synctime_ms, ulong, 0644); MODULE_PARM_DESC(zfs_deadman_synctime_ms, "Expiration time in milliseconds"); +module_param(zfs_deadman_checktime_ms, ulong, 0644); +MODULE_PARM_DESC(zfs_deadman_checktime_ms, + "Dead I/O check interval in milliseconds"); + module_param(zfs_deadman_enabled, int, 0644); MODULE_PARM_DESC(zfs_deadman_enabled, "Enable deadman timer");