mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Report pool suspended due to MMP
When the pool is suspended, record whether the cause was an I/O error or MMP writes not succeeding within the required time. Change spa_suspended from uint8_t to zio_suspend_reason_t to store the reason. When userspace queries pool status via spa_tryimport(), report the reason the pool was suspended in a new key, ZPOOL_CONFIG_SUSPENDED_REASON. In libzfs, when interpreting the returned config nvlist, report suspension due to MMP with a new pool status enum value, ZPOOL_STATUS_IO_FAILURE_MMP. In status_callback(), which generates and emits the message shown when 'zpool status' is executed, add a case that prints an appropriate message for the new pool status enum value. Reviewed-by: George Melikov <mail@gmelikov.ru> Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Signed-off-by: Olaf Faaland <faaland1@llnl.gov> Closes #7296
This commit is contained in:
committed by
Brian Behlendorf
parent
3874220932
commit
cec3a0a1bb
+1
-1
@@ -520,7 +520,7 @@ mmp_thread(void *arg)
|
||||
"succeeded in over %llus; suspending pool",
|
||||
spa_name(spa),
|
||||
NSEC2SEC(start - mmp->mmp_last_write));
|
||||
zio_suspend(spa, NULL);
|
||||
zio_suspend(spa, NULL, ZIO_SUSPEND_MMP);
|
||||
}
|
||||
|
||||
if (multihost && !suspended)
|
||||
|
||||
+6
-2
@@ -3766,10 +3766,14 @@ spa_get_stats(const char *name, nvlist_t **config,
|
||||
ZPOOL_CONFIG_ERRCOUNT,
|
||||
spa_get_errlog_size(spa)) == 0);
|
||||
|
||||
if (spa_suspended(spa))
|
||||
if (spa_suspended(spa)) {
|
||||
VERIFY(nvlist_add_uint64(*config,
|
||||
ZPOOL_CONFIG_SUSPENDED,
|
||||
spa->spa_failmode) == 0);
|
||||
VERIFY(nvlist_add_uint64(*config,
|
||||
ZPOOL_CONFIG_SUSPENDED_REASON,
|
||||
spa->spa_suspended) == 0);
|
||||
}
|
||||
|
||||
spa_add_spares(spa, *config);
|
||||
spa_add_l2cache(spa, *config);
|
||||
@@ -6984,7 +6988,7 @@ spa_sync(spa_t *spa, uint64_t txg)
|
||||
|
||||
if (error == 0)
|
||||
break;
|
||||
zio_suspend(spa, NULL);
|
||||
zio_suspend(spa, NULL, ZIO_SUSPEND_IOERR);
|
||||
zio_resume_wait(spa);
|
||||
}
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
@@ -1709,7 +1709,7 @@ spa_get_failmode(spa_t *spa)
|
||||
boolean_t
|
||||
spa_suspended(spa_t *spa)
|
||||
{
|
||||
return (spa->spa_suspended);
|
||||
return (spa->spa_suspended != ZIO_SUSPEND_NONE);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
|
||||
+4
-4
@@ -2092,7 +2092,7 @@ zio_reexecute(zio_t *pio)
|
||||
}
|
||||
|
||||
void
|
||||
zio_suspend(spa_t *spa, zio_t *zio)
|
||||
zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
|
||||
{
|
||||
if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
|
||||
fm_panic("Pool '%s' has encountered an uncorrectable I/O "
|
||||
@@ -2112,7 +2112,7 @@ zio_suspend(spa_t *spa, zio_t *zio)
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
|
||||
ZIO_FLAG_GODFATHER);
|
||||
|
||||
spa->spa_suspended = B_TRUE;
|
||||
spa->spa_suspended = reason;
|
||||
|
||||
if (zio != NULL) {
|
||||
ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
|
||||
@@ -2135,7 +2135,7 @@ zio_resume(spa_t *spa)
|
||||
* Reexecute all previously suspended i/o.
|
||||
*/
|
||||
mutex_enter(&spa->spa_suspend_lock);
|
||||
spa->spa_suspended = B_FALSE;
|
||||
spa->spa_suspended = ZIO_SUSPEND_NONE;
|
||||
cv_broadcast(&spa->spa_suspend_cv);
|
||||
pio = spa->spa_suspend_zio_root;
|
||||
spa->spa_suspend_zio_root = NULL;
|
||||
@@ -4390,7 +4390,7 @@ zio_done(zio_t *zio)
|
||||
* We'd fail again if we reexecuted now, so suspend
|
||||
* until conditions improve (e.g. device comes online).
|
||||
*/
|
||||
zio_suspend(zio->io_spa, zio);
|
||||
zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR);
|
||||
} else {
|
||||
/*
|
||||
* Reexecution is potentially a huge amount of work.
|
||||
|
||||
Reference in New Issue
Block a user