Report pool suspended due to MMP

When the pool is suspended, record whether the cause was an I/O error or
MMP (multi-modifier protection) writes failing to complete within the
required time.

Change spa_suspended from uint8_t to zio_suspend_reason_t to store the
reason.

When userspace queries pool status via spa_tryimport(), report the
reason the pool was suspended under a new key in the returned config
nvlist, ZPOOL_CONFIG_SUSPENDED_REASON.

In libzfs, when interpreting the returned config nvlist, report
suspension due to MMP with a new pool status enum value,
ZPOOL_STATUS_IO_FAILURE_MMP.

In status_callback(), which generates and emits the message when 'zpool
status' is executed, add a case to print an appropriate message for the
new pool status enum value.

Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #7296
This commit is contained in:
Olaf Faaland
2018-03-15 10:56:55 -07:00
committed by Brian Behlendorf
parent 3874220932
commit cec3a0a1bb
10 changed files with 38 additions and 11 deletions
+4 -4
View File
@@ -2092,7 +2092,7 @@ zio_reexecute(zio_t *pio)
}
void
zio_suspend(spa_t *spa, zio_t *zio)
zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
{
if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
fm_panic("Pool '%s' has encountered an uncorrectable I/O "
@@ -2112,7 +2112,7 @@ zio_suspend(spa_t *spa, zio_t *zio)
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
ZIO_FLAG_GODFATHER);
spa->spa_suspended = B_TRUE;
spa->spa_suspended = reason;
if (zio != NULL) {
ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
@@ -2135,7 +2135,7 @@ zio_resume(spa_t *spa)
* Reexecute all previously suspended i/o.
*/
mutex_enter(&spa->spa_suspend_lock);
spa->spa_suspended = B_FALSE;
spa->spa_suspended = ZIO_SUSPEND_NONE;
cv_broadcast(&spa->spa_suspend_cv);
pio = spa->spa_suspend_zio_root;
spa->spa_suspend_zio_root = NULL;
@@ -4390,7 +4390,7 @@ zio_done(zio_t *zio)
* We'd fail again if we reexecuted now, so suspend
* until conditions improve (e.g. device comes online).
*/
zio_suspend(zio->io_spa, zio);
zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR);
} else {
/*
* Reexecution is potentially a huge amount of work.