mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 10:24:22 +03:00
Report pool suspended due to MMP
When the pool is suspended, record whether it was due to an I/O error or due to MMP writes not succeeding within the required time. Change spa_suspended from uint8_t to zio_suspend_reason_t to store the reason.

When userspace queries pool status via spa_tryimport(), report the reason the pool was suspended in a new key, ZPOOL_CONFIG_SUSPENDED_REASON.

In libzfs, when interpreting the returned config nvlist, report suspension due to MMP with a new pool status enum value, ZPOOL_STATUS_IO_FAILURE_MMP.

In status_callback(), which generates and emits the message when 'zpool status' is executed, add a case to print an appropriate message for the new pool status enum value.

Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #7296
This commit is contained in:
parent
3874220932
commit
cec3a0a1bb
@ -6467,6 +6467,15 @@ status_callback(zpool_handle_t *zhp, void *data)
|
|||||||
"to be recovered.\n"));
|
"to be recovered.\n"));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ZPOOL_STATUS_IO_FAILURE_MMP:
|
||||||
|
(void) printf(gettext("status: The pool is suspended because "
|
||||||
|
"multihost writes failed or were delayed;\n\tanother "
|
||||||
|
"system could import the pool undetected.\n"));
|
||||||
|
(void) printf(gettext("action: Make sure the pool's devices "
|
||||||
|
"are connected, then reboot your system and\n\timport the "
|
||||||
|
"pool.\n"));
|
||||||
|
break;
|
||||||
|
|
||||||
case ZPOOL_STATUS_IO_FAILURE_WAIT:
|
case ZPOOL_STATUS_IO_FAILURE_WAIT:
|
||||||
case ZPOOL_STATUS_IO_FAILURE_CONTINUE:
|
case ZPOOL_STATUS_IO_FAILURE_CONTINUE:
|
||||||
(void) printf(gettext("status: One or more devices are "
|
(void) printf(gettext("status: One or more devices are "
|
||||||
|
@ -333,6 +333,7 @@ typedef enum {
|
|||||||
ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */
|
ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */
|
||||||
ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */
|
ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */
|
||||||
ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
|
ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
|
||||||
|
ZPOOL_STATUS_IO_FAILURE_MMP, /* failed MMP, failmode not 'panic' */
|
||||||
ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */
|
ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */
|
||||||
ZPOOL_STATUS_ERRATA, /* informational errata available */
|
ZPOOL_STATUS_ERRATA, /* informational errata available */
|
||||||
|
|
||||||
|
@ -681,6 +681,7 @@ typedef struct zpool_rewind_policy {
|
|||||||
#define ZPOOL_CONFIG_RESILVER_TXG "resilver_txg"
|
#define ZPOOL_CONFIG_RESILVER_TXG "resilver_txg"
|
||||||
#define ZPOOL_CONFIG_COMMENT "comment"
|
#define ZPOOL_CONFIG_COMMENT "comment"
|
||||||
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
|
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
|
||||||
|
#define ZPOOL_CONFIG_SUSPENDED_REASON "suspended_reason" /* not stored */
|
||||||
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
|
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
|
||||||
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
|
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
|
||||||
#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
|
#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
|
||||||
|
@ -236,7 +236,7 @@ struct spa {
|
|||||||
zio_t *spa_suspend_zio_root; /* root of all suspended I/O */
|
zio_t *spa_suspend_zio_root; /* root of all suspended I/O */
|
||||||
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
|
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
|
||||||
kcondvar_t spa_suspend_cv; /* notification of resume */
|
kcondvar_t spa_suspend_cv; /* notification of resume */
|
||||||
uint8_t spa_suspended; /* pool is suspended */
|
zio_suspend_reason_t spa_suspended; /* pool is suspended */
|
||||||
uint8_t spa_claiming; /* pool is doing zil_claim() */
|
uint8_t spa_claiming; /* pool is doing zil_claim() */
|
||||||
boolean_t spa_debug; /* debug enabled? */
|
boolean_t spa_debug; /* debug enabled? */
|
||||||
boolean_t spa_is_root; /* pool is root */
|
boolean_t spa_is_root; /* pool is root */
|
||||||
|
@ -167,6 +167,12 @@ enum zio_encrypt {
|
|||||||
#define ZIO_FAILURE_MODE_CONTINUE 1
|
#define ZIO_FAILURE_MODE_CONTINUE 1
|
||||||
#define ZIO_FAILURE_MODE_PANIC 2
|
#define ZIO_FAILURE_MODE_PANIC 2
|
||||||
|
|
||||||
|
typedef enum zio_suspend_reason {
|
||||||
|
ZIO_SUSPEND_NONE = 0,
|
||||||
|
ZIO_SUSPEND_IOERR,
|
||||||
|
ZIO_SUSPEND_MMP,
|
||||||
|
} zio_suspend_reason_t;
|
||||||
|
|
||||||
enum zio_flag {
|
enum zio_flag {
|
||||||
/*
|
/*
|
||||||
* Flags inherited by gang, ddt, and vdev children,
|
* Flags inherited by gang, ddt, and vdev children,
|
||||||
@ -610,7 +616,7 @@ extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa,
|
|||||||
extern enum zio_compress zio_compress_select(spa_t *spa,
|
extern enum zio_compress zio_compress_select(spa_t *spa,
|
||||||
enum zio_compress child, enum zio_compress parent);
|
enum zio_compress child, enum zio_compress parent);
|
||||||
|
|
||||||
extern void zio_suspend(spa_t *spa, zio_t *zio);
|
extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t);
|
||||||
extern int zio_resume(spa_t *spa);
|
extern int zio_resume(spa_t *spa);
|
||||||
extern void zio_resume_wait(spa_t *spa);
|
extern void zio_resume_wait(spa_t *spa);
|
||||||
|
|
||||||
|
@ -275,10 +275,16 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap)
|
|||||||
return (ZPOOL_STATUS_BAD_GUID_SUM);
|
return (ZPOOL_STATUS_BAD_GUID_SUM);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check whether the pool has suspended due to failed I/O.
|
* Check whether the pool has suspended.
|
||||||
*/
|
*/
|
||||||
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
|
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
|
||||||
&suspended) == 0) {
|
&suspended) == 0) {
|
||||||
|
uint64_t reason;
|
||||||
|
|
||||||
|
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED_REASON,
|
||||||
|
&reason) == 0 && reason == ZIO_SUSPEND_MMP)
|
||||||
|
return (ZPOOL_STATUS_IO_FAILURE_MMP);
|
||||||
|
|
||||||
if (suspended == ZIO_FAILURE_MODE_CONTINUE)
|
if (suspended == ZIO_FAILURE_MODE_CONTINUE)
|
||||||
return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
|
return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
|
||||||
return (ZPOOL_STATUS_IO_FAILURE_WAIT);
|
return (ZPOOL_STATUS_IO_FAILURE_WAIT);
|
||||||
|
@ -520,7 +520,7 @@ mmp_thread(void *arg)
|
|||||||
"succeeded in over %llus; suspending pool",
|
"succeeded in over %llus; suspending pool",
|
||||||
spa_name(spa),
|
spa_name(spa),
|
||||||
NSEC2SEC(start - mmp->mmp_last_write));
|
NSEC2SEC(start - mmp->mmp_last_write));
|
||||||
zio_suspend(spa, NULL);
|
zio_suspend(spa, NULL, ZIO_SUSPEND_MMP);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (multihost && !suspended)
|
if (multihost && !suspended)
|
||||||
|
@ -3766,10 +3766,14 @@ spa_get_stats(const char *name, nvlist_t **config,
|
|||||||
ZPOOL_CONFIG_ERRCOUNT,
|
ZPOOL_CONFIG_ERRCOUNT,
|
||||||
spa_get_errlog_size(spa)) == 0);
|
spa_get_errlog_size(spa)) == 0);
|
||||||
|
|
||||||
if (spa_suspended(spa))
|
if (spa_suspended(spa)) {
|
||||||
VERIFY(nvlist_add_uint64(*config,
|
VERIFY(nvlist_add_uint64(*config,
|
||||||
ZPOOL_CONFIG_SUSPENDED,
|
ZPOOL_CONFIG_SUSPENDED,
|
||||||
spa->spa_failmode) == 0);
|
spa->spa_failmode) == 0);
|
||||||
|
VERIFY(nvlist_add_uint64(*config,
|
||||||
|
ZPOOL_CONFIG_SUSPENDED_REASON,
|
||||||
|
spa->spa_suspended) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
spa_add_spares(spa, *config);
|
spa_add_spares(spa, *config);
|
||||||
spa_add_l2cache(spa, *config);
|
spa_add_l2cache(spa, *config);
|
||||||
@ -6984,7 +6988,7 @@ spa_sync(spa_t *spa, uint64_t txg)
|
|||||||
|
|
||||||
if (error == 0)
|
if (error == 0)
|
||||||
break;
|
break;
|
||||||
zio_suspend(spa, NULL);
|
zio_suspend(spa, NULL, ZIO_SUSPEND_IOERR);
|
||||||
zio_resume_wait(spa);
|
zio_resume_wait(spa);
|
||||||
}
|
}
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
|
@ -1709,7 +1709,7 @@ spa_get_failmode(spa_t *spa)
|
|||||||
boolean_t
|
boolean_t
|
||||||
spa_suspended(spa_t *spa)
|
spa_suspended(spa_t *spa)
|
||||||
{
|
{
|
||||||
return (spa->spa_suspended);
|
return (spa->spa_suspended != ZIO_SUSPEND_NONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
|
@ -2092,7 +2092,7 @@ zio_reexecute(zio_t *pio)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zio_suspend(spa_t *spa, zio_t *zio)
|
zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
|
||||||
{
|
{
|
||||||
if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
|
if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
|
||||||
fm_panic("Pool '%s' has encountered an uncorrectable I/O "
|
fm_panic("Pool '%s' has encountered an uncorrectable I/O "
|
||||||
@ -2112,7 +2112,7 @@ zio_suspend(spa_t *spa, zio_t *zio)
|
|||||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
|
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
|
||||||
ZIO_FLAG_GODFATHER);
|
ZIO_FLAG_GODFATHER);
|
||||||
|
|
||||||
spa->spa_suspended = B_TRUE;
|
spa->spa_suspended = reason;
|
||||||
|
|
||||||
if (zio != NULL) {
|
if (zio != NULL) {
|
||||||
ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
|
ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
|
||||||
@ -2135,7 +2135,7 @@ zio_resume(spa_t *spa)
|
|||||||
* Reexecute all previously suspended i/o.
|
* Reexecute all previously suspended i/o.
|
||||||
*/
|
*/
|
||||||
mutex_enter(&spa->spa_suspend_lock);
|
mutex_enter(&spa->spa_suspend_lock);
|
||||||
spa->spa_suspended = B_FALSE;
|
spa->spa_suspended = ZIO_SUSPEND_NONE;
|
||||||
cv_broadcast(&spa->spa_suspend_cv);
|
cv_broadcast(&spa->spa_suspend_cv);
|
||||||
pio = spa->spa_suspend_zio_root;
|
pio = spa->spa_suspend_zio_root;
|
||||||
spa->spa_suspend_zio_root = NULL;
|
spa->spa_suspend_zio_root = NULL;
|
||||||
@ -4390,7 +4390,7 @@ zio_done(zio_t *zio)
|
|||||||
* We'd fail again if we reexecuted now, so suspend
|
* We'd fail again if we reexecuted now, so suspend
|
||||||
* until conditions improve (e.g. device comes online).
|
* until conditions improve (e.g. device comes online).
|
||||||
*/
|
*/
|
||||||
zio_suspend(zio->io_spa, zio);
|
zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR);
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* Reexecution is potentially a huge amount of work.
|
* Reexecution is potentially a huge amount of work.
|
||||||
|
Loading…
Reference in New Issue
Block a user