From 3eb3a13628d3d58b0294e4e0c0e3b47483a4053e Mon Sep 17 00:00:00 2001 From: Olaf Faaland Date: Thu, 15 Mar 2018 10:56:55 -0700 Subject: [PATCH] Report pool suspended due to MMP When the pool is suspended, record whether it was due to an I/O error or due to MMP writes failing to succeed within the required time. Change spa_suspended from uint8_t to zio_suspend_reason_t to store the reason. When userspace queries pool status via spa_tryimport(), report the reason the pool was suspended in a new key, ZPOOL_CONFIG_SUSPENDED_REASON. In libzfs, when interpreting the returned config nvlist, report suspension due to MMP with a new pool status enum value, ZPOOL_STATUS_IO_FAILURE_MMP. In status_callback(), which generates and emits the message when 'zpool status' is executed, add a case to print an appropriate message for the new pool status enum value. Reviewed-by: George Melikov Reviewed-by: Giuseppe Di Natale Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Olaf Faaland Closes #7296 --- cmd/zpool/zpool_main.c | 9 +++++++++ include/libzfs.h | 1 + include/sys/fs/zfs.h | 1 + include/sys/spa_impl.h | 2 +- include/sys/zio.h | 8 +++++++- lib/libzfs/libzfs_status.c | 8 +++++++- module/zfs/mmp.c | 2 +- module/zfs/spa.c | 8 ++++++-- module/zfs/spa_misc.c | 2 +- module/zfs/zio.c | 8 ++++---- 10 files changed, 38 insertions(+), 11 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 1cfff3ade..b07569389 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -6395,6 +6395,15 @@ status_callback(zpool_handle_t *zhp, void *data) "to be recovered.\n")); break; + case ZPOOL_STATUS_IO_FAILURE_MMP: + (void) printf(gettext("status: The pool is suspended because " + "multihost writes failed or were delayed;\n\tanother " + "system could import the pool undetected.\n")); + (void) printf(gettext("action: Make sure the pool's devices " + "are connected, then reboot your system and\n\timport the " + "pool.\n")); + break; + case ZPOOL_STATUS_IO_FAILURE_WAIT: case ZPOOL_STATUS_IO_FAILURE_CONTINUE: (void) printf(gettext("status: One or more devices are " diff --git a/include/libzfs.h b/include/libzfs.h index 147589bbf..945bd5b86 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -331,6 +331,7 @@ typedef enum { ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */ ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ + ZPOOL_STATUS_IO_FAILURE_MMP, /* failed MMP, failmode not 'panic' */ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ ZPOOL_STATUS_ERRATA, /* informational errata available */ diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 13b25a695..b7912313b 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -638,6 +638,7 @@ typedef struct zpool_rewind_policy { #define ZPOOL_CONFIG_RESILVER_TXG "resilver_txg" #define ZPOOL_CONFIG_COMMENT "comment" #define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ +#define ZPOOL_CONFIG_SUSPENDED_REASON "suspended_reason" /* not stored */ #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ #define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */ diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 06de24421..73ad1c60c 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -233,7 +233,7 @@ struct spa { zio_t *spa_suspend_zio_root; /* root of all suspended I/O */ kmutex_t spa_suspend_lock; /* protects suspend_zio_root */ kcondvar_t spa_suspend_cv; /* notification of resume */ - uint8_t spa_suspended; /* pool is suspended */ + zio_suspend_reason_t spa_suspended; /* pool is suspended */ uint8_t spa_claiming; /* pool is doing zil_claim() */ boolean_t spa_debug; /* debug enabled? */ boolean_t spa_is_root; /* pool is root */ diff --git a/include/sys/zio.h b/include/sys/zio.h index 0d741f8e2..4b0eecc2e 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -144,6 +144,12 @@ enum zio_checksum { #define ZIO_FAILURE_MODE_CONTINUE 1 #define ZIO_FAILURE_MODE_PANIC 2 +typedef enum zio_suspend_reason { + ZIO_SUSPEND_NONE = 0, + ZIO_SUSPEND_IOERR, + ZIO_SUSPEND_MMP, +} zio_suspend_reason_t; + enum zio_flag { /* * Flags inherited by gang, ddt, and vdev children, @@ -577,7 +583,7 @@ extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa, extern enum zio_compress zio_compress_select(spa_t *spa, enum zio_compress child, enum zio_compress parent); -extern void zio_suspend(spa_t *spa, zio_t *zio); +extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t); extern int zio_resume(spa_t *spa); extern void zio_resume_wait(spa_t *spa); diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c index 05a9afce8..6cdcd3827 100644 --- a/lib/libzfs/libzfs_status.c +++ b/lib/libzfs/libzfs_status.c @@ -274,10 +274,16 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap) return (ZPOOL_STATUS_BAD_GUID_SUM); /* - * Check whether the pool has suspended due to failed I/O. + * Check whether the pool has suspended. */ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED, &suspended) == 0) { + uint64_t reason; + + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED_REASON, + &reason) == 0 && reason == ZIO_SUSPEND_MMP) + return (ZPOOL_STATUS_IO_FAILURE_MMP); + if (suspended == ZIO_FAILURE_MODE_CONTINUE) return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); return (ZPOOL_STATUS_IO_FAILURE_WAIT); diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c index a08e0864e..fc8346dc3 100644 --- a/module/zfs/mmp.c +++ b/module/zfs/mmp.c @@ -519,7 +519,7 @@ mmp_thread(spa_t *spa) "succeeded in over %llus; suspending pool", spa_name(spa), NSEC2SEC(start - mmp->mmp_last_write)); - zio_suspend(spa, NULL); + zio_suspend(spa, NULL, ZIO_SUSPEND_MMP); } if (multihost && !suspended) diff --git a/module/zfs/spa.c b/module/zfs/spa.c index a7a2f6281..561f4d04b 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -3778,10 +3778,14 @@ spa_get_stats(const char *name, nvlist_t **config, ZPOOL_CONFIG_ERRCOUNT, spa_get_errlog_size(spa)) == 0); - if (spa_suspended(spa)) + if (spa_suspended(spa)) { VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_SUSPENDED, spa->spa_failmode) == 0); + VERIFY(nvlist_add_uint64(*config, + ZPOOL_CONFIG_SUSPENDED_REASON, + spa->spa_suspended) == 0); + } spa_add_spares(spa, *config); spa_add_l2cache(spa, *config); @@ -6969,7 +6973,7 @@ spa_sync(spa_t *spa, uint64_t txg) if (error == 0) break; - zio_suspend(spa, NULL); + zio_suspend(spa, NULL, ZIO_SUSPEND_IOERR); zio_resume_wait(spa); } dmu_tx_commit(tx); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 3787e010f..e92c39482 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1691,7 +1691,7 @@ spa_get_failmode(spa_t *spa) boolean_t spa_suspended(spa_t *spa) { - return (spa->spa_suspended); + return (spa->spa_suspended != ZIO_SUSPEND_NONE); } uint64_t diff --git a/module/zfs/zio.c b/module/zfs/zio.c index cd0a473e0..9a465e1be 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1864,7 +1864,7 @@ zio_reexecute(zio_t *pio) } void -zio_suspend(spa_t *spa, zio_t *zio) +zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason) { if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC) fm_panic("Pool '%s' has encountered an uncorrectable I/O " @@ -1883,7 +1883,7 @@ zio_suspend(spa_t *spa, zio_t *zio) ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); - spa->spa_suspended = B_TRUE; + spa->spa_suspended = reason; if (zio != NULL) { ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER)); @@ -1906,7 +1906,7 @@ zio_resume(spa_t *spa) * Reexecute all previously suspended i/o. */ mutex_enter(&spa->spa_suspend_lock); - spa->spa_suspended = B_FALSE; + spa->spa_suspended = ZIO_SUSPEND_NONE; cv_broadcast(&spa->spa_suspend_cv); pio = spa->spa_suspend_zio_root; spa->spa_suspend_zio_root = NULL; @@ -3975,7 +3975,7 @@ zio_done(zio_t *zio) * We'd fail again if we reexecuted now, so suspend * until conditions improve (e.g. device comes online). */ - zio_suspend(zio->io_spa, zio); + zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR); } else { /* * Reexecution is potentially a huge amount of work.