From 55de40fe47ceadabedb47dcfd9402535b15e5c43 Mon Sep 17 00:00:00 2001 From: Olaf Faaland Date: Wed, 9 Sep 2020 10:12:54 -0700 Subject: [PATCH] Initialize mmp_last_write when the mmp thread starts A great deal of time may go by between when mmp_init() is called and when the MMP thread starts, particularly if there are bad devices, because of the I/O needed to check configs, etc. If this time is too long, then (gethrtime() - mmp_last_write) > mmp_fail_ns already holds at the time the MMP thread starts. If MMP is configured to suspend the pool, the pool will be suspended immediately. This can be seen in issue #10838. The value of mmp_last_write doesn't matter before the MMP thread starts. To give the MMP thread time to issue and land MMP writes, initialize mmp_last_write when the MMP thread starts. Reviewed-by: Giuseppe Di Natale Reviewed-by: Brian Behlendorf Signed-off-by: Olaf Faaland Closes #10873 --- module/zfs/mmp.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c index 1b97de468..99852521b 100644 --- a/module/zfs/mmp.c +++ b/module/zfs/mmp.c @@ -198,14 +198,6 @@ mmp_init(spa_t *spa) cv_init(&mmp->mmp_thread_cv, NULL, CV_DEFAULT, NULL); mutex_init(&mmp->mmp_io_lock, NULL, MUTEX_DEFAULT, NULL); mmp->mmp_kstat_id = 1; - - /* - * mmp_write_done() calculates mmp_delay based on prior mmp_delay and - * the elapsed time since the last write. For the first mmp write, - * there is no "last write", so we start with fake non-zero values. - */ - mmp->mmp_last_write = gethrtime(); - mmp->mmp_delay = MSEC2NSEC(MMP_INTERVAL_OK(zfs_multihost_interval)); } void @@ -557,6 +549,18 @@ mmp_thread(void *arg) mmp_thread_enter(mmp, &cpr); + /* + * There have been no MMP writes yet. Setting mmp_last_write here gives + * us one mmp_fail_ns period, which is consistent with the activity + * check duration, to try to land an MMP write before MMP suspends the + * pool (if so configured). 
+ */ + + mutex_enter(&mmp->mmp_io_lock); + mmp->mmp_last_write = gethrtime(); + mmp->mmp_delay = MSEC2NSEC(MMP_INTERVAL_OK(zfs_multihost_interval)); + mutex_exit(&mmp->mmp_io_lock); + while (!mmp->mmp_thread_exiting) { hrtime_t next_time = gethrtime() + MSEC2NSEC(MMP_DEFAULT_INTERVAL);