Add callback for zfs_multihost_interval

Add a callback to wake all running mmp threads when
zfs_multihost_interval is changed.

This is necessary when the interval is changed from a very large value
to a significantly lower one, while pools are imported that have the
multihost property enabled.

Without this commit, the mmp thread does not wake up and notice the new
interval until the old, longer interval has fully elapsed.  A user
monitoring mmp writes via the provided kstat would be led to believe
that the changed setting had not taken effect.

Added a test in the ZTS under mmp to verify the new functionality is
working.

Added a test to ztest which starts and stops mmp threads, and calls into
the code to signal sleeping mmp threads, to test for deadlocks or
similar locking issues.

Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #6387
This commit is contained in:
Olaf Faaland 2017-07-20 17:54:26 -07:00 committed by Brian Behlendorf
parent 60f5103445
commit 0582e40322
7 changed files with 164 additions and 2 deletions

View File

@ -326,6 +326,7 @@ ztest_func_t ztest_spa_create_destroy;
ztest_func_t ztest_fault_inject; ztest_func_t ztest_fault_inject;
ztest_func_t ztest_ddt_repair; ztest_func_t ztest_ddt_repair;
ztest_func_t ztest_dmu_snapshot_hold; ztest_func_t ztest_dmu_snapshot_hold;
ztest_func_t ztest_mmp_enable_disable;
ztest_func_t ztest_spa_rename; ztest_func_t ztest_spa_rename;
ztest_func_t ztest_scrub; ztest_func_t ztest_scrub;
ztest_func_t ztest_dsl_dataset_promote_busy; ztest_func_t ztest_dsl_dataset_promote_busy;
@ -375,6 +376,7 @@ ztest_info_t ztest_info[] = {
ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes), ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes),
ZTI_INIT(ztest_ddt_repair, 1, &zopt_sometimes), ZTI_INIT(ztest_ddt_repair, 1, &zopt_sometimes),
ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes), ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes),
ZTI_INIT(ztest_mmp_enable_disable, 1, &zopt_sometimes),
ZTI_INIT(ztest_reguid, 1, &zopt_rarely), ZTI_INIT(ztest_reguid, 1, &zopt_rarely),
ZTI_INIT(ztest_spa_rename, 1, &zopt_rarely), ZTI_INIT(ztest_spa_rename, 1, &zopt_rarely),
ZTI_INIT(ztest_scrub, 1, &zopt_rarely), ZTI_INIT(ztest_scrub, 1, &zopt_rarely),
@ -2660,6 +2662,47 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
(void) rw_unlock(&ztest_name_lock); (void) rw_unlock(&ztest_name_lock);
} }
/*
 * Exercise the MMP thread startup and shutdown paths: switch multihost on
 * (which starts the MMP thread), wake every sleeping MMP thread, then switch
 * multihost off again (which stops the thread).  The protection itself and
 * the property handling are covered by the ZTS, not by this test.
 */
/* ARGSUSED */
void
ztest_mmp_enable_disable(ztest_ds_t *zd, uint64_t id)
{
	spa_t *pool = ztest_spa;

	/*
	 * Do nothing when zo_mmp_test is set.
	 * NOTE(review): presumably that mode already manages MMP for the
	 * pool itself -- confirm against the option's definition.
	 */
	if (ztest_opts.zo_mmp_test)
		return;

	/* Phase 1: enable multihost and start the MMP thread if it is off. */
	spa_config_enter(pool, SCL_CONFIG, FTAG, RW_READER);
	mutex_enter(&pool->spa_props_lock);

	if (spa_multihost(pool) == B_FALSE) {
		/* The flag is set before the thread is started. */
		pool->spa_multihost = B_TRUE;
		mmp_thread_start(pool);
	}

	mutex_exit(&pool->spa_props_lock);
	spa_config_exit(pool, SCL_CONFIG, FTAG);

	/*
	 * Phase 2: with both locks dropped, wait for a txg sync, poke all
	 * MMP threads the same way the module parameter callback does, and
	 * wait for another sync.
	 */
	txg_wait_synced(spa_get_dsl(pool), 0);
	mmp_signal_all_threads();
	txg_wait_synced(spa_get_dsl(pool), 0);

	/* Phase 3: stop the MMP thread and disable multihost again. */
	spa_config_enter(pool, SCL_CONFIG, FTAG, RW_READER);
	mutex_enter(&pool->spa_props_lock);

	if (spa_multihost(pool) != B_FALSE) {
		/* The thread is stopped before the flag is cleared. */
		mmp_thread_stop(pool);
		pool->spa_multihost = B_FALSE;
	}

	mutex_exit(&pool->spa_props_lock);
	spa_config_exit(pool, SCL_CONFIG, FTAG);
}
/* ARGSUSED */ /* ARGSUSED */
void void
ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)

View File

@ -50,6 +50,7 @@ extern void mmp_fini(struct spa *spa);
extern void mmp_thread_start(struct spa *spa); extern void mmp_thread_start(struct spa *spa);
extern void mmp_thread_stop(struct spa *spa); extern void mmp_thread_stop(struct spa *spa);
extern void mmp_update_uberblock(struct spa *spa, struct uberblock *ub); extern void mmp_update_uberblock(struct spa *spa, struct uberblock *ub);
extern void mmp_signal_all_threads(void);
/* Global tuning */ /* Global tuning */
extern ulong_t zfs_multihost_interval; extern ulong_t zfs_multihost_interval;

View File

@ -459,13 +459,61 @@ mmp_thread(spa_t *spa)
mmp_thread_exit(mmp, &mmp->mmp_thread, &cpr); mmp_thread_exit(mmp, &mmp->mmp_thread, &cpr);
} }
/*
 * Wake the MMP thread of a single pool so that it stops sleeping on its
 * condition variable.  This is used after a module parameter has changed
 * and the thread should notice the new value.
 *
 * The broadcast is issued under mmp_thread_lock and only when an MMP thread
 * exists for this pool; otherwise there is nothing to wake.  Whether the
 * pool is active is checked by the caller, mmp_signal_all_threads().
 */
static void
mmp_signal_thread(spa_t *spa)
{
	mmp_thread_t *mt = &spa->spa_mmp;

	mutex_enter(&mt->mmp_thread_lock);
	if (mt->mmp_thread != NULL) {
		cv_broadcast(&mt->mmp_thread_cv);
	}
	mutex_exit(&mt->mmp_thread_lock);
}
/*
 * Walk every pool known to spa_next() while holding spa_namespace_lock and
 * signal the MMP thread of each pool whose state is POOL_STATE_ACTIVE.
 * Pools in any other state are skipped.
 */
void
mmp_signal_all_threads(void)
{
	spa_t *pool;

	mutex_enter(&spa_namespace_lock);
	for (pool = spa_next(NULL); pool != NULL; pool = spa_next(pool)) {
		if (pool->spa_state != POOL_STATE_ACTIVE)
			continue;
		mmp_signal_thread(pool);
	}
	mutex_exit(&spa_namespace_lock);
}
#if defined(_KERNEL) && defined(HAVE_SPL) #if defined(_KERNEL) && defined(HAVE_SPL)
#include <linux/mod_compat.h>
/*
 * Setter for the zfs_multihost_interval module parameter.  The new value is
 * stored with param_set_ulong(); if that succeeds, all MMP threads are woken
 * so they pick up the new interval right away instead of sleeping out the
 * old one.
 *
 * Returns the result of param_set_ulong(); a negative value means the
 * parameter was rejected and no thread was signalled.
 *
 * NOTE(review): mmp_signal_all_threads() takes spa_namespace_lock.  If this
 * setter can also be invoked while the module is still loading (parameter
 * given on the modprobe command line), confirm the lock is initialised by
 * then.
 */
static int
param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp)
{
	int error = param_set_ulong(val, kp);

	if (error >= 0)
		mmp_signal_all_threads();

	return (error);
}
/* BEGIN CSTYLED */ /* BEGIN CSTYLED */
module_param(zfs_multihost_fail_intervals, uint, 0644); module_param(zfs_multihost_fail_intervals, uint, 0644);
MODULE_PARM_DESC(zfs_multihost_fail_intervals, MODULE_PARM_DESC(zfs_multihost_fail_intervals,
"Max allowed period without a successful mmp write"); "Max allowed period without a successful mmp write");
module_param(zfs_multihost_interval, ulong, 0644); module_param_call(zfs_multihost_interval, param_set_multihost_interval,
param_get_ulong, &zfs_multihost_interval, 0644);
MODULE_PARM_DESC(zfs_multihost_interval, MODULE_PARM_DESC(zfs_multihost_interval,
"Milliseconds between mmp writes to each leaf"); "Milliseconds between mmp writes to each leaf");

View File

@ -413,7 +413,7 @@ tests = ['mmap_write_001_pos', 'mmap_read_001_pos']
[tests/functional/mmp] [tests/functional/mmp]
tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval', tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import', 'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import',
'mmp_write_uberblocks'] 'mmp_write_uberblocks', 'mmp_reset_interval']
[tests/functional/mount] [tests/functional/mount]
tests = ['umount_001', 'umountall_001'] tests = ['umount_001', 'umountall_001']

View File

@ -8,6 +8,7 @@ dist_pkgdata_SCRIPTS = \
mmp_inactive_import.ksh \ mmp_inactive_import.ksh \
mmp_exported_import.ksh \ mmp_exported_import.ksh \
mmp_write_uberblocks.ksh \ mmp_write_uberblocks.ksh \
mmp_reset_interval.ksh \
setup.ksh \ setup.ksh \
cleanup.ksh \ cleanup.ksh \
mmp.kshlib \ mmp.kshlib \

View File

@ -31,7 +31,9 @@ export TXG_TIMEOUT_DEFAULT=5
export MMP_POOL=mmppool export MMP_POOL=mmppool
export MMP_DIR=$TEST_BASE_DIR/mmp export MMP_DIR=$TEST_BASE_DIR/mmp
export MMP_HISTORY=100 export MMP_HISTORY=100
export MMP_HISTORY_OFF=0
export MMP_INTERVAL_HOUR=$((60*60*1000))
export MMP_INTERVAL_DEFAULT=1000 export MMP_INTERVAL_DEFAULT=1000
export MMP_INTERVAL_MIN=100 export MMP_INTERVAL_MIN=100

View File

@ -0,0 +1,67 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
#
# DESCRIPTION:
# Ensure that the MMP thread is notified when zfs_multihost_interval is
# reduced.
#
# STRATEGY:
# 1. Set zfs_multihost_interval to much longer than the test duration
# 2. Create a zpool and enable multihost
# 3. Record the current multihost kstat line count as a baseline
# 4. Set zfs_multihost_interval to 1 second
# 5. Sleep briefly
# 6. Verify MMP writes began
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/mmp/mmp.cfg
. $STF_SUITE/tests/functional/mmp/mmp.kshlib
verify_runnable "both"
# Exit handler for this test (registered with log_onexit below).
# Runs the suite's default cleanup, puts zfs_multihost_interval back to
# $MMP_INTERVAL_DEFAULT so later tests do not inherit the one-hour interval,
# and clears the hostid that the test set.
# NOTE(review): default_cleanup_noexit presumably destroys $TESTPOOL; it is
# defined outside this script -- confirm in libtest.shlib.
function cleanup
{
default_cleanup_noexit
log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
log_must mmp_clear_hostid
}
# Test body.  The order of the steps matters: the long interval must be in
# place before the pool enables multihost, and the baseline must be taken
# before the interval is reduced.
log_assert "mmp threads notified when zfs_multihost_interval reduced"
log_onexit cleanup
# Use an interval far longer than the test runs ($MMP_INTERVAL_HOUR is
# 60*60*1000; the tunable is expressed in milliseconds).
log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_HOUR
log_must mmp_set_hostid $HOSTID1
default_setup_noexit $DISK
log_must zpool set multihost=on $TESTPOOL
# Baseline: number of lines currently in the pool's multihost kstat.  The
# value is only recorded; it is not asserted to mean "zero writes".
prev_count=$(wc -l /proc/spl/kstat/zfs/$TESTPOOL/multihost | cut -f1 -d' ')
# Drop the interval back to the default; this is the change the MMP thread
# is expected to be woken for.
log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
# slight delay to allow time for the mmp write to complete
sleep 1
# Re-read the kstat line count; an unchanged count means no new MMP write
# was logged after the interval was reduced.
curr_count=$(wc -l /proc/spl/kstat/zfs/$TESTPOOL/multihost | cut -f1 -d' ')
if [ $curr_count -eq $prev_count ]; then
log_fail "mmp writes did not start when zfs_multihost_interval reduced"
fi
log_pass "mmp threads notified when zfs_multihost_interval reduced"