Record skipped MMP writes in multihost_history

Once per pass through the MMP thread's loop, the vdev tree is walked to
find a suitable leaf to write the next MMP block to.  If no such leaf is
found, the thread sleeps for a while and resumes at the top of the loop.

Add an entry to multihost_history when no leaf can be found, and record
the reason in the error column.  The error code for such entries is a
bitfield, displayed in hex:

0x1  At least one vdev (interior or leaf) was not writeable.
0x2  At least one writeable leaf vdev was found, but it had a pending
MMP write.

timestamp = the time in seconds since the epoch when no leaf could be
found originally.

duration = the time (in ns) during which no MMP block was written for
this reason.  This does not include the preceding inter-write period
nor the following inter-write period.

vdev_guid = the number of sequential cycles of the MMP thread loop when
this occurred.

Sample output, truncated to fit:

For records of skipped MMP writes the right-most column, vdev_path, is
reported as "-".

id   txg  timestamp   error  duration    mmp_delay  vdev_guid     ...
936  11   1520036441  0      146264      891422313  1740883117838 ...
937  11   1520036441  0      163956      888356657  7320395061548 ...
938  11   1520036442  0      130690      885314969  7320395061548 ...
939  11   1520036442  0      2001068577  882296582  1740883117838 ...
940  11   1520036443  0      161806      882296582  7320395061548 ...
941  11   1520036443  0x2    0           998020546  1             ...
942  11   1520036444  0      136585      998020546  7320395061548 ...
943  11   1520036444  0x2    0           998020257  1             ...
944  11   1520036445  5      2002662964  994160219  1740883117838 ...
945  11   1520036445  0x2    998073118   994160219  3             ...
946  11   1520036447  0      247136      994160219  7320395061548 ...

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #7212
This commit is contained in:
Olaf Faaland
2018-02-26 20:32:49 -05:00
committed by Brian Behlendorf
parent 14c240cede
commit d2160d0538
11 changed files with 263 additions and 50 deletions
+1 -1
View File
@@ -574,7 +574,7 @@ tags = ['functional', 'mmap']
[tests/functional/mmp]
tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import',
'mmp_write_uberblocks', 'mmp_reset_interval']
'mmp_write_uberblocks', 'mmp_reset_interval', 'multihost_history']
tags = ['functional', 'mmp']
[tests/functional/mount]
@@ -1,5 +1,6 @@
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/mmp
dist_pkgdata_SCRIPTS = \
multihost_history.ksh \
mmp_on_thread.ksh \
mmp_on_uberblocks.ksh \
mmp_on_off.ksh \
@@ -93,11 +93,10 @@ function mmp_clear_hostid
rm -f $HOSTID_FILE
}
function mmp_pool_create # pool dir
function mmp_pool_create_simple # pool dir
{
typeset pool=${1:-$MMP_POOL}
typeset dir=${2:-$MMP_DIR}
typeset opts="-VVVVV -T120 -M -k0 -f $dir -E -p $pool"
log_must mkdir -p $dir
log_must rm -f $dir/*
@@ -108,6 +107,16 @@ function mmp_pool_create # pool dir
log_must zpool create -f -o cachefile=$MMP_CACHE $pool \
mirror $dir/vdev1 $dir/vdev2
log_must zpool set multihost=on $pool
}
function mmp_pool_create # pool dir
{
typeset pool=${1:-$MMP_POOL}
typeset dir=${2:-$MMP_DIR}
typeset opts="-VVVVV -T120 -M -k0 -f $dir -E -p $pool"
mmp_pool_create_simple $pool $dir
log_must mv $MMP_CACHE ${MMP_CACHE}.stale
log_must zpool export $pool
log_must mmp_clear_hostid
@@ -192,12 +201,22 @@ function clear_mmp_history
log_must set_tunable64 zfs_multihost_history $MMP_HISTORY
}
function count_uberblocks # pool duration
function count_skipped_mmp_writes # pool duration
{
typeset pool=$1
typeset -i duration=$2
typeset hist_path="/proc/spl/kstat/zfs/$pool/multihost"
sleep $duration
echo $(cat "$hist_path" | sed '1,2d' | wc -l)
awk 'BEGIN {count=0}; $NF == "-" {count++}; END {print count};' "$hist_path"
}
#
# Count the MMP writes recorded in a pool's multihost history kstat.
#
# $1 pool      name of the pool whose history is read
# $2 duration  time to sleep (passed to sleep(1)) before sampling
#
# Prints the number of history records whose last column (vdev_path) is
# not "-"; records of skipped MMP writes report "-" there and are not
# counted.  The first two lines of the kstat file are headers rather than
# records, so they are excluded; counting them would make the result
# always non-zero and defeat callers that check for "no writes".
#
function count_mmp_writes # pool duration
{
	typeset pool=$1
	typeset -i duration=$2
	typeset hist_path="/proc/spl/kstat/zfs/$pool/multihost"

	sleep $duration
	awk 'BEGIN {count=0}; NR > 2 && $NF != "-" {count++}; END {print count};' "$hist_path"
}
@@ -58,7 +58,7 @@ log_must mmp_set_hostid $HOSTID1
default_setup_noexit "$DISKS"
log_must zpool set multihost=on $TESTPOOL
clear_mmp_history
UBER_CHANGES=$(count_uberblocks $TESTPOOL 10)
UBER_CHANGES=$(count_mmp_writes $TESTPOOL 10)
log_note "Uberblock changed $UBER_CHANGES times"
@@ -55,7 +55,7 @@ log_must zpool set multihost=on $TESTPOOL
clear_mmp_history
log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
uber_count=$(count_uberblocks $TESTPOOL 1)
uber_count=$(count_mmp_writes $TESTPOOL 1)
if [ $uber_count -eq 0 ]; then
log_fail "mmp writes did not start when zfs_multihost_interval reduced"
@@ -49,7 +49,7 @@ default_mirror_setup_noexit $DISKS
log_must zpool set multihost=on $TESTPOOL
log_must zinject -d ${DISK[0]} -e io -T write -f 50 $TESTPOOL -L uber
clear_mmp_history
uber_count=$(count_uberblocks $TESTPOOL 3)
uber_count=$(count_mmp_writes $TESTPOOL 3)
if [ $uber_count -eq 0 ]; then
log_fail "mmp writes did not occur when uberblock IO errors injected"
+67
View File
@@ -0,0 +1,67 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
#
# DESCRIPTION:
# zfs_multihost_history should report both writes issued and gaps
#
# STRATEGY:
# 1. Create a 2-vdev pool with mmp enabled
# 2. Delay writes by 2*MMP_INTERVAL_DEFAULT
# 3. Check multihost_history for both issued writes, and for gaps where
# no write could be issued because all vdevs are busy
#
# During the first MMP_INTERVAL period 2 MMP writes will be issued - one to
# each vdev. At the third scheduled attempt to write, at time t0+MMP_INTERVAL,
# both vdevs will still have outstanding writes, so a skipped write entry will
# be recorded in the multihost_history.
# Pull in the common test library plus the MMP-specific configuration and
# helper functions used by this test.
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/mmp/mmp.cfg
. $STF_SUITE/tests/functional/mmp/mmp.kshlib
# NOTE(review): presumably declares the test runnable in both global and
# non-global zones -- confirm against libtest.shlib.
verify_runnable "both"
#
# Exit handler for this test (registered with log_onexit).  Clears the
# zinject handlers, tears down the MMP test pool via mmp_pool_destroy, and
# removes the hostid file via mmp_clear_hostid.
#
function cleanup
{
# Clear the injected delays before touching the pool.
log_must zinject -c all
mmp_pool_destroy $MMP_POOL $MMP_DIR
log_must mmp_clear_hostid
}
log_assert "zfs_multihost_history records writes and skipped writes"
log_onexit cleanup
# Step 1: create the two-vdev mirrored pool with multihost enabled.
mmp_pool_create_simple $MMP_POOL $MMP_DIR
# Step 2: inject an I/O delay of 2*MMP_INTERVAL_DEFAULT on each vdev so that
# MMP writes stay outstanding past the next scheduled write.
# NOTE(review): the ":10" suffix is presumably the zinject lane count --
# confirm against zinject(8).
log_must zinject -d $MMP_DIR/vdev1 -D$((2*MMP_INTERVAL_DEFAULT)):10 $MMP_POOL
log_must zinject -d $MMP_DIR/vdev2 -D$((2*MMP_INTERVAL_DEFAULT)):10 $MMP_POOL
# Step 3: sample the history.  Each helper sleeps for its duration argument
# before reading, so the two samples are taken one after the other.
# NOTE(review): the /1000 presumably converts MMP_INTERVAL_DEFAULT from
# milliseconds to seconds -- confirm against mmp.cfg.
mmp_writes=$(count_mmp_writes $MMP_POOL $((MMP_INTERVAL_DEFAULT/1000)))
mmp_skips=$(count_skipped_mmp_writes $MMP_POOL $((MMP_INTERVAL_DEFAULT/1000)))
# At least one issued write must have been recorded ...
if [ $mmp_writes -lt 1 ]; then
log_fail "mmp writes entries missing when delays injected"
fi
# ... and at least one skipped-write record (vdev_path reported as "-").
if [ $mmp_skips -lt 1 ]; then
log_fail "mmp skipped write entries missing when delays injected"
fi
log_pass "zfs_multihost_history records writes and skipped writes"