Add auto-online test for ZED/FMA as part of the ZTS

Automated auto-online test to go along with the ZED FMA integration (PR 4673).
auto_online_001_pos works with real devices (sd- and mpath) and with non-real
block devices (loop) by adding a scsi_debug device to the pool.

Note: In order for the test group to run, ZED must not already be running.
Kernel 3.16.37 or higher is needed for scsi_debug to work properly.
If a timeout occurs on a test using a scsi_debug device (error noticed on an
Ubuntu system), a reboot might be needed in order for the test to pass (this
needs more investigation).

Also suppressed the output from is_real_device/is_loop_device/is_mpath_device,
which was cluttering the log file with useless error messages from the
previous patch (e.g. "/dev/mapper/sdc is not a block device").

Reviewed-by: Don Brady <don.brady@intel.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: David Quigley <david.quigley@intel.com>
Signed-off-by: Sydney Vanda <sydney.m.vanda@intel.com>
Closes #5774
This commit is contained in:
Sydney Vanda 2016-09-23 13:51:08 -07:00 committed by Brian Behlendorf
parent 4859fe796c
commit ec0e24c232
13 changed files with 524 additions and 21 deletions

View File

@ -108,6 +108,9 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_COMMANDS_LINUX], [
AC_PATH_TOOL(IOSTAT, iostat, "")
AC_PATH_TOOL(LOCKFS, lsof, "")
AC_PATH_TOOL(LSBLK, lsblk, "")
AC_PATH_TOOL(LSMOD, lsmod, "")
AC_PATH_TOOL(LSSCSI, lsscsi, "")
AC_PATH_TOOL(MODLOAD, modprobe, "")
AC_PATH_TOOL(MODUNLOAD, rmmod, "")
AC_PATH_TOOL(MPSTAT, mpstat, "")
AC_PATH_TOOL(NEWFS, mke2fs, "")

View File

@ -235,6 +235,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/delegate/Makefile
tests/zfs-tests/tests/functional/devices/Makefile
tests/zfs-tests/tests/functional/exec/Makefile
tests/zfs-tests/tests/functional/fault/Makefile
tests/zfs-tests/tests/functional/features/async_destroy/Makefile
tests/zfs-tests/tests/functional/features/large_dnode/Makefile
tests/zfs-tests/tests/functional/features/Makefile

View File

@ -409,6 +409,9 @@ tests = ['devices_003_pos']
[tests/functional/exec]
tests = ['exec_001_pos']
[tests/functional/fault]
tests = ['auto_online_001_pos']
[tests/functional/features/async_destroy]
tests = ['async_destroy_001_pos']

View File

@ -56,12 +56,15 @@ export LOFIADM="@LOFIADM@"
export LOGNAME="@LOGNAME@"
export LS="@LS@"
export LSBLK="@LSBLK@"
export LSMOD="@LSMOD@"
export LSSCSI="@LSSCSI@"
export MD5SUM="@MD5SUM@"
export MKDIR="@MKDIR@"
export MKNOD="@MKNOD@"
export MKTEMP="@MKTEMP@"
export MNTTAB="@MNTTAB@"
export MODINFO="@MODINFO@"
export MODLOAD="@MODLOAD@"
export MODUNLOAD="@MODUNLOAD@"
export MOUNT="@MOUNT@"
export MPSTAT="@MPSTAT@"

View File

@ -34,6 +34,10 @@
# Common paths
bindir=@bindir@
sbindir=@sbindir@
etcdir=@sysconfdir@
# ZFS Directories
export ZEDLETDIR=${ZEDLETDIR:-${etcdir}/zfs/zed.d}
# ZFS Commands
export ZDB=${ZDB:-${sbindir}/zdb}
@ -47,6 +51,7 @@ export RAIDZ_TEST=${RAIDZ_TEST:-${bindir}/raidz_test}
export ARC_SUMMARY=${ARC_SUMMARY:-${bindir}/arc_summary.py}
export ARCSTAT=${ARCSTAT:-${bindir}/arcstat.py}
export DBUFSTAT=${DBUFSTAT:-${bindir}/dbufstat.py}
export ZED=${ZED:-${sbindir}/zed}
. $STF_SUITE/include/libtest.shlib

View File

@ -1645,22 +1645,144 @@ function reexport_pool
}
#
# Verify a given disk is online or offline
# Verify a given disk or pool state
#
# $1 pool  - pool to inspect (required)
# $2 disk  - disk to look for in the "$ZPOOL status -v" output; a leading
#            $DEV_DSKDIR/ is stripped. Pass "" to check the pool health only.
# $3 state - expected state (required), matched case-insensitively
#
# Return 0 if pool/disk matches expected state, 1 otherwise
#
function check_state # pool disk state{online,offline}
function check_state # pool disk state{online,offline,degraded}
{
typeset pool=$1
typeset disk=${2#$DEV_DSKDIR/}
typeset state=$3
$ZPOOL status -v $pool | grep "$disk" \
| grep -i "$state" > /dev/null 2>&1
# Both the pool and the expected state are mandatory.
[[ -z $pool ]] || [[ -z $state ]] \
&& log_fail "Arguments invalid or missing"
if [[ -z $disk ]]; then
# No disk given: compare the pool's "health" property against $state.
$ZPOOL get -H -o value health $pool \
| grep -i "$state" > /dev/null 2>&1
else
# Disk given: look for $state on the line(s) of the status output
# that mention the disk.
$ZPOOL status -v $pool | grep "$disk" \
| grep -i "$state" > /dev/null 2>&1
fi
# Hand the exit status of the grep pipeline chosen above to the caller.
return $?
}
#
# Cause a scan of all scsi host adapters by default
#
# $1 optional host number; when given, only
#    /sys/class/scsi_host/host<N> is rescanned (and the path of its
#    "scan" file is printed on stdout)
#
# Returns the exit status of the last write to a "scan" file.
#
function scan_scsi_hosts
{
	typeset hostnum=${1}
	# The loop variable must be local: callers (e.g. the auto-online
	# test) keep their own "host" variable which must not be clobbered.
	typeset host

	if [[ -z $hostnum ]]; then
		# '- - -' is written as a wildcard channel/target/lun triple
		# so that the whole adapter is rescanned.
		for host in /sys/class/scsi_host/host*; do
			echo '- - -' > "$host/scan"
		done
	else
		echo "/sys/class/scsi_host/host$hostnum/scan"
		echo '- - -' > "/sys/class/scsi_host/host$hostnum/scan"
	fi
}
#
# Give udev a chance to create the minors of newly created block devices.
#
# Linux only: runs "$UDEVADM trigger" followed by "$UDEVADM settle" and
# returns the status of the latter. On any other platform nothing is done
# and 0 is returned.
#
function block_device_wait
{
	is_linux || return 0

	$UDEVADM trigger
	$UDEVADM settle
}
#
# Online or offline a disk on the system
#
# First checks state of disk. Test will fail if disk is not properly onlined
# or offlined. Online is a full rescan of SCSI disks by echoing to every
# host entry.
#
# $1 disk  - disk name relative to $DEV_DSKDIR (required)
# $2 state - "online" or "offline" (required)
# $3 host  - optional SCSI host number handed to scan_scsi_hosts when
#            onlining; when empty every host is rescanned
#
# Only acts on Linux. Calls log_fail on missing arguments, on an
# unsupported disk/state combination, or when the disk does not reach the
# requested state.
#
function on_off_disk # disk state{online,offline} host
{
	typeset disk=$1
	typeset state=$2
	typeset host=$3
	# Scratch variables, declared so they do not leak into the caller.
	typeset dm_name slave slave_dir ss sd dev_state dev_delete

	[[ -z $disk ]] || [[ -z $state ]] && \
	    log_fail "Arguments invalid or missing"

	if is_linux; then
		if [[ $state == "offline" ]] && ( is_mpath_device $disk ); then
			# A multipath device is offlined by removing its slave
			# paths one after the other until none is left.
			dm_name="$($READLINK $DEV_DSKDIR/$disk \
			    | $NAWK -F / '{print $2}')"
			# "exit" keeps only the first slave: with several
			# paths the unquoted list would otherwise be split
			# into an egrep pattern plus bogus file arguments.
			slave="$($LS /sys/block/${dm_name}/slaves \
			    | $NAWK '{print $1; exit}')"
			while [[ -n $slave ]]; do
				#check if disk is online
				$LSSCSI | $EGREP $slave > /dev/null
				if (($? == 0)); then
					slave_dir="/sys/block/${dm_name}"
					slave_dir+="/slaves/${slave}/device"
					ss="${slave_dir}/state"
					sd="${slave_dir}/delete"
					log_must eval "$ECHO 'offline' > ${ss}"
					log_must eval "$ECHO '1' > ${sd}"
					# the path must be gone from lsscsi now
					$LSSCSI | $EGREP $slave > /dev/null
					if (($? == 0)); then
						log_fail "Offlining" \
						    "$disk failed"
					fi
				fi
				# next remaining slave, if any
				slave="$($LS /sys/block/$dm_name/slaves \
				    2>/dev/null | $NAWK '{print $1; exit}')"
			done
		elif [[ $state == "offline" ]] && ( is_real_device $disk ); then
			#check if disk is online
			$LSSCSI | $EGREP $disk > /dev/null
			if (($? == 0)); then
				dev_state="/sys/block/$disk/device/state"
				dev_delete="/sys/block/$disk/device/delete"
				log_must eval "$ECHO 'offline' > ${dev_state}"
				log_must eval "$ECHO '1' > ${dev_delete}"
				$LSSCSI | $EGREP $disk > /dev/null
				if (($? == 0)); then
					log_fail "Offlining $disk" \
					    "failed"
				fi
			else
				log_note "$disk is already offline"
			fi
		elif [[ $state == "online" ]]; then
			#force a full rescan
			log_must scan_scsi_hosts $host
			block_device_wait
			if is_mpath_device $disk; then
				dm_name="$($READLINK $DEV_DSKDIR/$disk \
				    | $NAWK -F / '{print $2}')"
				# one slave showing up in lsscsi is enough
				slave="$($LS /sys/block/$dm_name/slaves \
				    | $NAWK '{print $1; exit}')"
				$LSSCSI | $EGREP $slave > /dev/null
				if (($? != 0)); then
					log_fail "Onlining $disk failed"
				fi
			elif is_real_device $disk; then
				$LSSCSI | $EGREP $disk > /dev/null
				if (($? != 0)); then
					log_fail "Onlining $disk failed"
				fi
			else
				log_fail "$disk is not a real dev"
			fi
		else
			# unknown state, or "offline" requested for a disk
			# that is neither multipath nor a real device
			log_fail "$disk failed to $state"
		fi
	fi
}
#
# Get the mountpoint of snapshot
# For the snapshot use <mp_filesystem>/.zfs/snapshot/<snap>
@ -2754,7 +2876,8 @@ function is_real_device #disk
[[ -z $disk ]] && log_fail "No argument for disk given."
if is_linux; then
$LSBLK $DEV_RDSKDIR/$disk -o TYPE | $EGREP disk > /dev/null 2>&1
($LSBLK $DEV_RDSKDIR/$disk -o TYPE | $EGREP disk > /dev/null) \
2>/dev/null
return $?
fi
}
@ -2768,7 +2891,8 @@ function is_loop_device #disk
[[ -z $disk ]] && log_fail "No argument for disk given."
if is_linux; then
$LSBLK $DEV_RDSKDIR/$disk -o TYPE | $EGREP loop > /dev/null 2>&1
($LSBLK $DEV_RDSKDIR/$disk -o TYPE | $EGREP loop > /dev/null) \
2>/dev/null
return $?
fi
}
@ -2784,7 +2908,8 @@ function is_mpath_device #disk
[[ -z $disk ]] && log_fail "No argument for disk given."
if is_linux; then
$LSBLK $DEV_MPATHDIR/$disk -o TYPE | $EGREP mpath > /dev/null 2>&1
($LSBLK $DEV_MPATHDIR/$disk -o TYPE | $EGREP mpath >/dev/null) \
2>/dev/null
if (($? == 0)); then
$READLINK $DEV_MPATHDIR/$disk > /dev/null 2>&1
return $?
@ -2807,11 +2932,13 @@ function set_slice_prefix
if is_linux; then
while (( i < $DISK_ARRAY_NUM )); do
disk="$($ECHO $DISKS | $NAWK '{print $(i + 1)}')"
if ( is_mpath_device $disk ) && [[ -z $($ECHO $disk | awk 'substr($1,18,1)\
~ /^[[:digit:]]+$/') ]] || ( is_real_device $disk ); then
if ( is_mpath_device $disk ) && [[ -z $($ECHO $disk \
| awk 'substr($1,18,1) ~ /^[[:digit:]]+$/') ]] || \
( is_real_device $disk ); then
export SLICE_PREFIX=""
return 0
elif ( is_mpath_device $disk || is_loop_device $disk ); then
elif ( is_mpath_device $disk || is_loop_device \
$disk ); then
export SLICE_PREFIX="p"
return 0
else
@ -2872,6 +2999,34 @@ function get_device_dir #device
fi
}
#
# Print the persistent (/dev/disk/by-id) name of the given disk.
#
# $1 device - disk name relative to $DEV_DSKDIR
#
# On Linux the name is taken from the first matching by-id link reported by
# "$UDEVADM info" (a dm-uuid link for multipath devices). For any other
# kind of device, and on other platforms, the name is echoed back unchanged.
#
function get_persistent_disk_name #device
{
	typeset device=$1
	typeset dev_id
	typeset link_pattern

	if ! is_linux; then
		$ECHO $device
		return
	fi

	if is_real_device $device; then
		link_pattern="disk/by-id"
	elif is_mpath_device $device; then
		link_pattern="disk/by-id/dm-uuid"
	else
		$ECHO $device
		return
	fi

	# Second field of the first matching line is the link path; its
	# third '/'-separated component is the by-id name itself.
	dev_id="$($UDEVADM info -q all -n $DEV_DSKDIR/$device \
	    | $EGREP $link_pattern \
	    | $NAWK '{print $2; exit}' \
	    | $NAWK -F / '{print $3}')"
	$ECHO $dev_id
}
#
# Get the package name
#
@ -3028,17 +3183,6 @@ function get_min
echo $min
}
#
# Wait for newly created block devices to have their minors created.
#
# On Linux this runs "$UDEVADM trigger" followed by "$UDEVADM settle";
# on any other platform it does nothing.
#
function block_device_wait
{
if is_linux; then
$UDEVADM trigger
$UDEVADM settle
fi
}
#
# Synchronize all the data in pool
#

View File

@ -15,6 +15,7 @@ SUBDIRS = \
delegate \
devices \
exec \
fault \
features \
grow_pool \
grow_replicas \

View File

@ -0,0 +1,6 @@
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/fault
dist_pkgdata_SCRIPTS = \
fault.cfg \
setup.ksh \
cleanup.ksh \
auto_online_001_pos.ksh

View File

@ -0,0 +1,142 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2016 by Intel Corporation. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg
#
# DESCRIPTION:
# Testing auto-online FMA ZED logic.
#
# STRATEGY:
# 1. Clear the labels of the test disks.
# 2. Create a raidz1 zpool using persistent disk path names
#    (ie not /dev/sdc) and add some data to it.
# 3. For every disk under test:
#    a. export the pool
#    b. offline the disk
#    c. import the pool (by name or, picked at random, by pool guid)
#       with the disk missing and verify it is degraded
#    d. online the disk
#    e. poll the ZED log until ZED reports a matched devid, ie the
#       onlined disk was automatically added back to the pool
#    f. verify the pool is online again
#
# If loop devices are used, then a scsi_debug device is added to the pool
# and it is the only disk that is offlined/onlined.
#
verify_runnable "both"
# Skip the test when $DISKS are not physical devices.
if ! is_physical_device $DISKS; then
log_unsupported "Unsupported disks for this test."
fi
#
# Exit handler: bring the last offlined disk back and destroy the test pool.
#
function cleanup
{
	#online last disk before fail
	# offline_disk is only set once the offline/online loop has been
	# entered; calling on_off_disk without it would shift "online" into
	# the disk argument and abort the cleanup.
	if [[ -n $offline_disk ]]; then
		on_off_disk $offline_disk "online"
	fi
	poolexists $TESTPOOL && destroy_pool $TESTPOOL
}
log_assert "Testing auto-online FMA ZED logic"
log_onexit cleanup

# The pool is re-imported by name unless the guid is picked below.
target=$TESTPOOL

# Log file polled for ZED activity, and the message taken as proof that ZED
# picked up the onlined disk.
typeset zedlog=${ZEDLET_DIR}/zedlog
typeset zed_match="zfs_iter_vdev: matched devid"

if is_loop_device $DISK1; then
	# With loop devices only the scsi_debug device is offlined/onlined.
	SD=$($LSSCSI | $NAWK '/scsi_debug/ {print $6; exit}')
	SDDEVICE=$($ECHO $SD | $NAWK -F / '{print $3}')
	SDDEVICE_ID=$(get_persistent_disk_name $SDDEVICE)
	autoonline_disks="$SDDEVICE"
else
	autoonline_disks="$DISK1 $DISK2 $DISK3"
fi

# Clear disk labels
for i in {0..2}
do
	log_must $ZPOOL labelclear -f /dev/disk/by-id/"${devs_id[i]}"
done

if is_loop_device $DISK1; then
	#create a pool with one scsi_debug device and 3 loop devices
	log_must $ZPOOL create -f $TESTPOOL raidz1 $SDDEVICE_ID $DISK1 \
	    $DISK2 $DISK3
elif ( is_real_device $DISK1 || is_mpath_device $DISK1 ); then
	log_must $ZPOOL create -f $TESTPOOL raidz1 ${devs_id[0]} \
	    ${devs_id[1]} ${devs_id[2]}
else
	log_fail "Disks are not supported for this test"
fi

#add some data to the pool
log_must $MKFILE $FSIZE /$TESTPOOL/data

#pool guid import
typeset guid=$(get_config $TESTPOOL pool_guid)
if (( RANDOM % 2 == 0 )) ; then
	target=$guid
fi

for offline_disk in $autoonline_disks
do
	log_must $ZPOOL export -F $TESTPOOL

	# Remember the SCSI host of the disk so that only this host has to
	# be rescanned when the disk is onlined again.
	host=$($LS /sys/block/$offline_disk/device/scsi_device \
	    | $NAWK -F : '{ print $1}')

	#offline disk
	on_off_disk $offline_disk "offline"

	#reimport pool with drive missing
	log_must $ZPOOL import $target
	check_state $TESTPOOL "" "degraded"
	if (($? != 0)); then
		log_fail "$TESTPOOL is not degraded"
	fi

	# The ZED log accumulates over the whole test, so a match left by
	# a previously onlined disk must not count for this one: take a
	# baseline now and wait for the number of matches to grow.
	# (typeset -i turns the empty output of a failed egrep into 0.)
	typeset -i seen=0
	seen=$($EGREP -c "$zed_match" $zedlog 2>/dev/null)

	#online disk
	on_off_disk $offline_disk "online" $host

	log_note "Delay for ZED auto-online"
	typeset -i timeout=0
	typeset -i matched=0
	while true; do
		matched=$($EGREP -c "$zed_match" $zedlog 2>/dev/null)
		((matched > seen)) && break
		if ((timeout == $MAXTIMEOUT)); then
			log_fail "Timeout occured"
		fi
		((timeout++))
		$SLEEP 1
	done

	check_state $TESTPOOL "" "online"
	if (($? != 0)); then
		log_fail "$TESTPOOL is not back online"
	fi
	$SLEEP 2
done
log_must $ZPOOL destroy $TESTPOOL

log_pass "Auto-online test successful"

View File

@ -0,0 +1,58 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2016 by Intel Corporation. All rights reserved.
#
#
# Group cleanup for the fault tests: stop the ZED started by setup, remove
# its working directory and, when loop devices are in use, remove the
# scsi_debug device and unload its module.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg

verify_runnable "global"

cleanup_devices $DISKS

# Stop the ZED instance recorded in the pid file, if there is one.
if [[ -f ${ZEDLET_DIR}/zed.pid ]]; then
	zedpid=$($CAT ${ZEDLET_DIR}/zed.pid)
	log_must $KILL $zedpid
fi

# Use -f: none of these files is guaranteed to exist (the pid file is
# already treated as optional above), and a missing file must not turn the
# cleanup into a failure.
log_must $RM -f ${ZEDLET_DIR}/all-syslog.sh
log_must $RM -f ${ZEDLET_DIR}/zed.pid
log_must $RM -f ${ZEDLET_DIR}/zedlog
log_must $RM -f ${ZEDLET_DIR}/state
log_must $RMDIR $ZEDLET_DIR

if is_loop_device $DISK1; then
	SD=$($LSSCSI | $NAWK '/scsi_debug/ {print $6; exit}')
	SDDEVICE=$($ECHO $SD | $NAWK -F / '{print $3}')

	# No scsi_debug device listed: nothing left to tear down.
	if [[ -z $SDDEVICE ]]; then
		log_pass
	fi

	#offline disk
	on_off_disk $SDDEVICE "offline"
	block_device_wait

	log_must $MODUNLOAD scsi_debug
fi

log_pass

View File

@ -0,0 +1,50 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2016 by Intel Corporation. All rights reserved.
#
# Shared configuration for the fault test group; sourced by the group's
# setup, cleanup and test scripts.
. $STF_SUITE/include/libtest.shlib
verify_runnable "global"
# Number of whitespace-separated entries in $DISKS.
export DISK_ARRAY_NUM=$($ECHO ${DISKS} | $NAWK '{print NF}')
export DISKSARRAY=$DISKS
# Size of the data file written to the test pool.
export FSIZE=10M
# Maximum number of one-second polls while waiting for ZED.
export MAXTIMEOUT=20
# First three entries of $DISKS.
export DISK1=$($ECHO $DISKS | $NAWK '{print $1}')
export DISK2=$($ECHO $DISKS | $NAWK '{print $2}')
export DISK3=$($ECHO $DISKS | $NAWK '{print $3}')
# Working directory of the test ZED instance (zedlets, pid, state, log).
export ZEDLET_DIR=/var/tmp/zed
if is_linux; then
set_slice_prefix
set_device_dir
# Persistent names of the three test disks.
devs_id[0]=$(get_persistent_disk_name $DISK1)
devs_id[1]=$(get_persistent_disk_name $DISK2)
devs_id[2]=$(get_persistent_disk_name $DISK3)
export devs_id
else
DEV_DSKDIR="/dev"
fi

View File

@ -0,0 +1,85 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2016 by Intel Corporation. All rights reserved.
#
#
# Group setup for the fault tests: start a private ZED instance logging to
# $ZEDLET_DIR/zedlog and, when loop devices are in use, create a
# scsi_debug device for the auto-online test.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg

# scsi_debug module parameters: device size (MB), hosts, targets, luns
typeset SDSIZE=256
typeset SDHOSTS=1
typeset SDTGTS=1
typeset SDLUNS=1

[[ -z $UDEVADM ]] && log_fail "Missing UDEVADM command"
[[ -z $NAWK ]] && log_fail "Missing NAWK command"
[[ -z $EGREP ]] && log_fail "Missing EGREP command"
[[ -z $LSSCSI ]] && log_fail "Missing LSSCSI command"
[[ -z $LSMOD ]] && log_fail "Missing LSMOD command"
[[ -z $MODLOAD ]] && log_fail "Missing MODLOAD command"
[[ -z $MODUNLOAD ]] && log_fail "Missing MODUNLOAD command"
[[ -z $PGREP ]] && log_fail "Missing PGREP command"

verify_runnable "global"

if [[ ! -d $ZEDLET_DIR ]]; then
	log_must $MKDIR $ZEDLET_DIR
fi

# Dry run: only checks that the module could be loaded.
$MODLOAD -n scsi_debug
if (($? != 0)); then
	log_unsupported "Platform does not have scsi_debug module"
fi

# Verify the ZED is not already running.
$PGREP -x zed > /dev/null
if (($? == 0)); then
	log_fail "ZED already running"
fi

log_must $CP ${ZEDLETDIR}/all-syslog.sh $ZEDLET_DIR

log_note "Starting ZED"
#run ZED in the background and redirect foreground logging output to zedlog
log_must eval "$ZED -vF -d $ZEDLET_DIR -p $ZEDLET_DIR/zed.pid -s" \
    "$ZEDLET_DIR/state 2>${ZEDLET_DIR}/zedlog &"

#if using loop devices, create a scsi_debug device to be used with
#auto-online test
if is_loop_device $DISK1; then
	$LSMOD | $EGREP scsi_debug > /dev/null
	if (($? == 0)); then
		log_fail "SCSI_DEBUG module already installed"
	else
		log_must $MODLOAD scsi_debug dev_size_mb=$SDSIZE \
		    add_host=$SDHOSTS num_tgts=$SDTGTS max_luns=$SDLUNS
		block_device_wait
		# Any non-zero status (no match or egrep error) means the
		# device did not show up.
		$LSSCSI | $EGREP scsi_debug > /dev/null
		if (($? != 0)); then
			log_fail "scsi_debug failed"
		else
			SDDEVICE=$($LSSCSI \
			    | $NAWK '/scsi_debug/ {print $6; exit}')
			log_must $FORMAT -s $SDDEVICE mklabel gpt
		fi
	fi
fi

log_pass

View File

@ -20,6 +20,7 @@ export ETCDIR=${SRCDIR}/etc
export TESTSDIR=${SRCDIR}/tests
export RUNFILEDIR=${TESTSDIR}/runfiles
export UDEVRULEDIR=${BUILDDIR}/udev/rules.d
export ZEDLETDIR=${SRCDIR}/cmd/zed/zed.d
export ZDB=${CMDDIR}/zdb/zdb
export ZFS=${CMDDIR}/zfs/zfs
@ -32,6 +33,7 @@ export RAIDZ_TEST=${CMDDIR}/raidz_test/raidz_test
export ARC_SUMMARY=${CMDDIR}/arc_summary/arc_summary.py
export ARCSTAT=${CMDDIR}/arcstat/arcstat.py
export DBUFSTAT=${CMDDIR}/dbufstat/dbufstat.py
export ZED=${CMDDIR}/zed/zed
export COMMON_SH=${SCRIPTDIR}/common.sh
export ZFS_SH=${SCRIPTDIR}/zfs.sh