zpool: allow sharing of spare device among pools

ZFS allows, by default, sharing of spare devices among different pools;
this commit simply restores this functionality for disk devices and
adds an additional test case to the ZFS Test Suite to prevent future
regression.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
Closes #7999
This commit is contained in:
LOLi 2018-10-17 20:21:07 +02:00 committed by Brian Behlendorf
parent 49394a7708
commit 2e55034471
5 changed files with 130 additions and 3 deletions

View File

@ -419,11 +419,16 @@ check_disk(const char *path, blkid_cache cache, int force,
char slice_path[MAXPATHLEN]; char slice_path[MAXPATHLEN];
int err = 0; int err = 0;
int fd, i; int fd, i;
int flags = O_RDONLY|O_DIRECT;
if (!iswholedisk) if (!iswholedisk)
return (check_slice(path, cache, force, isspare)); return (check_slice(path, cache, force, isspare));
if ((fd = open(path, O_RDONLY|O_DIRECT|O_EXCL)) < 0) { /* only spares can be shared, other devices require exclusive access */
if (!isspare)
flags |= O_EXCL;
if ((fd = open(path, flags)) < 0) {
char *value = blkid_get_tag_value(cache, "TYPE", path); char *value = blkid_get_tag_value(cache, "TYPE", path);
(void) fprintf(stderr, gettext("%s is in use and contains " (void) fprintf(stderr, gettext("%s is in use and contains "
"a %s filesystem.\n"), path, value ? value : "unknown"); "a %s filesystem.\n"), path, value ? value : "unknown");
@ -547,7 +552,7 @@ is_spare(nvlist_t *config, const char *path)
uint_t i, nspares; uint_t i, nspares;
boolean_t inuse; boolean_t inuse;
if ((fd = open(path, O_RDONLY)) < 0) if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
return (B_FALSE); return (B_FALSE);
if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||

View File

@ -541,7 +541,8 @@ tags = ['functional', 'exec']
[tests/functional/fault] [tests/functional/fault]
tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos', tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple', 'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple',
'scrub_after_resilver', 'decrypt_fault', 'decompress_fault'] 'auto_spare_shared', 'scrub_after_resilver', 'decrypt_fault',
'decompress_fault']
tags = ['functional', 'fault'] tags = ['functional', 'fault']
[tests/functional/features/async_destroy] [tests/functional/features/async_destroy]

View File

@ -3190,6 +3190,7 @@ function wait_scrubbed
{ {
typeset pool=${1:-$TESTPOOL} typeset pool=${1:-$TESTPOOL}
typeset iter=${2:-10} typeset iter=${2:-10}
typeset -i i=0
for i in {1..$iter} ; do for i in {1..$iter} ; do
if is_pool_scrubbed $pool ; then if is_pool_scrubbed $pool ; then
return 0 return 0

View File

@ -8,6 +8,7 @@ dist_pkgdata_SCRIPTS = \
auto_spare_002_pos.ksh \ auto_spare_002_pos.ksh \
auto_spare_ashift.ksh \ auto_spare_ashift.ksh \
auto_spare_multiple.ksh \ auto_spare_multiple.ksh \
auto_spare_shared.ksh \
decrypt_fault.ksh \ decrypt_fault.ksh \
decompress_fault.ksh \ decompress_fault.ksh \
scrub_after_resilver.ksh scrub_after_resilver.ksh

View File

@ -0,0 +1,119 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg
#
# DESCRIPTION:
# Spare devices (both files and disks) can be shared among different ZFS pools.
#
# STRATEGY:
# 1. Create two pools
# 2. Add the same spare device to different pools
# 3. Inject IO errors with a zinject error handler
# 4. Start a scrub
# 5. Verify the ZED kicks in a hot spare and check pool/device status
# 6. Clear the fault
# 7. Verify the hot spare is available and check pool/device status
#
verify_runnable "both"

# The disk-backed spare comes from load_scsi_debug, which this test only
# invokes on Linux; every other platform is reported as unsupported.
if ! is_linux; then
	log_unsupported "scsi debug module unsupported"
else
	# Request one 512b spare device (4Kn would generate IO errors on
	# replace).
	# NOTE: it must be larger than the other "file" vdevs and the minimum
	# SPA devsize, hence the extra 32m of fudge.
	load_scsi_debug $(( $SPA_MINDEVSIZE / 1024 / 1024 + 32 )) 1 1 1 '512b'
fi
#
# Test teardown: clear all zinject handlers, destroy both test pools, call
# unload_scsi_debug and delete the file-backed vdevs used by this test.
#
function cleanup
{
	# Space-separated list of the file vdevs; left unquoted below on
	# purpose so that it is split back into one argument per file.
	typeset files="$SAFE_FILEDEVPOOL1 $SAFE_FILEDEVPOOL2"
	files="$files $FAIL_FILEDEVPOOL1 $FAIL_FILEDEVPOOL2"
	files="$files $SPARE_FILEDEV"

	log_must zinject -c all

	destroy_pool $TESTPOOL
	destroy_pool $TESTPOOL1

	unload_scsi_debug

	rm -f $files
}
# Main test flow: for a file-backed spare and then a disk-backed spare, add
# the same spare to two mirror pools, fault one side of each mirror and check
# that the spare is activated and later released again.
log_assert "Spare devices can be shared among different ZFS pools"
# Hand cleanup() to log_onexit (presumably the framework's exit hook --
# confirm in the test suite's logging library)
log_onexit cleanup
# Clear events from previous runs
zed_events_drain
# File vdevs under $TEST_BASE_DIR: each pool gets one "safe" mirror side and
# one "fail" side (the target of error injection below), plus one shared
# file-backed spare.
SAFE_FILEDEVPOOL1="$TEST_BASE_DIR/file-safe-dev1"
FAIL_FILEDEVPOOL1="$TEST_BASE_DIR/file-fail-dev1"
SAFE_FILEDEVPOOL2="$TEST_BASE_DIR/file-safe-dev2"
FAIL_FILEDEVPOOL2="$TEST_BASE_DIR/file-fail-dev2"
SPARE_FILEDEV="$TEST_BASE_DIR/file-spare-dev"
# Disk-backed spare: whatever get_debug_device prints (presumably the
# scsi_debug disk loaded during setup -- confirm in libtest.shlib)
SPARE_DISKDEV="$(get_debug_device)"
# Size every file vdev (including the file spare) to $SPA_MINDEVSIZE
for vdev in $SAFE_FILEDEVPOOL1 $SAFE_FILEDEVPOOL2 $FAIL_FILEDEVPOOL1 \
$FAIL_FILEDEVPOOL2 $SPARE_FILEDEV; do
log_must truncate -s $SPA_MINDEVSIZE $vdev
done
# Run the complete scenario twice: first with the file spare, then with the
# disk spare. Both pools are destroyed at the end of each pass.
for spare in $SPARE_FILEDEV $SPARE_DISKDEV; do
# 1. Create two pools
log_must zpool create -f $TESTPOOL mirror $SAFE_FILEDEVPOOL1 $FAIL_FILEDEVPOOL1
log_must zpool create -f $TESTPOOL1 mirror $SAFE_FILEDEVPOOL2 $FAIL_FILEDEVPOOL2
# 2. Add the same spare device to different pools
log_must_busy zpool add $TESTPOOL spare $spare
log_must_busy zpool add $TESTPOOL1 spare $spare
# The spare must be reported as AVAIL by both pools before faults are injected
log_must wait_hotspare_state $TESTPOOL $spare "AVAIL"
log_must wait_hotspare_state $TESTPOOL1 $spare "AVAIL"
# 3. Inject IO errors with a zinject error handler
# (one handler per pool, each aimed at that pool's "fail" vdev)
log_must zinject -d $FAIL_FILEDEVPOOL1 -e io -T all -f 100 $TESTPOOL
log_must zinject -d $FAIL_FILEDEVPOOL2 -e io -T all -f 100 $TESTPOOL1
# 4. Start a scrub
log_must zpool scrub $TESTPOOL
log_must zpool scrub $TESTPOOL1
# 5. Verify the ZED kicks in a hot spare and check pool/device status
# NOTE(review): from this point on only $TESTPOOL is checked; $TESTPOOL1 is
# faulted and scrubbed above but its vdev/spare state is never verified.
log_note "Wait for ZED to auto-spare"
log_must wait_vdev_state $TESTPOOL $FAIL_FILEDEVPOOL1 "FAULTED" 60
log_must wait_vdev_state $TESTPOOL $spare "ONLINE" 60
log_must wait_hotspare_state $TESTPOOL $spare "INUSE"
log_must check_state $TESTPOOL "" "DEGRADED"
# 6. Clear the fault
# ("zinject -c all" drops the handlers registered for both pools; the
# "zpool clear" only targets $TESTPOOL's failed vdev)
log_must zinject -c all
log_must zpool clear $TESTPOOL $FAIL_FILEDEVPOOL1
# 7. Verify the hot spare is available and check pool/device status
log_must wait_vdev_state $TESTPOOL $FAIL_FILEDEVPOOL1 "ONLINE" 60
log_must wait_hotspare_state $TESTPOOL $spare "AVAIL"
log_must is_pool_resilvered $TESTPOOL
log_must check_state $TESTPOOL "" "ONLINE"
# Cleanup
# (destroy both pools so the next pass can recreate them with the other spare)
destroy_pool $TESTPOOL
destroy_pool $TESTPOOL1
done
log_pass "Spare devices can be shared among different ZFS pools"