Fix double spares for failed vdev

It's possible for two spares to get attached to a single failed vdev.
This happens when a failed disk is spared, the failed disk is then
replaced with a new disk, and the new disk fails during the resilver,
so ZED kicks in a second spare for the failed new disk.  This commit
checks for that condition and disallows it.
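
For illustration only (an editorial sketch, not part of the commit message):
the sequence can be reproduced with ordinary zpool commands, much as the new
test added below does.  Pool, disk, and spare names are placeholders, and
forcing the fault with "zpool offline -f" is just one of several ways a disk
can end up faulted.

  # Pool with hot spares available (placeholder names).
  zpool create tank mirror disk1 disk2 spare spare1 spare2

  # disk1 fails; ZED (or the admin) attaches the first spare.
  zpool offline -f tank disk1
  zpool replace tank disk1 spare1

  # disk1 is swapped for a blank disk and a resilver to it starts.
  zpool replace tank disk1

  # The new disk faults mid-resilver; attaching the second spare used to
  # leave two spares on the same slot, and with this change fails (EEXIST).
  zpool offline -f tank disk1
  zpool replace tank disk1 spare2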

Reviewed-by: Akash B <akash-b@hpe.com>
Reviewed-by: Ameer Hamza <ahamza@ixsystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes: #16547
Closes: #17231
Author: Tony Hutter, 2025-05-02 09:03:11 -07:00 (committed via GitHub)
parent 3b18877269
commit f40ab9e399
4 changed files with 209 additions and 4 deletions

@@ -7430,6 +7430,82 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift)
 	return (0);
 }
 
+/*
+ * Given a vdev to be replaced and its parent, check for a possible
+ * "double spare" condition if a vdev is to be replaced by a spare.  When this
+ * happens, you can get two spares assigned to one failed vdev.
+ *
+ * To trigger a double spare condition:
+ *
+ * 1. disk1 fails
+ * 2. 1st spare is kicked in for disk1 and it resilvers
+ * 3. Someone replaces disk1 with a new blank disk
+ * 4. New blank disk starts resilvering
+ * 5. While resilvering, new blank disk has IO errors and faults
+ * 6. 2nd spare is kicked in for new blank disk
+ * 7. At this point two spares are kicked in for the original disk1.
+ *
+ * It looks like this:
+ *
+ * NAME                                            STATE     READ WRITE CKSUM
+ * tank2                                           DEGRADED     0     0     0
+ *   draid2:6d:10c:2s-0                            DEGRADED     0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d1                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d2                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d3                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d4                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d5                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d6                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d7                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d8                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d9                 ONLINE       0     0     0
+ *     spare-9                                     DEGRADED     0     0     0
+ *       replacing-0                               DEGRADED     0    93     0
+ *         scsi-0QEMU_QEMU_HARDDISK_d10-part1/old  UNAVAIL      0     0     0
+ *         spare-1                                 DEGRADED     0     0     0
+ *           scsi-0QEMU_QEMU_HARDDISK_d10          REMOVED      0     0     0
+ *           draid2-0-0                            ONLINE       0     0     0
+ *       draid2-0-1                                ONLINE       0     0     0
+ * spares
+ *   draid2-0-0                                    INUSE     currently in use
+ *   draid2-0-1                                    INUSE     currently in use
+ *
+ * ARGS:
+ *
+ * newvd: New spare disk
+ * pvd:   Parent vdev_t the spare should attach to
+ *
+ * This function returns B_TRUE if adding the new vdev would create a double
+ * spare condition, B_FALSE otherwise.
+ */
+static boolean_t
+spa_vdev_new_spare_would_cause_double_spares(vdev_t *newvd, vdev_t *pvd)
+{
+	vdev_t *ppvd;
+
+	ppvd = pvd->vdev_parent;
+	if (ppvd == NULL)
+		return (B_FALSE);
+
+	/*
+	 * To determine if this configuration would cause a double spare, we
+	 * look at the vdev_op of the parent vdev, and of the parent's parent
+	 * vdev.  We also look at vdev_isspare on the new disk.  A double spare
+	 * condition looks like this:
+	 *
+	 * 1. parent of parent's op is a spare or draid spare
+	 * 2. parent's op is replacing
+	 * 3. new disk is a spare
+	 */
+	if ((ppvd->vdev_ops == &vdev_spare_ops) ||
+	    (ppvd->vdev_ops == &vdev_draid_spare_ops))
+		if (pvd->vdev_ops == &vdev_replacing_ops)
+			if (newvd->vdev_isspare)
+				return (B_TRUE);
+
+	return (B_FALSE);
+}
+
 /*
  * Attach a device to a vdev specified by its guid.  The vdev type can be
  * a mirror, a raidz, or a leaf device that is also a top-level (e.g. a
@@ -7604,6 +7680,12 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
 		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
 	}
 
+	if (spa_vdev_new_spare_would_cause_double_spares(newvd, pvd)) {
+		vdev_dbgmsg(newvd,
+		    "disk would create double spares, ignore.");
+		return (spa_vdev_exit(spa, newrootvd, txg, EEXIST));
+	}
+
 	if (newvd->vdev_isspare)
 		pvops = &vdev_spare_ops;
 	else

@@ -123,10 +123,10 @@ tags = ['functional', 'fallocate']
 [tests/functional/fault:Linux]
 tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
     'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos',
-    'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift',
-    'auto_spare_shared', 'decrypt_fault', 'decompress_fault',
-    'fault_limits', 'scrub_after_resilver', 'suspend_on_probe_errors',
-    'suspend_resume_single', 'zpool_status_-s']
+    'auto_spare_002_pos', 'auto_spare_double', 'auto_spare_multiple',
+    'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault',
+    'decompress_fault', 'fault_limits', 'scrub_after_resilver',
+    'suspend_on_probe_errors', 'suspend_resume_single', 'zpool_status_-s']
 tags = ['functional', 'fault']
 
 [tests/functional/features/large_dnode:Linux]

@@ -1535,6 +1535,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/fault/auto_spare_001_pos.ksh \
 	functional/fault/auto_spare_002_pos.ksh \
 	functional/fault/auto_spare_ashift.ksh \
+	functional/fault/auto_spare_double.ksh \
 	functional/fault/auto_spare_multiple.ksh \
 	functional/fault/auto_spare_shared.ksh \
 	functional/fault/cleanup.ksh \

@@ -0,0 +1,122 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025 by Lawrence Livermore National Security, LLC.
+#
+# DESCRIPTION:
+# Try to induce a double spare condition and verify we're prevented from doing
+# it.
+#
+# STRATEGY:
+# 1. Fail a drive
+# 2. Kick in first spare
+# 3. Bring new drive back online and start resilvering to it
+# 4. Immediately after the resilver starts, fail the new drive.
+# 5. Try to kick in the second spare for the new drive (which is now failed)
+# 6. Verify that we can't kick in the second spare
+#
+# Repeat this test for both traditional spares and dRAID spares.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+LAST_VDEV=3
+SIZE_MB=300
+ZED_PID="$(zed_check)"
+
+function cleanup
+{
+	destroy_pool $TESTPOOL
+	for i in {0..$LAST_VDEV} ; do
+		log_must rm -f $TEST_BASE_DIR/file$i
+	done
+
+	# Restore ZED if it was running before this test
+	if [ -n "$ZED_PID" ] ; then
+		log_must zed_start
+	fi
+}
+
+log_assert "Cannot attach two spares to same failed vdev"
+log_onexit cleanup
+
+# Stop ZED if it's running
+if [ -n "$ZED_PID" ] ; then
+	log_must zed_stop
+fi
+
+log_must truncate -s ${SIZE_MB}M $TEST_BASE_DIR/file{0..$LAST_VDEV}
+
+ZFS_DBGMSG=/proc/spl/kstat/zfs/dbgmsg
+
+# Run the test - we assume the pool is already created.
+# $1: disk to fail
+# $2: 1st spare name
+# $3: 2nd spare name
+function do_test {
+	FAIL_DRIVE=$1
+	SPARE0=$2
+	SPARE1=$3
+
+	echo 0 > $ZFS_DBGMSG
+	log_must zpool status
+
+	log_note "Kicking in first spare ($SPARE0)"
+	log_must zpool offline -f $TESTPOOL $FAIL_DRIVE
+	log_must zpool replace $TESTPOOL $FAIL_DRIVE $SPARE0
+
+	# Fill the pool with data to make the resilver take a little
+	# time.
+	dd if=/dev/zero of=/$TESTPOOL/testfile bs=1M || true
+
+	# Zero our failed disk.  It will appear as a blank.
+	rm -f $FAIL_DRIVE
+	truncate -s ${SIZE_MB}M $FAIL_DRIVE
+
+	# Attempt to replace our failed disk, then immediately fault it.
+	log_must zpool replace $TESTPOOL $FAIL_DRIVE
+	log_must zpool offline -f $TESTPOOL $FAIL_DRIVE
+	log_must check_state $TESTPOOL $FAIL_DRIVE "faulted"
+
+	log_note "Kicking in second spare ($SPARE1)... This should not work..."
+	log_mustnot zpool replace $TESTPOOL $FAIL_DRIVE $SPARE1
+
+	# Verify the debug line in dbgmsg
+	log_must grep 'disk would create double spares' $ZFS_DBGMSG
+
+	# Disk should still be faulted
+	log_must check_state $TESTPOOL $FAIL_DRIVE "faulted"
+}
+
+# Test with traditional spares
+log_must zpool create -O compression=off -O recordsize=4k -O primarycache=none \
+	$TESTPOOL mirror $TEST_BASE_DIR/file{0,1} spare $TEST_BASE_DIR/file{2,3}
+do_test $TEST_BASE_DIR/file1 $TEST_BASE_DIR/file2 $TEST_BASE_DIR/file3
+destroy_pool $TESTPOOL
+
+# Clear vdev files for next test
+for i in {0..$LAST_VDEV} ; do
+	log_must rm -f $TEST_BASE_DIR/file$i
+done
+log_must truncate -s ${SIZE_MB}M $TEST_BASE_DIR/file{0..$LAST_VDEV}
+
+# Test with dRAID spares
+log_must zpool create -O compression=off -O recordsize=4k -O primarycache=none \
+	$TESTPOOL draid1:1d:4c:2s $TEST_BASE_DIR/file{0..3}
+do_test $TEST_BASE_DIR/file1 draid1-0-0 draid1-0-1
+log_pass "Verified we cannot attach two spares to same failed vdev"
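
Editorial usage note (not part of the commit): with a built source tree, the
new case can be run through the ZFS Test Suite wrapper. The option spelling
and test path below are assumptions based on the wrapper's usual interface;
running the whole 'fault' tag group also covers it.

  # Run only the new test case (assumed invocation).
  ./scripts/zfs-tests.sh -t functional/fault/auto_spare_double
  # Or run the entire fault group.
  ./scripts/zfs-tests.sh -T fault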