diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index d35a97dda..66dfe629d 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -7430,6 +7430,82 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift)
 	return (0);
 }
 
+/*
+ * Given a vdev to be replaced and its parent, check for a possible
+ * "double spare" condition if a vdev is to be replaced by a spare.  When this
+ * happens, you can get two spares assigned to one failed vdev.
+ *
+ * To trigger a double spare condition:
+ *
+ * 1. disk1 fails
+ * 2. 1st spare is kicked in for disk1 and it resilvers
+ * 3. Someone replaces disk1 with a new blank disk
+ * 4. New blank disk starts resilvering
+ * 5. While resilvering, new blank disk has IO errors and faults
+ * 6. 2nd spare is kicked in for new blank disk
+ * 7. At this point two spares are kicked in for the original disk1.
+ *
+ * It looks like this:
+ *
+ * NAME                                           STATE     READ WRITE CKSUM
+ * tank2                                          DEGRADED     0     0     0
+ *   draid2:6d:10c:2s-0                           DEGRADED     0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d1                ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d2                ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d3                ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d4                ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d5                ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d6                ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d7                ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d8                ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d9                ONLINE       0     0     0
+ *     spare-9                                    DEGRADED     0     0     0
+ *       replacing-0                              DEGRADED     0    93     0
+ *         scsi-0QEMU_QEMU_HARDDISK_d10-part1/old UNAVAIL      0     0     0
+ *         spare-1                                DEGRADED     0     0     0
+ *           scsi-0QEMU_QEMU_HARDDISK_d10         REMOVED      0     0     0
+ *           draid2-0-0                           ONLINE       0     0     0
+ *       draid2-0-1                               ONLINE       0     0     0
+ * spares
+ *   draid2-0-0                                   INUSE     currently in use
+ *   draid2-0-1                                   INUSE     currently in use
+ *
+ * ARGS:
+ *
+ * newvd: New spare disk
+ * pvd: Parent vdev_t the spare should attach to
+ *
+ * This function returns B_TRUE if adding the new vdev would create a double
+ * spare condition, B_FALSE otherwise.
+ */
+static boolean_t
+spa_vdev_new_spare_would_cause_double_spares(vdev_t *newvd, vdev_t *pvd)
+{
+	vdev_t *ppvd;
+
+	ppvd = pvd->vdev_parent;
+	if (ppvd == NULL)
+		return (B_FALSE);
+
+	/*
+	 * To determine if this configuration would cause a double spare, we
+	 * look at the vdev_op of the parent vdev, and of the parent's parent
+	 * vdev.  We also look at vdev_isspare on the new disk.  A double spare
+	 * condition looks like this:
+	 *
+	 * 1. parent of parent's op is a spare or draid spare
+	 * 2. parent's op is replacing
+	 * 3. new disk is a spare
+	 */
+	if ((ppvd->vdev_ops == &vdev_spare_ops) ||
+	    (ppvd->vdev_ops == &vdev_draid_spare_ops))
+		if (pvd->vdev_ops == &vdev_replacing_ops)
+			if (newvd->vdev_isspare)
+				return (B_TRUE);
+
+	return (B_FALSE);
+}
+
 /*
  * Attach a device to a vdev specified by its guid.  The vdev type can be
  * a mirror, a raidz, or a leaf device that is also a top-level (e.g. a
@@ -7604,6 +7680,12 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
 		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
 	}
 
+	if (spa_vdev_new_spare_would_cause_double_spares(newvd, pvd)) {
+		vdev_dbgmsg(newvd,
+		    "disk would create double spares, ignore.");
+		return (spa_vdev_exit(spa, newrootvd, txg, EEXIST));
+	}
+
 	if (newvd->vdev_isspare)
 		pvops = &vdev_spare_ops;
 	else
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index a6185f982..8d3cb0607 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -123,10 +123,10 @@ tags = ['functional', 'fallocate']
 [tests/functional/fault:Linux]
 tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
     'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos',
-    'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift',
-    'auto_spare_shared', 'decrypt_fault', 'decompress_fault',
-    'fault_limits', 'scrub_after_resilver', 'suspend_on_probe_errors',
-    'suspend_resume_single', 'zpool_status_-s']
+    'auto_spare_002_pos', 'auto_spare_double', 'auto_spare_multiple',
+    'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault',
+    'decompress_fault', 'fault_limits', 'scrub_after_resilver',
+    'suspend_on_probe_errors', 'suspend_resume_single', 'zpool_status_-s']
 tags = ['functional', 'fault']
 
 [tests/functional/features/large_dnode:Linux]
diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index 6a5d11761..db1ef0d03 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -1535,6 +1535,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/fault/auto_spare_001_pos.ksh \
 	functional/fault/auto_spare_002_pos.ksh \
 	functional/fault/auto_spare_ashift.ksh \
+	functional/fault/auto_spare_double.ksh \
 	functional/fault/auto_spare_multiple.ksh \
 	functional/fault/auto_spare_shared.ksh \
 	functional/fault/cleanup.ksh \
diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_double.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_double.ksh
new file mode 100755
index 000000000..6a905c867
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fault/auto_spare_double.ksh
@@ -0,0 +1,122 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025 by Lawrence Livermore National Security, LLC.
+#
+
+# DESCRIPTION:
+# Try to induce a double spare condition and verify that we're prevented
+# from doing so.
+#
+# STRATEGY:
+# 1. Fail a drive
+# 2. Kick in first spare
+# 3. Bring new drive back online and start resilvering to it
+# 4. Immediately after the resilver starts, fail the new drive
+# 5. Try to kick in the second spare for the new drive (which is now failed)
+# 6. Verify that we can't kick in the second spare
+#
+# Repeat this test for both traditional spares and dRAID spares.
+#
+. $STF_SUITE/include/libtest.shlib
+
+LAST_VDEV=3
+SIZE_MB=300
+
+ZED_PID="$(zed_check)"
+
+function cleanup
+{
+	destroy_pool $TESTPOOL
+	for i in {0..$LAST_VDEV} ; do
+		log_must rm -f $TEST_BASE_DIR/file$i
+	done
+
+	# Restore ZED if it was running before this test
+	if [ -n "$ZED_PID" ] ; then
+		log_must zed_start
+	fi
+}
+
+log_assert "Cannot attach two spares to the same failed vdev"
+log_onexit cleanup
+
+# Stop ZED if it's running
+if [ -n "$ZED_PID" ] ; then
+	log_must zed_stop
+fi
+
+log_must truncate -s ${SIZE_MB}M $TEST_BASE_DIR/file{0..$LAST_VDEV}
+
+ZFS_DBGMSG=/proc/spl/kstat/zfs/dbgmsg
+
+# Run the test - we assume the pool is already created.
+# $1: disk to fail
+# $2: 1st spare name
+# $3: 2nd spare name
+function do_test {
+	FAIL_DRIVE=$1
+	SPARE0=$2
+	SPARE1=$3
+	echo 0 > $ZFS_DBGMSG
+	log_must zpool status
+
+	log_note "Kicking in first spare ($SPARE0)"
+	log_must zpool offline -f $TESTPOOL $FAIL_DRIVE
+	log_must zpool replace $TESTPOOL $FAIL_DRIVE $SPARE0
+
+	# Fill the pool with data to make the resilver take a little
+	# time.
+	dd if=/dev/zero of=/$TESTPOOL/testfile bs=1M || true
+
+	# Recreate our failed disk as a blank file so it appears as a new,
+	# empty disk.
+	rm -f $FAIL_DRIVE
+	truncate -s ${SIZE_MB}M $FAIL_DRIVE
+
+	# Attempt to replace our failed disk, then immediately fault it.
+	log_must zpool replace $TESTPOOL $FAIL_DRIVE
+	log_must zpool offline -f $TESTPOOL $FAIL_DRIVE
+	log_must check_state $TESTPOOL $FAIL_DRIVE "faulted"
+
+	log_note "Kicking in second spare ($SPARE1)... This should not work..."
+	log_mustnot zpool replace $TESTPOOL $FAIL_DRIVE $SPARE1
+	# Verify the debug line in dbgmsg
+	log_must grep 'disk would create double spares' $ZFS_DBGMSG
+
+	# Disk should still be faulted
+	log_must check_state $TESTPOOL $FAIL_DRIVE "faulted"
+}
+
+# Test with traditional spares
+log_must zpool create -O compression=off -O recordsize=4k -O primarycache=none \
+	$TESTPOOL mirror $TEST_BASE_DIR/file{0,1} spare $TEST_BASE_DIR/file{2,3}
+do_test $TEST_BASE_DIR/file1 $TEST_BASE_DIR/file2 $TEST_BASE_DIR/file3
+destroy_pool $TESTPOOL
+
+# Clear vdev files for next test
+for i in {0..$LAST_VDEV} ; do
+	log_must rm -f $TEST_BASE_DIR/file$i
+done
+log_must truncate -s ${SIZE_MB}M $TEST_BASE_DIR/file{0..$LAST_VDEV}
+
+# Test with dRAID spares
+log_must zpool create -O compression=off -O recordsize=4k -O primarycache=none \
+	$TESTPOOL draid1:1d:4c:2s $TEST_BASE_DIR/file{0..3}
+do_test $TEST_BASE_DIR/file1 draid1-0-0 draid1-0-1
+
+log_pass "Verified we cannot attach two spares to the same failed vdev"
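
Reviewer note (not part of the patch): for anyone wanting to reproduce the rejected case by hand rather than through the test suite, a rough sketch along the following lines should walk into the new spa_vdev_new_spare_would_cause_double_spares() check and fail with EEXIST. The file-backed pool, size, and paths below are hypothetical; the command sequence simply mirrors do_test() above.

    # Scratch pool with a mirror and two traditional spares (throwaway files).
    truncate -s 300M /var/tmp/d0 /var/tmp/d1 /var/tmp/s0 /var/tmp/s1
    zpool create tank mirror /var/tmp/d0 /var/tmp/d1 spare /var/tmp/s0 /var/tmp/s1

    zpool offline -f tank /var/tmp/d0               # fault a data disk
    zpool replace tank /var/tmp/d0 /var/tmp/s0      # first spare kicks in
    rm /var/tmp/d0 && truncate -s 300M /var/tmp/d0  # simulate a fresh replacement disk
    zpool replace tank /var/tmp/d0                  # start resilvering to the "new" disk
    zpool offline -f tank /var/tmp/d0               # fault it mid-resilver
    zpool replace tank /var/tmp/d0 /var/tmp/s1      # with this patch: rejected (double spare)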