mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Fix double spares for failed vdev
It's possible for two spares to get attached to a single failed vdev.
This happens when you have a failed disk that is spared, and then you
replace the failed disk with a new disk, but during the resilver
the new disk fails, and ZED kicks in a spare for the failed new
disk.  This commit checks for that condition and disallows it.
Reviewed-by: Akash B <akash-b@hpe.com>
Reviewed-by: Ameer Hamza <ahamza@ixsystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes: #16547
Closes: #17231
(cherry picked from commit f40ab9e399)
			
			
This commit is contained in:
		
							parent
							
								
									cd777ba5ad
								
							
						
					
					
						commit
						4b014840ea
					
				| @ -7430,6 +7430,82 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift) | |||||||
| 	return (0); | 	return (0); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * Given a vdev to be replaced and its parent, check for a possible | ||||||
|  |  * "double spare" condition if a vdev is to be replaced by a spare.  When this | ||||||
|  |  * happens, you can get two spares assigned to one failed vdev. | ||||||
|  |  * | ||||||
|  |  * To trigger a double spare condition: | ||||||
|  |  * | ||||||
|  |  * 1. disk1 fails | ||||||
|  |  * 2. 1st spare is kicked in for disk1 and it resilvers | ||||||
|  |  * 3. Someone replaces disk1 with a new blank disk | ||||||
|  |  * 4. New blank disk starts resilvering | ||||||
|  |  * 5. While resilvering, new blank disk has IO errors and faults | ||||||
|  |  * 6. 2nd spare is kicked in for new blank disk | ||||||
|  |  * 7. At this point two spares are kicked in for the original disk1. | ||||||
|  |  * | ||||||
|  |  * It looks like this: | ||||||
|  |  * | ||||||
|  |  * NAME                                            STATE     READ WRITE CKSUM | ||||||
|  |  * tank2                                           DEGRADED     0     0     0 | ||||||
|  |  *   draid2:6d:10c:2s-0                            DEGRADED     0     0     0 | ||||||
|  |  *     scsi-0QEMU_QEMU_HARDDISK_d1                 ONLINE       0     0     0 | ||||||
|  |  *     scsi-0QEMU_QEMU_HARDDISK_d2                 ONLINE       0     0     0 | ||||||
|  |  *     scsi-0QEMU_QEMU_HARDDISK_d3                 ONLINE       0     0     0 | ||||||
|  |  *     scsi-0QEMU_QEMU_HARDDISK_d4                 ONLINE       0     0     0 | ||||||
|  |  *     scsi-0QEMU_QEMU_HARDDISK_d5                 ONLINE       0     0     0 | ||||||
|  |  *     scsi-0QEMU_QEMU_HARDDISK_d6                 ONLINE       0     0     0 | ||||||
|  |  *     scsi-0QEMU_QEMU_HARDDISK_d7                 ONLINE       0     0     0 | ||||||
|  |  *     scsi-0QEMU_QEMU_HARDDISK_d8                 ONLINE       0     0     0 | ||||||
|  |  *     scsi-0QEMU_QEMU_HARDDISK_d9                 ONLINE       0     0     0 | ||||||
|  |  *     spare-9                                     DEGRADED     0     0     0 | ||||||
|  |  *       replacing-0                               DEGRADED     0    93     0 | ||||||
|  |  *         scsi-0QEMU_QEMU_HARDDISK_d10-part1/old  UNAVAIL      0     0     0 | ||||||
|  |  *         spare-1                                 DEGRADED     0     0     0 | ||||||
|  |  *           scsi-0QEMU_QEMU_HARDDISK_d10          REMOVED      0     0     0 | ||||||
|  |  *           draid2-0-0                            ONLINE       0     0     0 | ||||||
|  |  *       draid2-0-1                                ONLINE       0     0     0 | ||||||
|  |  * spares | ||||||
|  |  *   draid2-0-0                                    INUSE     currently in use | ||||||
|  |  *   draid2-0-1                                    INUSE     currently in use | ||||||
|  |  * | ||||||
|  |  * ARGS: | ||||||
|  |  * | ||||||
|  |  * newvd:  New spare disk | ||||||
|  |  * pvd:    Parent vdev_t the spare should attach to | ||||||
|  |  * | ||||||
|  |  * This function returns B_TRUE if adding the new vdev would create a double | ||||||
|  |  * spare condition, B_FALSE otherwise. | ||||||
|  |  */ | ||||||
|  | static boolean_t | ||||||
|  | spa_vdev_new_spare_would_cause_double_spares(vdev_t *newvd, vdev_t *pvd) | ||||||
|  | { | ||||||
|  | 	vdev_t *ppvd; | ||||||
|  | 
 | ||||||
|  | 	ppvd = pvd->vdev_parent; | ||||||
|  | 	if (ppvd == NULL) | ||||||
|  | 		return (B_FALSE); | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * To determine if this configuration would cause a double spare, we | ||||||
|  | 	 * look at the vdev_op of the parent vdev, and of the parent's parent | ||||||
|  | 	 * vdev.  We also look at vdev_isspare on the new disk.  A double spare | ||||||
|  | 	 * condition looks like this: | ||||||
|  | 	 * | ||||||
|  | 	 * 1. parent of parent's op is a spare or draid spare | ||||||
|  | 	 * 2. parent's op is replacing | ||||||
|  | 	 * 3. new disk is a spare | ||||||
|  | 	 */ | ||||||
|  | 	if ((ppvd->vdev_ops == &vdev_spare_ops) || | ||||||
|  | 	    (ppvd->vdev_ops == &vdev_draid_spare_ops)) | ||||||
|  | 		if (pvd->vdev_ops == &vdev_replacing_ops) | ||||||
|  | 			if (newvd->vdev_isspare) | ||||||
|  | 				return (B_TRUE); | ||||||
|  | 
 | ||||||
|  | 	return (B_FALSE); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Attach a device to a vdev specified by its guid.  The vdev type can be |  * Attach a device to a vdev specified by its guid.  The vdev type can be | ||||||
|  * a mirror, a raidz, or a leaf device that is also a top-level (e.g. a |  * a mirror, a raidz, or a leaf device that is also a top-level (e.g. a | ||||||
| @ -7604,6 +7680,12 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing, | |||||||
| 			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | 			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | 		if (spa_vdev_new_spare_would_cause_double_spares(newvd, pvd)) { | ||||||
|  | 			vdev_dbgmsg(newvd, | ||||||
|  | 			    "disk would create double spares, ignore."); | ||||||
|  | 			return (spa_vdev_exit(spa, newrootvd, txg, EEXIST)); | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
| 		if (newvd->vdev_isspare) | 		if (newvd->vdev_isspare) | ||||||
| 			pvops = &vdev_spare_ops; | 			pvops = &vdev_spare_ops; | ||||||
| 		else | 		else | ||||||
|  | |||||||
| @ -123,10 +123,10 @@ tags = ['functional', 'fallocate'] | |||||||
| [tests/functional/fault:Linux] | [tests/functional/fault:Linux] | ||||||
| tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos', | tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos', | ||||||
|     'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos', |     'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos', | ||||||
|     'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift', |     'auto_spare_002_pos', 'auto_spare_double', 'auto_spare_multiple', | ||||||
|     'auto_spare_shared', 'decrypt_fault', 'decompress_fault', |     'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault', | ||||||
|     'fault_limits', 'scrub_after_resilver', 'suspend_on_probe_errors', |     'decompress_fault', 'fault_limits', 'scrub_after_resilver', | ||||||
|     'suspend_resume_single', 'zpool_status_-s'] |     'suspend_on_probe_errors', 'suspend_resume_single', 'zpool_status_-s'] | ||||||
| tags = ['functional', 'fault'] | tags = ['functional', 'fault'] | ||||||
| 
 | 
 | ||||||
| [tests/functional/features/large_dnode:Linux] | [tests/functional/features/large_dnode:Linux] | ||||||
|  | |||||||
| @ -1532,6 +1532,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ | |||||||
| 	functional/fault/auto_spare_001_pos.ksh \
 | 	functional/fault/auto_spare_001_pos.ksh \
 | ||||||
| 	functional/fault/auto_spare_002_pos.ksh \
 | 	functional/fault/auto_spare_002_pos.ksh \
 | ||||||
| 	functional/fault/auto_spare_ashift.ksh \
 | 	functional/fault/auto_spare_ashift.ksh \
 | ||||||
|  | 	functional/fault/auto_spare_double.ksh \
 | ||||||
| 	functional/fault/auto_spare_multiple.ksh \
 | 	functional/fault/auto_spare_multiple.ksh \
 | ||||||
| 	functional/fault/auto_spare_shared.ksh \
 | 	functional/fault/auto_spare_shared.ksh \
 | ||||||
| 	functional/fault/cleanup.ksh \
 | 	functional/fault/cleanup.ksh \
 | ||||||
|  | |||||||
							
								
								
									
										122
									
								
								tests/zfs-tests/tests/functional/fault/auto_spare_double.ksh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										122
									
								
								tests/zfs-tests/tests/functional/fault/auto_spare_double.ksh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,122 @@ | |||||||
|  | #!/bin/ksh -p | ||||||
|  | # SPDX-License-Identifier: CDDL-1.0 | ||||||
|  | # | ||||||
|  | # CDDL HEADER START | ||||||
|  | # | ||||||
|  | # This file and its contents are supplied under the terms of the | ||||||
|  | # Common Development and Distribution License ("CDDL"), version 1.0. | ||||||
|  | # You may only use this file in accordance with the terms of version | ||||||
|  | # 1.0 of the CDDL. | ||||||
|  | # | ||||||
|  | # A full copy of the text of the CDDL should have accompanied this | ||||||
|  | # source.  A copy of the CDDL is also available via the Internet at | ||||||
|  | # http://www.illumos.org/license/CDDL. | ||||||
|  | # | ||||||
|  | # CDDL HEADER END | ||||||
|  | # | ||||||
|  | 
 | ||||||
|  | # | ||||||
|  | # Copyright (c) 2025 by Lawrence Livermore National Security, LLC. | ||||||
|  | # | ||||||
|  | 
 | ||||||
|  | # DESCRIPTION: | ||||||
|  | # Try do induce a double spare condition and verify we're prevented from doing | ||||||
|  | # it. | ||||||
|  | # | ||||||
|  | # STRATEGY: | ||||||
|  | # 1. Fail a drive | ||||||
|  | # 2. Kick in first spare | ||||||
|  | # 3. Bring new drive back online and start resilvering to it | ||||||
|  | # 4. Immediately after the resilver starts, fail the new drive. | ||||||
|  | # 5. Try to kick in the second spare for the new drive (which is now failed) | ||||||
|  | # 6. Verify that we can't kick in the second spare | ||||||
|  | # | ||||||
|  | # Repeat this test for both traditional spares and dRAID spares. | ||||||
|  | # | ||||||
|  | . $STF_SUITE/include/libtest.shlib | ||||||
|  | 
 | ||||||
|  | LAST_VDEV=3 | ||||||
|  | SIZE_MB=300 | ||||||
|  | 
 | ||||||
|  | ZED_PID="$(zed_check)" | ||||||
|  | 
 | ||||||
|  | function cleanup | ||||||
|  | { | ||||||
|  | 	destroy_pool $TESTPOOL | ||||||
|  | 	for i in {0..$LAST_VDEV} ; do | ||||||
|  | 		log_must rm -f $TEST_BASE_DIR/file$i | ||||||
|  | 	done | ||||||
|  | 
 | ||||||
|  | 	# Restore ZED if it was running before this test | ||||||
|  | 	if [ -n $ZED_PID ] ; then | ||||||
|  | 		log_must zed_start | ||||||
|  | 	fi | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | log_assert "Cannot attach two spares to same failed vdev" | ||||||
|  | log_onexit cleanup | ||||||
|  | 
 | ||||||
|  | # Stop ZED if it's running | ||||||
|  | if [ -n $ZED_PID ] ; then | ||||||
|  | 	log_must zed_stop | ||||||
|  | fi | ||||||
|  | 
 | ||||||
|  | log_must truncate -s ${SIZE_MB}M $TEST_BASE_DIR/file{0..$LAST_VDEV} | ||||||
|  | 
 | ||||||
|  | ZFS_DBGMSG=/proc/spl/kstat/zfs/dbgmsg | ||||||
|  | 
 | ||||||
|  | # Run the test - we assume the pool is already created. | ||||||
|  | # $1: disk to fail | ||||||
|  | # $2: 1st spare name | ||||||
|  | # $3: 2nd spare name | ||||||
|  | function do_test { | ||||||
|  | 	FAIL_DRIVE=$1 | ||||||
|  | 	SPARE0=$2 | ||||||
|  | 	SPARE1=$3 | ||||||
|  | 	echo 0 > $ZFS_DBGMSG | ||||||
|  | 	log_must zpool status | ||||||
|  | 
 | ||||||
|  | 	log_note "Kicking in first spare ($SPARE0)" | ||||||
|  | 	log_must zpool offline -f $TESTPOOL $FAIL_DRIVE | ||||||
|  | 	log_must zpool replace $TESTPOOL $FAIL_DRIVE $SPARE0 | ||||||
|  | 
 | ||||||
|  | 	# Fill the pool with data to make the resilver take a little | ||||||
|  | 	# time. | ||||||
|  | 	dd if=/dev/zero of=/$TESTPOOL/testfile bs=1M || true | ||||||
|  | 
 | ||||||
|  | 	# Zero our failed disk.  It will appear as a blank. | ||||||
|  | 	rm -f $FAIL_DRIVE | ||||||
|  | 	truncate -s ${SIZE_MB}M $FAIL_DRIVE | ||||||
|  | 
 | ||||||
|  | 	# Attempt to replace our failed disk, then immediately fault it. | ||||||
|  | 	log_must zpool replace $TESTPOOL $FAIL_DRIVE | ||||||
|  | 	log_must zpool offline -f $TESTPOOL $FAIL_DRIVE | ||||||
|  | 	log_must check_state $TESTPOOL $FAIL_DRIVE "faulted" | ||||||
|  | 
 | ||||||
|  | 	log_note "Kicking in second spare ($SPARE0)... This should not work..." | ||||||
|  | 	log_mustnot zpool replace $TESTPOOL $FAIL_DRIVE $SPARE1 | ||||||
|  | 	# Verify the debug line in dbgmsg | ||||||
|  | 	log_must grep 'disk would create double spares' $ZFS_DBGMSG | ||||||
|  | 
 | ||||||
|  | 	# Disk should still be faulted | ||||||
|  | 	log_must check_state $TESTPOOL $FAIL_DRIVE "faulted" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Test with traditional spares | ||||||
|  | log_must zpool create -O compression=off -O recordsize=4k -O primarycache=none \ | ||||||
|  | 	$TESTPOOL mirror $TEST_BASE_DIR/file{0,1} spare $TEST_BASE_DIR/file{2,3} | ||||||
|  | do_test $TEST_BASE_DIR/file1 $TEST_BASE_DIR/file2 $TEST_BASE_DIR/file3 | ||||||
|  | destroy_pool $TESTPOOL | ||||||
|  | 
 | ||||||
|  | # Clear vdev files for next test | ||||||
|  | for i in {0..$LAST_VDEV} ; do | ||||||
|  | 	log_must rm -f $TEST_BASE_DIR/file$i | ||||||
|  | done | ||||||
|  | log_must truncate -s ${SIZE_MB}M $TEST_BASE_DIR/file{0..$LAST_VDEV} | ||||||
|  | 
 | ||||||
|  | # Test with dRAID spares | ||||||
|  | log_must zpool create -O compression=off -O recordsize=4k -O primarycache=none \ | ||||||
|  | 	$TESTPOOL draid1:1d:4c:2s $TEST_BASE_DIR/file{0..3} | ||||||
|  | do_test $TEST_BASE_DIR/file1 draid1-0-0 draid1-0-1 | ||||||
|  | 
 | ||||||
|  | log_pass "Verified we cannot attach two spares to same failed vdev" | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Tony Hutter
						Tony Hutter