mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Fix double spares for failed vdev
It's possible for two spares to get attached to a single failed vdev.
This happens when you have a failed disk that is spared, and then you
replace the failed disk with a new disk, but during the resilver
the new disk fails, and ZED kicks in a spare for the failed new
disk. This commit checks for that condition and disallows it.
Reviewed-by: Akash B <akash-b@hpe.com>
Reviewed-by: Ameer Hamza <ahamza@ixsystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes: #16547
Closes: #17231
(cherry picked from commit f40ab9e399)
This commit is contained in:
@@ -7430,6 +7430,82 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a vdev to be replaced and its parent, check for a possible
|
||||
* "double spare" condition if a vdev is to be replaced by a spare. When this
|
||||
* happens, you can get two spares assigned to one failed vdev.
|
||||
*
|
||||
* To trigger a double spare condition:
|
||||
*
|
||||
* 1. disk1 fails
|
||||
* 2. 1st spare is kicked in for disk1 and it resilvers
|
||||
* 3. Someone replaces disk1 with a new blank disk
|
||||
* 4. New blank disk starts resilvering
|
||||
* 5. While resilvering, new blank disk has IO errors and faults
|
||||
* 6. 2nd spare is kicked in for new blank disk
|
||||
* 7. At this point two spares are kicked in for the original disk1.
|
||||
*
|
||||
* It looks like this:
|
||||
*
|
||||
* NAME STATE READ WRITE CKSUM
|
||||
* tank2 DEGRADED 0 0 0
|
||||
* draid2:6d:10c:2s-0 DEGRADED 0 0 0
|
||||
* scsi-0QEMU_QEMU_HARDDISK_d1 ONLINE 0 0 0
|
||||
* scsi-0QEMU_QEMU_HARDDISK_d2 ONLINE 0 0 0
|
||||
* scsi-0QEMU_QEMU_HARDDISK_d3 ONLINE 0 0 0
|
||||
* scsi-0QEMU_QEMU_HARDDISK_d4 ONLINE 0 0 0
|
||||
* scsi-0QEMU_QEMU_HARDDISK_d5 ONLINE 0 0 0
|
||||
* scsi-0QEMU_QEMU_HARDDISK_d6 ONLINE 0 0 0
|
||||
* scsi-0QEMU_QEMU_HARDDISK_d7 ONLINE 0 0 0
|
||||
* scsi-0QEMU_QEMU_HARDDISK_d8 ONLINE 0 0 0
|
||||
* scsi-0QEMU_QEMU_HARDDISK_d9 ONLINE 0 0 0
|
||||
* spare-9 DEGRADED 0 0 0
|
||||
* replacing-0 DEGRADED 0 93 0
|
||||
* scsi-0QEMU_QEMU_HARDDISK_d10-part1/old UNAVAIL 0 0 0
|
||||
* spare-1 DEGRADED 0 0 0
|
||||
* scsi-0QEMU_QEMU_HARDDISK_d10 REMOVED 0 0 0
|
||||
* draid2-0-0 ONLINE 0 0 0
|
||||
* draid2-0-1 ONLINE 0 0 0
|
||||
* spares
|
||||
* draid2-0-0 INUSE currently in use
|
||||
* draid2-0-1 INUSE currently in use
|
||||
*
|
||||
* ARGS:
|
||||
*
|
||||
* newvd: New spare disk
|
||||
* pvd: Parent vdev_t the spare should attach to
|
||||
*
|
||||
* This function returns B_TRUE if adding the new vdev would create a double
|
||||
* spare condition, B_FALSE otherwise.
|
||||
*/
|
||||
static boolean_t
|
||||
spa_vdev_new_spare_would_cause_double_spares(vdev_t *newvd, vdev_t *pvd)
|
||||
{
|
||||
vdev_t *ppvd;
|
||||
|
||||
ppvd = pvd->vdev_parent;
|
||||
if (ppvd == NULL)
|
||||
return (B_FALSE);
|
||||
|
||||
/*
|
||||
* To determine if this configuration would cause a double spare, we
|
||||
* look at the vdev_op of the parent vdev, and of the parent's parent
|
||||
* vdev. We also look at vdev_isspare on the new disk. A double spare
|
||||
* condition looks like this:
|
||||
*
|
||||
* 1. parent of parent's op is a spare or draid spare
|
||||
* 2. parent's op is replacing
|
||||
* 3. new disk is a spare
|
||||
*/
|
||||
if ((ppvd->vdev_ops == &vdev_spare_ops) ||
|
||||
(ppvd->vdev_ops == &vdev_draid_spare_ops))
|
||||
if (pvd->vdev_ops == &vdev_replacing_ops)
|
||||
if (newvd->vdev_isspare)
|
||||
return (B_TRUE);
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Attach a device to a vdev specified by its guid. The vdev type can be
|
||||
* a mirror, a raidz, or a leaf device that is also a top-level (e.g. a
|
||||
@@ -7604,6 +7680,12 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
|
||||
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
|
||||
}
|
||||
|
||||
if (spa_vdev_new_spare_would_cause_double_spares(newvd, pvd)) {
|
||||
vdev_dbgmsg(newvd,
|
||||
"disk would create double spares, ignore.");
|
||||
return (spa_vdev_exit(spa, newrootvd, txg, EEXIST));
|
||||
}
|
||||
|
||||
if (newvd->vdev_isspare)
|
||||
pvops = &vdev_spare_ops;
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user