Restrict raidz faulted vdev count
Specifically, a child in a replacing vdev won't count when assessing
the dtl during a vdev_fault().

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tino Reichardt <milky-zfs@mcmilk.de>
Signed-off-by: Don Brady <don.brady@klarasystems.com>
Closes #16569
commit 141368a4b6 (parent 7e957fde73)
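The gist of the change below is a stricter availability test while a fault is being assessed: a child that is otherwise healthy is still counted as an outage if it currently sits under a replacing vdev. The following standalone sketch mirrors that predicate outside the kernel; the struct layout, the plain boolean readability flag (the real code calls vdev_readable()), and the vdev_replacing_ops stand-in are simplified assumptions for illustration only, not the OpenZFS definitions.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel types (illustration only). */
typedef struct vdev_ops {
	const char *vdev_op_type;
} vdev_ops_t;

typedef struct vdev {
	bool vdev_is_readable;		/* real code calls vdev_readable(vd) */
	struct vdev *vdev_parent;
	const vdev_ops_t *vdev_ops;
} vdev_t;

static const vdev_ops_t vdev_replacing_ops = { "replacing" };

/*
 * Mirror of the new DTL_OUTAGE gate: a child counts as unavailable when it
 * is unreadable, or, while evaluating a fault request, when it sits under a
 * replacing vdev (it may well hit read errors, so it cannot be relied on).
 */
static bool
dtl_child_unavailable(const vdev_t *vd, bool faulting)
{
	return (!vd->vdev_is_readable ||
	    (faulting && vd->vdev_parent != NULL &&
	    vd->vdev_parent->vdev_ops == &vdev_replacing_ops));
}

int
main(void)
{
	vdev_ops_t raidz_ops = { "raidz" };
	vdev_ops_t leaf_ops = { "disk" };
	vdev_t top = { true, NULL, &raidz_ops };
	vdev_t replacing = { true, &top, &vdev_replacing_ops };
	vdev_t plain_child = { true, &top, &leaf_ops };
	vdev_t replace_child = { true, &replacing, &leaf_ops };

	/* Ordinary reassessment: a readable child is never an outage. */
	printf("plain child, no fault:      %d\n",
	    dtl_child_unavailable(&plain_child, false));
	/* Fault assessment: the same kind of child under a replacing vdev counts. */
	printf("replacing child, no fault:  %d\n",
	    dtl_child_unavailable(&replace_child, false));
	printf("replacing child, faulting:  %d\n",
	    dtl_child_unavailable(&replace_child, true));
	return (0);
}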
@@ -3149,9 +3149,9 @@ vdev_dtl_should_excise(vdev_t *vd, boolean_t rebuild_done)
  * Reassess DTLs after a config change or scrub completion. If txg == 0 no
  * write operations will be issued to the pool.
  */
-void
-vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
-    boolean_t scrub_done, boolean_t rebuild_done)
+static void
+vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
+    boolean_t scrub_done, boolean_t rebuild_done, boolean_t faulting)
 {
 	spa_t *spa = vd->vdev_spa;
 	avl_tree_t reftree;
@@ -3160,8 +3160,8 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
 	ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
 
 	for (int c = 0; c < vd->vdev_children; c++)
-		vdev_dtl_reassess(vd->vdev_child[c], txg,
-		    scrub_txg, scrub_done, rebuild_done);
+		vdev_dtl_reassess_impl(vd->vdev_child[c], txg,
+		    scrub_txg, scrub_done, rebuild_done, faulting);
 
 	if (vd == spa->spa_root_vdev || !vdev_is_concrete(vd) || vd->vdev_aux)
 		return;
@@ -3255,11 +3255,21 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
 		if (scrub_done)
 			range_tree_vacate(vd->vdev_dtl[DTL_SCRUB], NULL, NULL);
 		range_tree_vacate(vd->vdev_dtl[DTL_OUTAGE], NULL, NULL);
-		if (!vdev_readable(vd))
+
+		/*
+		 * For the faulting case, treat members of a replacing vdev
+		 * as if they are not available. It's more likely than not that
+		 * a vdev in a replacing vdev could encounter read errors so
+		 * treat it as not being able to contribute.
+		 */
+		if (!vdev_readable(vd) ||
+		    (faulting && vd->vdev_parent != NULL &&
+		    vd->vdev_parent->vdev_ops == &vdev_replacing_ops)) {
 			range_tree_add(vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL);
-		else
+		} else {
 			range_tree_walk(vd->vdev_dtl[DTL_MISSING],
 			    range_tree_add, vd->vdev_dtl[DTL_OUTAGE]);
+		}
 
 		/*
 		 * If the vdev was resilvering or rebuilding and no longer
@@ -3321,6 +3331,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
 	}
 }
 
+void
+vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
+    boolean_t scrub_done, boolean_t rebuild_done)
+{
+	return (vdev_dtl_reassess_impl(vd, txg, scrub_txg, scrub_done,
+	    rebuild_done, B_FALSE));
+}
+
 /*
  * Iterate over all the vdevs except spare, and post kobj events
  */
@@ -3548,7 +3566,11 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
 }
 
 /*
- * Determine whether the specified vdev can be offlined/detached/removed
+ * Determine whether the specified vdev can be
+ * - offlined
+ * - detached
+ * - removed
+ * - faulted
  * without losing data.
  */
 boolean_t
@@ -3558,6 +3580,7 @@ vdev_dtl_required(vdev_t *vd)
 	vdev_t *tvd = vd->vdev_top;
 	uint8_t cant_read = vd->vdev_cant_read;
 	boolean_t required;
+	boolean_t faulting = vd->vdev_state == VDEV_STATE_FAULTED;
 
 	ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
 
@@ -3570,10 +3593,10 @@ vdev_dtl_required(vdev_t *vd)
 	 * If not, we can safely offline/detach/remove the device.
 	 */
 	vd->vdev_cant_read = B_TRUE;
-	vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
+	vdev_dtl_reassess_impl(tvd, 0, 0, B_FALSE, B_FALSE, faulting);
 	required = !vdev_dtl_empty(tvd, DTL_OUTAGE);
 	vd->vdev_cant_read = cant_read;
-	vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
+	vdev_dtl_reassess_impl(tvd, 0, 0, B_FALSE, B_FALSE, faulting);
 
 	if (!required && zio_injection_enabled) {
 		required = !!zio_handle_device_injection(vd, NULL,
@@ -125,7 +125,8 @@ tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
     'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos',
     'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift',
     'auto_spare_shared', 'decrypt_fault', 'decompress_fault',
-    'scrub_after_resilver', 'suspend_resume_single', 'zpool_status_-s']
+    'fault_limits', 'scrub_after_resilver', 'suspend_resume_single',
+    'zpool_status_-s']
 tags = ['functional', 'fault']
 
 [tests/functional/features/large_dnode:Linux]
@@ -1525,6 +1525,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/fault/cleanup.ksh \
 	functional/fault/decompress_fault.ksh \
 	functional/fault/decrypt_fault.ksh \
+	functional/fault/fault_limits.ksh \
 	functional/fault/scrub_after_resilver.ksh \
 	functional/fault/suspend_resume_single.ksh \
 	functional/fault/setup.ksh \
tests/zfs-tests/tests/functional/fault/fault_limits.ksh (new executable file, 96 lines)
@@ -0,0 +1,96 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2024 by Klara, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION: Verify that raidz children vdev fault count is restricted
+#
+# STRATEGY:
+# 1. Create a raidz2 or raidz3 pool and add some data to it
+# 2. Replace one of the child vdevs to create a replacing vdev
+# 3. While it is resilvering, attempt to fault disks
+# 4. Verify that less than parity count was faulted while replacing
+#
+
+TESTPOOL="fault-test-pool"
+PARITY=$((RANDOM%(2) + 2))
+VDEV_CNT=$((4 + (2 * PARITY)))
+VDEV_SIZ=512M
+
+function cleanup
+{
+	poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL"
+
+	for i in {0..$((VDEV_CNT - 1))}; do
+		log_must rm -f "$TEST_BASE_DIR/dev-$i"
+	done
+}
+
+log_onexit cleanup
+log_assert "restricts raidz children vdev fault count"
+
+log_note "creating $VDEV_CNT vdevs for parity $PARITY test"
+typeset -a disks
+for i in {0..$((VDEV_CNT - 1))}; do
+	device=$TEST_BASE_DIR/dev-$i
+	log_must truncate -s $VDEV_SIZ $device
+	disks[${#disks[*]}+1]=$device
+done
+
+log_must zpool create -f ${TESTPOOL} raidz${PARITY} ${disks[1..$((VDEV_CNT - 1))]}
+
+# Add some data to the pool
+log_must zfs create $TESTPOOL/fs
+MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)"
+log_must fill_fs $MNTPOINT $PARITY 200 32768 1000 Z
+sync_pool $TESTPOOL
+
+# Replace the last child vdev to form a replacing vdev
+log_must zpool replace ${TESTPOOL} ${disks[$((VDEV_CNT - 1))]} ${disks[$VDEV_CNT]}
+# immediately offline replacement disk to keep replacing vdev around
+log_must zpool offline ${TESTPOOL} ${disks[$VDEV_CNT]}
+
+# Fault disks while a replacing vdev is still active
+for disk in ${disks[0..$PARITY]}; do
+	log_must zpool offline -tf ${TESTPOOL} $disk
+done
+
+zpool status $TESTPOOL
+
+# Count the faults that succeeded
+faults=0
+for disk in ${disks[0..$PARITY]}; do
+	state=$(zpool get -H -o value state ${TESTPOOL} ${disk})
+	if [ "$state" = "FAULTED" ] ; then
+		((faults=faults+1))
+	fi
+done
+
+log_must test "$faults" -lt "$PARITY"
+log_must test "$faults" -gt 0
+
+log_pass "restricts raidz children vdev fault count"
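The test above deliberately tries to fault PARITY disks while one child is still part of a replacing vdev, then asserts that fewer than PARITY of those faults were allowed (and more than zero). A minimal back-of-the-envelope sketch of that budget follows; it is assumed illustrative arithmetic, not library code, under the premise that every child of a replacing vdev is pre-counted as an outage during fault assessment.

#include <stdio.h>

/*
 * A raidz vdev with the given parity tolerates at most `parity` missing
 * children; children under a replacing vdev are already counted as outages
 * for a fault request, so only the remainder may still be faulted.
 */
static int
allowed_additional_faults(int parity, int replacing_children)
{
	int budget = parity - replacing_children;
	return (budget > 0 ? budget : 0);
}

int
main(void)
{
	/* raidz2 with one child mid-replace: the test expects at most 1 fault. */
	printf("raidz2: %d\n", allowed_additional_faults(2, 1));
	/* raidz3 with one child mid-replace: the test expects at most 2 faults. */
	printf("raidz3: %d\n", allowed_additional_faults(3, 1));
	return (0);
}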