draid: allow seq resilver reads from degraded vdevs

When sequentially resilvering allow a dRAID child to be read
as long as the DTLs indicate it should have a good copy of the
data and the leaf isn't being rebuilt.  The previous check was
slightly too broad and would skip dRAID spare and replacing
vdevs if one of their children was being replaced.  As long
as there exists enough additional redundancy this is fine, but
when there isn't this vdev must be read in order to correctly
reconstruct the missing data.

A new test case has been added which exhausts the available
redundancy, faults another device causing it to be degraded,
and then performs a sequential resilver for the degraded device.
In such a situation enough redundancy exists to perform the
replacement and a scrub should detect no checksum errors.

Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Reviewed-by: Andriy Tkachuk <andriy.tkachuk@seagate.com>
Reviewed-by: Akash B <akash-b@hpe.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #18405
This commit is contained in:
Brian Behlendorf
2026-04-07 10:48:27 -07:00
committed by Tony Hutter
parent 63b8da8ff7
commit e9a8c6e080
6 changed files with 162 additions and 35 deletions
+3 -2
View File
@@ -913,8 +913,9 @@ timeout = 1200
[tests/functional/redundancy]
tests = ['redundancy_draid', 'redundancy_draid1', 'redundancy_draid2',
'redundancy_draid3', 'redundancy_draid_damaged1',
'redundancy_draid_damaged2', 'redundancy_draid_spare1',
'redundancy_draid_spare2', 'redundancy_draid_spare3', 'redundancy_mirror',
'redundancy_draid_damaged2', 'redundancy_draid_degraded1',
'redundancy_draid_spare1', 'redundancy_draid_spare2',
'redundancy_draid_spare3', 'redundancy_mirror',
'redundancy_raidz', 'redundancy_raidz1', 'redundancy_raidz2',
'redundancy_raidz3', 'redundancy_stripe']
tags = ['functional', 'redundancy']
-2
View File
@@ -252,8 +252,6 @@ maybe = {
'projectquota/setup': ['SKIP', exec_reason],
'raidz/raidz_002_pos': ['FAIL', known_reason],
'raidz/raidz_expand_001_pos': ['FAIL', 16421],
'redundancy/redundancy_draid_spare1': ['FAIL', 18307],
'redundancy/redundancy_draid_spare3': ['FAIL', 18319],
'removal/removal_condense_export': ['FAIL', known_reason],
'renameat2/setup': ['SKIP', renameat2_reason],
'reservation/reservation_008_pos': ['FAIL', 7741],
+1
View File
@@ -1890,6 +1890,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/redundancy/redundancy_draid3.ksh \
functional/redundancy/redundancy_draid_damaged1.ksh \
functional/redundancy/redundancy_draid_damaged2.ksh \
functional/redundancy/redundancy_draid_degraded1.ksh \
functional/redundancy/redundancy_draid.ksh \
functional/redundancy/redundancy_draid_spare1.ksh \
functional/redundancy/redundancy_draid_spare2.ksh \
@@ -0,0 +1,141 @@
#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2026 by Lawrence Livermore National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
#
# DESCRIPTION:
# When sequentially resilvering a dRAID pool with multiple vdevs
# and N faulted vdevs, where N=parity, ensure that when another leaf
# is marked degraded the pool can still be sequentially resilvered
# without introducing new checksum errors. Note we've exhausted
# the available redundancy so no silent correction can be tolerated.
#
# STRATEGY:
# 1. Create block device files for the test draid pool
# 2. For each parity value [1..3]
# - create draid pool
# - fill it with some directories/files
# - fault N=parity vdevs eliminating any redundancy
# - force fault an additional vdev causing it to be degraded
# - replace the degraded (but online) vdev using a sequential
# resilver. The minimum pool redundancy requirements are met so
# reconstruction is possible when reading from all online vdevs.
# - verify that the draid spare was correctly reconstructed and
# no checksum errors were introduced.
# - destroy the draid pool
#
typeset -r devs=7
typeset -r dev_size_mb=512
typeset -a disks
prefetch_disable=$(get_tunable PREFETCH_DISABLE)
rebuild_scrub_enabled=$(get_tunable REBUILD_SCRUB_ENABLED)
function cleanup
{
poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
for i in {0..$devs}; do
rm -f "$TEST_BASE_DIR/dev-$i"
done
set_tunable32 PREFETCH_DISABLE $prefetch_disable
set_tunable32 REBUILD_SCRUB_ENABLED $rebuild_scrub_enabled
}
function test_sequential_resilver # <pool> <parity> <dir>
{
typeset pool=$1
typeset nparity=$2
typeset dir=$3
# Fault N=parity devices
for (( i=0; i<$nparity; i=i+1 )); do
log_must zpool offline -f $pool $dir/dev-$i
done
# Parity is exhausted, faulting another device marks it degraded
log_must zpool offline -f $pool $dir/dev-$nparity
# Replace the degraded vdev with a distributed spare
spare=draid${nparity}-0-0
log_must zpool replace -fsw $pool $dir/dev-$nparity $spare
log_must zpool scrub -w $pool
log_must zpool status $pool
log_must check_pool_status $pool "scan" "repaired 0B"
log_must check_pool_status $pool "errors" "No known data errors"
log_must check_pool_status $pool "scan" "with 0 errors"
}
log_onexit cleanup
log_must set_tunable32 PREFETCH_DISABLE 1
log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
# Disk files which will be used by pool
for i in {0..$(($devs - 1))}; do
device=$TEST_BASE_DIR/dev-$i
log_must truncate -s ${dev_size_mb}M $device
disks[${#disks[*]}+1]=$device
done
# Disk file which will be attached
log_must truncate -s 512M $TEST_BASE_DIR/dev-$devs
for nparity in 1 2 3; do
raid=draid${nparity}:${nparity}s
dir=$TEST_BASE_DIR
log_must zpool create -O compression=off -f -o cachefile=none $TESTPOOL $raid ${disks[@]}
log_must zfs set primarycache=metadata $TESTPOOL
log_must zfs create $TESTPOOL/fs
log_must fill_fs /$TESTPOOL/fs 1 512 102400 1 R
log_must zfs create -o compress=on $TESTPOOL/fs2
log_must fill_fs /$TESTPOOL/fs2 1 512 102400 1 R
log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
log_must fill_fs /$TESTPOOL/fs3 1 512 102400 1 R
log_must zpool export $TESTPOOL
log_must zpool import -o cachefile=none -d $dir $TESTPOOL
log_must check_pool_status $TESTPOOL "errors" "No known data errors"
test_sequential_resilver $TESTPOOL $nparity $dir
log_must zpool destroy "$TESTPOOL"
done
log_pass "draid degraded device(s) test succeeded."