diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c index 8c132fb5a..48e5fbd4b 100644 --- a/module/zfs/vdev_draid.c +++ b/module/zfs/vdev_draid.c @@ -1191,7 +1191,7 @@ vdev_draid_min_alloc(vdev_t *vd) } /* - * Returns true if the txg range does not exist on any leaf vdev. + * Returns false if the txg range exists on any leaf vdev, true otherwise. * * A dRAID spare does not fit into the DTL model. While it has child vdevs * there is no redundancy among them, and the effective child vdev is @@ -1932,34 +1932,15 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr) vdev_t *svd; /* - * Sequential rebuilds need to always consider the data - * on the child being rebuilt to be stale. This is - * important when all columns are available to aid - * known reconstruction in identifing which columns - * contain incorrect data. - * - * Furthermore, all repairs need to be constrained to - * the devices being rebuilt because without a checksum - * we cannot verify the data is actually correct and - * performing an incorrect repair could result in - * locking in damage and making the data unrecoverable. + * Repairs need to be constrained to the devices being + * rebuilt since without a checksum we cannot verify the + * data is actually correct and performing an incorrect + * repair could result in locking in the damage and + * making the data unrecoverable. 
*/ - if (zio->io_priority == ZIO_PRIORITY_REBUILD) { - if (vdev_draid_rebuilding(cvd)) { - if (c >= rr->rr_firstdatacol) - rr->rr_missingdata++; - else - rr->rr_missingparity++; - rc->rc_error = SET_ERROR(ESTALE); - rc->rc_skipped = 1; - rc->rc_allow_repair = 1; - continue; - } else { - rc->rc_allow_repair = 0; - } - } else { - rc->rc_allow_repair = 1; - } + if (zio->io_priority == ZIO_PRIORITY_REBUILD && + !vdev_draid_rebuilding(cvd)) + rc->rc_allow_repair = 0; /* * If this child is a distributed spare then the diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 18efdaac0..2048aa5a2 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -674,9 +674,14 @@ vdev_mirror_io_start(zio_t *zio) /* * When sequentially resilvering only issue write repair - * IOs to the vdev which is being rebuilt since performance - * is limited by the slowest child. This is an issue for - * faster replacement devices such as distributed spares. + * IOs to the vdev which is being rebuilt for two reasons: + * 1. The repair IO data calculated from parity has no checksum + * to validate and could be incorrect. Existing data must + * never be overwritten with unconfirmed data to ensure we + * never lock in unrecoverable damage to the pool. + * 2. Performance is limited by the slowest child device. We + * don't want a slower device to limit the rebuild rate for + * faster replacement devices such as distributed spares. 
*/ if ((zio->io_priority == ZIO_PRIORITY_REBUILD) && (zio->io_flags & ZIO_FLAG_IO_REPAIR) && diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 4cbde6a37..ea4d2b2f5 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -913,8 +913,9 @@ timeout = 1200 [tests/functional/redundancy] tests = ['redundancy_draid', 'redundancy_draid1', 'redundancy_draid2', 'redundancy_draid3', 'redundancy_draid_damaged1', - 'redundancy_draid_damaged2', 'redundancy_draid_spare1', - 'redundancy_draid_spare2', 'redundancy_draid_spare3', 'redundancy_mirror', + 'redundancy_draid_damaged2', 'redundancy_draid_degraded1', + 'redundancy_draid_spare1', 'redundancy_draid_spare2', + 'redundancy_draid_spare3', 'redundancy_mirror', 'redundancy_raidz', 'redundancy_raidz1', 'redundancy_raidz2', 'redundancy_raidz3', 'redundancy_stripe'] tags = ['functional', 'redundancy'] diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index ebfcea776..c9b0ddf58 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -252,8 +252,6 @@ maybe = { 'projectquota/setup': ['SKIP', exec_reason], 'raidz/raidz_002_pos': ['FAIL', known_reason], 'raidz/raidz_expand_001_pos': ['FAIL', 16421], - 'redundancy/redundancy_draid_spare1': ['FAIL', 18307], - 'redundancy/redundancy_draid_spare3': ['FAIL', 18319], 'removal/removal_condense_export': ['FAIL', known_reason], 'renameat2/setup': ['SKIP', renameat2_reason], 'reservation/reservation_008_pos': ['FAIL', 7741], diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 5aeb9065a..c5a448ac9 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1890,6 +1890,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/redundancy/redundancy_draid3.ksh \ functional/redundancy/redundancy_draid_damaged1.ksh \ functional/redundancy/redundancy_draid_damaged2.ksh \ + 
functional/redundancy/redundancy_draid_degraded1.ksh \ functional/redundancy/redundancy_draid.ksh \ functional/redundancy/redundancy_draid_spare1.ksh \ functional/redundancy/redundancy_draid_spare2.ksh \ diff --git a/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded1.ksh b/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded1.ksh new file mode 100755 index 000000000..ae65d3a21 --- /dev/null +++ b/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded1.ksh @@ -0,0 +1,141 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib + +# +# DESCRIPTION: +# When sequentially resilvering a dRAID pool with multiple vdevs +# and N faulted vdevs, where N=parity, ensure that when another leaf +# is marked degraded the pool can still be sequentially resilvered +# without introducing new checksum errors. Note we've exhausted +# the available redundancy so no silent correction can be tolerated. +# +# STRATEGY: +# 1. 
Create block device files for the test draid pool +# 2. For each parity value [1..3] +# - create draid pool +# - fill it with some directories/files +# - fault N=parity vdevs eliminating any redundancy +# - force fault an additional vdev causing it to be degraded +# - replace the degraded (but online) vdev using a sequential +# resilver. The minimum pool redundancy requirements are met so +# reconstruction is possible when reading from all online vdevs. +# - verify that the draid spare was correctly reconstructed and +# no checksum errors were introduced. +# - destroy the draid pool +# + +typeset -r devs=7 +typeset -r dev_size_mb=512 + +typeset -a disks + +prefetch_disable=$(get_tunable PREFETCH_DISABLE) +rebuild_scrub_enabled=$(get_tunable REBUILD_SCRUB_ENABLED) + +function cleanup +{ + poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL" + + for i in {0..$devs}; do + rm -f "$TEST_BASE_DIR/dev-$i" + done + + set_tunable32 PREFETCH_DISABLE $prefetch_disable + set_tunable32 REBUILD_SCRUB_ENABLED $rebuild_scrub_enabled +} + +function test_sequential_resilver # <pool> <parity> <dir> +{ + typeset pool=$1 + typeset nparity=$2 + typeset dir=$3 + + # Fault N=parity devices + for (( i=0; i<$nparity; i=i+1 )); do + log_must zpool offline -f $pool $dir/dev-$i + done + + # Parity is exhausted, faulting another device marks it degraded + log_must zpool offline -f $pool $dir/dev-$nparity + + # Replace the degraded vdev with a distributed spare + spare=draid${nparity}-0-0 + log_must zpool replace -fsw $pool $dir/dev-$nparity $spare + + log_must zpool scrub -w $pool + log_must zpool status $pool + + log_must check_pool_status $pool "scan" "repaired 0B" + log_must check_pool_status $pool "errors" "No known data errors" + log_must check_pool_status $pool "scan" "with 0 errors" +} + +log_onexit cleanup + +log_must set_tunable32 PREFETCH_DISABLE 1 +log_must set_tunable32 REBUILD_SCRUB_ENABLED 0 + +# Disk files which will be used by pool +for i in {0..$(($devs - 1))}; do + device=$TEST_BASE_DIR/dev-$i + 
log_must truncate -s ${dev_size_mb}M $device + disks[${#disks[*]}+1]=$device +done + +# Extra disk file; NOTE(review): never attached in this test +log_must truncate -s ${dev_size_mb}M $TEST_BASE_DIR/dev-$devs + +for nparity in 1 2 3; do + raid=draid${nparity}:${nparity}s + dir=$TEST_BASE_DIR + + log_must zpool create -O compression=off -f -o cachefile=none $TESTPOOL $raid ${disks[@]} + log_must zfs set primarycache=metadata $TESTPOOL + + log_must zfs create $TESTPOOL/fs + log_must fill_fs /$TESTPOOL/fs 1 512 102400 1 R + + log_must zfs create -o compress=on $TESTPOOL/fs2 + log_must fill_fs /$TESTPOOL/fs2 1 512 102400 1 R + + log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3 + log_must fill_fs /$TESTPOOL/fs3 1 512 102400 1 R + + log_must zpool export $TESTPOOL + log_must zpool import -o cachefile=none -d $dir $TESTPOOL + + log_must check_pool_status $TESTPOOL "errors" "No known data errors" + + test_sequential_resilver $TESTPOOL $nparity $dir + + log_must zpool destroy "$TESTPOOL" +done + +log_pass "draid degraded device(s) test succeeded."