mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Defer new resilvers until the current one ends
Currently, if a resilver is triggered for any reason while an existing one is running, zfs will immediately restart the existing resilver from the beginning to include the new drive. This causes problems for system administrators when a drive fails while another is already resilvering. In this case, the optimal thing to do to reduce risk of data loss is to wait for the current resilver to end before immediately replacing the second failed drive, which allows the system to operate with two incomplete drives for the minimum amount of time. This patch introduces the resilver_defer feature that essentially does this for the admin without forcing them to wait and monitor the resilver manually. The change requires an on-disk feature since we must mark drives that are part of a deferred resilver in the vdev config to ensure that we do not assume they are done resilvering when an existing resilver completes. Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: John Kennedy <john.kennedy@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: @mmaybee Signed-off-by: Tom Caputi <tcaputi@datto.com> Closes #7732
This commit is contained in:
committed by
Brian Behlendorf
parent
9f438c5f94
commit
80a91e7469
@@ -421,6 +421,10 @@ tags = ['functional', 'cli_root', 'zpool_reopen']
|
||||
tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
|
||||
tags = ['functional', 'cli_root', 'zpool_replace']
|
||||
|
||||
[tests/functional/cli_root/zpool_resilver]
|
||||
tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
|
||||
tags = ['functional', 'cli_root', 'zpool_resilver']
|
||||
|
||||
[tests/functional/cli_root/zpool_scrub]
|
||||
tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos',
|
||||
'zpool_scrub_004_pos', 'zpool_scrub_005_pos',
|
||||
|
||||
@@ -52,6 +52,7 @@ SUBDIRS = \
|
||||
zpool_remove \
|
||||
zpool_reopen \
|
||||
zpool_replace \
|
||||
zpool_resilver \
|
||||
zpool_scrub \
|
||||
zpool_set \
|
||||
zpool_split \
|
||||
|
||||
@@ -87,5 +87,6 @@ if is_linux; then
|
||||
"feature@encryption"
|
||||
"feature@project_quota"
|
||||
"feature@allocation_classes"
|
||||
"feature@resilver_defer"
|
||||
)
|
||||
fi
|
||||
|
||||
@@ -115,3 +115,10 @@ function is_scan_restarted #pool
|
||||
zpool history -i $pool | grep -q "scan aborted, restarting"
|
||||
return $?
|
||||
}
|
||||
|
||||
#
# Search the internal history ('zpool history -i') of the given pool for the
# "starting deferred resilver" record.
#
# $1 - pool name
#
# Returns the exit status of grep: 0 when the record is present, non-zero
# otherwise.
#
function is_deferred_scan_started #pool
{
	typeset target_pool=$1

	# No explicit return: a function exits with the status of its last
	# command, which here is the grep at the end of the pipeline.
	zpool history -i $target_pool | grep -q "starting deferred resilver"
}
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
# 4. Execute scrub.
|
||||
# 5. "Plug back" disk.
|
||||
# 6. Reopen a pool with an -n flag.
|
||||
# 7. Check if scrub scan is NOT replaced by resilver.
|
||||
# 7. Check if resilver was deferred.
|
||||
# 8. Check if trying to put device to offline fails because of no valid
|
||||
# replicas.
|
||||
#
|
||||
@@ -75,11 +75,12 @@ log_must check_state $TESTPOOL "$REMOVED_DISK_ID" "online"
|
||||
log_must zinject -c all
|
||||
# 7. Check if the scrub was NOT restarted and the resilver was deferred.
|
||||
log_must wait_for_scrub_end $TESTPOOL $MAXTIMEOUT
|
||||
log_mustnot is_scan_restarted $TESTPOOL
|
||||
log_must is_deferred_scan_started $TESTPOOL
|
||||
|
||||
# 8. Check if trying to put device to offline fails because of no valid
|
||||
# replicas.
|
||||
log_mustnot zpool offline $TESTPOOL $DISK2
|
||||
log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
|
||||
log_must zpool offline $TESTPOOL $DISK2
|
||||
|
||||
# clean up
|
||||
log_must zpool destroy $TESTPOOL
|
||||
|
||||
@@ -72,13 +72,13 @@ log_must zinject -d $REMOVED_DISK_ID -D25:1 $TESTPOOL
|
||||
log_must wait_for_resilver_start $TESTPOOL $MAXTIMEOUT
|
||||
|
||||
# 6. Reopen a pool again with -n flag.
|
||||
zpool reopen -n $TESTPOOL
|
||||
log_must zpool reopen -n $TESTPOOL
|
||||
|
||||
# 7. Wait until resilvering is finished and check if it was restarted.
|
||||
log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
|
||||
# remove delay from disk
|
||||
log_must zinject -c all
|
||||
log_must is_scan_restarted $TESTPOOL
|
||||
log_mustnot is_scan_restarted $TESTPOOL
|
||||
|
||||
# clean up
|
||||
log_must zpool destroy $TESTPOOL
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
# Automake fragment for the zpool_resilver functional test group.
# Everything listed below is installed into this directory.
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_resilver
|
||||
# Test scripts of the group: setup/cleanup plus the two test cases.
dist_pkgdata_SCRIPTS = \
|
||||
setup.ksh \
|
||||
cleanup.ksh \
|
||||
zpool_resilver_bad_args.ksh \
|
||||
zpool_resilver_restart.ksh
|
||||
|
||||
# Shared configuration sourced by the scripts above.
dist_pkgdata_DATA = \
|
||||
zpool_resilver.cfg
|
||||
@@ -0,0 +1,33 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
# Load this test group's own configuration, i.e. the same file that the
# group's setup.ksh sources, so cleanup does not depend on another group.
. $STF_SUITE/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg

verify_runnable "global"

# Undo what setup.ksh created. destroy_mirrors comes from libtest.shlib;
# presumably it destroys the mirrored test pool -- confirm against the library.
destroy_mirrors
|
||||
@@ -0,0 +1,39 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2018 by Datto. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg
|
||||
|
||||
# Preconditions checked by the libtest helpers: runnable in "global" and at
# least three entries available in $DISKS.
verify_runnable "global"
verify_disk_count "$DISKS" 3

# Build the test pool from the three disks exported by zpool_resilver.cfg.
default_mirror_setup_noexit $DISK1 $DISK2 $DISK3

mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)

# Seed the file system with 256 blocks of 1 MiB each, i.e. 256M of data.
log_must file_write -b $((1024 * 1024)) -c 256 -o create -d 0 \
    -f $mntpnt/bigfile

log_pass
|
||||
@@ -0,0 +1,33 @@
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2018 by Datto. All rights reserved.
|
||||
#
|
||||
|
||||
# Take the first three whitespace-separated entries of $DISKS as the disks
# used by the zpool_resilver tests.
export DISK1=$(echo $DISKS | nawk '{print $1}')
|
||||
export DISK2=$(echo $DISKS | nawk '{print $2}')
|
||||
export DISK3=$(echo $DISKS | nawk '{print $3}')
|
||||
|
||||
# Values the tests write to the zfs_scan_vdev_limit tunable: 128 KiB to slow
# a scan down, 4 MiB to restore it (assumed to match the module default --
# TODO confirm).
export ZFS_SCAN_VDEV_LIMIT_SLOW=$((128*1024))
|
||||
export ZFS_SCAN_VDEV_LIMIT_DEFAULT=$((4*1024*1024))
|
||||
|
||||
# Timeout passed to wait_for_resilver_end (presumably seconds -- confirm).
export MAXTIMEOUT=80
|
||||
+58
@@ -0,0 +1,58 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2018 by Datto. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# A badly formed parameter passed to 'zpool resilver' should
|
||||
# return an error.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create an array containing bad 'zpool resilver' parameters.
|
||||
# 2. For each element, execute the sub-command.
|
||||
# 3. Verify it returns an error.
|
||||
#
|
||||
|
||||
verify_runnable "global"

# Argument strings that 'zpool resilver' must reject: an empty entry, a few
# malformed option strings, a string that is not expected to name a pool, and
# every single-letter option from -a to -z and from -A to -Z.
set -A args "" "-?" "blah blah" "-%" "--?" "-*" "-=" \
    "-a" "-b" "-c" "-d" "-e" "-f" "-g" "-h" "-i" "-j" "-k" "-l" \
    "-m" "-n" "-o" "-p" "-q" "-r" "-s" "-t" "-u" "-v" "-w" "-x" "-y" "-z" \
    "-A" "-B" "-C" "-D" "-E" "-F" "-G" "-H" "-I" "-J" "-K" "-L" \
    "-M" "-N" "-O" "-P" "-Q" "-R" "-S" "-T" "-U" "-V" "-W" "-X" "-Y" "-Z"


log_assert "Execute 'zpool resilver' using invalid parameters."

# NOTE: ${args[i]} is expanded unquoted, so the empty entry yields a bare
# 'zpool resilver' and "blah blah" is passed as two separate words.
typeset -i i=0
while [[ $i -lt ${#args[*]} ]]; do
	log_mustnot zpool resilver ${args[i]}

	((i = i + 1))
done

log_pass "Badly formed 'zpool resilver' parameters fail as expected."
|
||||
+95
@@ -0,0 +1,95 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2018 Datto Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib
|
||||
. $STF_SUITE/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# "Verify 'zpool resilver' restarts in-progress resilvers"
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Write some data and detach the first drive so it has resilver
|
||||
# work to do
|
||||
# 2. Repeat the process with a second disk
|
||||
# 3. Reattach the drives, causing the second drive's resilver to be
|
||||
# deferred
|
||||
# 4. Manually restart the resilver with all drives
|
||||
#
|
||||
# NOTES:
|
||||
# Artificially limit the resilver speed by setting the zfs_scan_vdev_limit
|
||||
# low and adding a 50ms zio delay in order to ensure that the resilver
|
||||
# does not complete early.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
#
# Exit handler for this test: clear all zinject handlers, write the default
# value back to the zfs_scan_vdev_limit tunable and delete the two data files
# the test creates under $mntpnt.
#
function cleanup
{
	typeset leftover

	log_must zinject -c all
	log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT

	for leftover in biggerfile1 biggerfile2; do
		log_must rm -f $mntpnt/$leftover
	done
}
|
||||
|
||||
# Register the cleanup function with the test framework's exit hook.
log_onexit cleanup
|
||||
|
||||
log_assert "Verify 'zpool resilver' restarts in-progress resilvers"
|
||||
|
||||
# Mountpoint of the test file system; also read by cleanup() to remove files.
mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
|
||||
|
||||
# 1. Write some data and detach the first drive so it has resilver work to do
|
||||
# 1024 blocks of 512 KiB each, i.e. 512 MiB.
log_must file_write -b 524288 -c 1024 -o create -d 0 -f $mntpnt/biggerfile1
|
||||
log_must sync
|
||||
log_must zpool detach $TESTPOOL $DISK2
|
||||
|
||||
# 2. Repeat the process with a second disk
|
||||
log_must file_write -b 524288 -c 1024 -o create -d 0 -f $mntpnt/biggerfile2
|
||||
log_must sync
|
||||
log_must zpool detach $TESTPOOL $DISK3
|
||||
|
||||
# 3. Reattach the drives, causing the second drive's resilver to be deferred
|
||||
# Lower the scan limit first so the resilver started below stays in progress.
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
|
||||
|
||||
log_must zpool attach $TESTPOOL $DISK1 $DISK2
|
||||
# Add the 50ms zio delay mentioned in NOTES to the newly attached disk.
log_must zinject -d $DISK2 -D50:1 $TESTPOOL
|
||||
log_must is_pool_resilvering $TESTPOOL true
|
||||
|
||||
# Attach the second disk while the first resilver is still running.
log_must zpool attach $TESTPOOL $DISK1 $DISK3
|
||||
log_must zinject -d $DISK3 -D50:1 $TESTPOOL
|
||||
log_must is_pool_resilvering $TESTPOOL true
|
||||
|
||||
# 4. Manually restart the resilver with all drives
|
||||
log_must zpool resilver $TESTPOOL
|
||||
# Remove the delays and restore the tunable before waiting for the
# resilver to end (bounded by $MAXTIMEOUT).
log_must zinject -c all
|
||||
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
|
||||
log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
|
||||
# The pool history must contain the "starting deferred resilver" record.
log_must is_deferred_scan_started $TESTPOOL
|
||||
log_must check_state $TESTPOOL "$DISK2" "online"
|
||||
log_must check_state $TESTPOOL "$DISK3" "online"
|
||||
|
||||
log_pass "Verified 'zpool resilver' restarts in-progress resilvers"
|
||||
@@ -25,6 +25,7 @@
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib
|
||||
. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
|
||||
|
||||
#
|
||||
@@ -95,6 +96,7 @@ DISK1="$TEST_BASE_DIR/zpool_disk1.dat"
|
||||
DISK2="$TEST_BASE_DIR/zpool_disk2.dat"
|
||||
DISK3="$TEST_BASE_DIR/zpool_disk3.dat"
|
||||
DISK4="$TEST_BASE_DIR/zpool_disk4.dat"
|
||||
RESILVER_TIMEOUT=40
|
||||
|
||||
# 1. Create the pool
|
||||
log_must truncate -s $DEVSIZE $DISK1
|
||||
@@ -117,6 +119,7 @@ zpool_scrub_sync $TESTPOOL
|
||||
# 5. Online the first device and offline the second device
|
||||
zpool_do_sync 'online' $TESTPOOL $DISK1
|
||||
zpool_do_sync 'offline' $TESTPOOL $DISK2
|
||||
log_must wait_for_resilver_end $TESTPOOL $RESILVER_TIMEOUT
|
||||
|
||||
# 6. Scrub the pool again
|
||||
zpool_scrub_sync $TESTPOOL
|
||||
|
||||
Reference in New Issue
Block a user