mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 02:44:41 +03:00
Sequential scrub and resilvers
Currently, scrubs and resilvers can take an extremely long time to complete. This is largely due to the fact that zfs scans process pools in logical order, as determined by each block's bookmark. This makes sense from a simplicity perspective, but blocks in zfs are often scattered randomly across disks, particularly due to zfs's copy-on-write mechanisms. This patch improves performance by splitting scrubs and resilvers into a metadata scanning phase and an IO issuing phase. The metadata scan reads through the structure of the pool and gathers an in-memory queue of I/Os, sorted by size and offset on disk. The issuing phase will then issue the scrub I/Os as sequentially as possible, greatly improving performance. This patch also updates and cleans up some of the scan code which has not been updated in several years. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Authored-by: Saso Kiselkov <saso.kiselkov@nexenta.com> Authored-by: Alek Pinchuk <apinchuk@datto.com> Authored-by: Tom Caputi <tcaputi@datto.com> Signed-off-by: Tom Caputi <tcaputi@datto.com> Closes #3625 Closes #6256
This commit is contained in:
committed by
Brian Behlendorf
parent
e301113c17
commit
d4a72f2386
@@ -33,7 +33,7 @@
|
||||
# 8. Put another device offline and check if the test file checksum is correct.
|
||||
#
|
||||
# NOTES:
|
||||
# A 25ms delay is added to make sure that the scrub is running while
|
||||
# A 250ms delay is added to make sure that the scrub is running while
|
||||
# the reopen kicks the resilver.
|
||||
#
|
||||
|
||||
@@ -70,7 +70,7 @@ log_must md5sum $TESTFILE > $TESTFILE_MD5
|
||||
|
||||
# 4. Execute scrub.
|
||||
# add delay to I/O requests for remaining disk in pool
|
||||
log_must zinject -d $DISK2 -D25:1 $TESTPOOL
|
||||
log_must zinject -d $DISK2 -D250:1 $TESTPOOL
|
||||
log_must zpool scrub $TESTPOOL
|
||||
|
||||
# 5. "Plug back" disk.
|
||||
@@ -81,12 +81,12 @@ log_must check_state $TESTPOOL "$REMOVED_DISK_ID" "online"
|
||||
# 7. Check if scrub scan is replaced by resilver.
|
||||
# the scrub operation has to be running while reopen is executed
|
||||
log_must is_pool_scrubbing $TESTPOOL true
|
||||
# remove delay from disk
|
||||
log_must zinject -c all
|
||||
# the scrub will be replaced by resilver, wait until it ends
|
||||
log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
|
||||
# check if the scrub scan has been interrupted by resilver
|
||||
log_must is_scan_restarted $TESTPOOL
|
||||
# remove delay from disk
|
||||
log_must zinject -c all
|
||||
|
||||
# 8. Put another device offline and check if the test file checksum is correct.
|
||||
log_must zpool offline $TESTPOOL $DISK2
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
# replicas.
|
||||
#
|
||||
# NOTES:
|
||||
# A 25ms delay is added to make sure that the scrub is running while
|
||||
# A 125ms delay is added to make sure that the scrub is running while
|
||||
# the reopen is invoked.
|
||||
#
|
||||
|
||||
@@ -64,20 +64,19 @@ log_must check_state $TESTPOOL "$REMOVED_DISK_ID" "unavail"
|
||||
log_must generate_random_file /$TESTPOOL/data $LARGE_FILE_SIZE
|
||||
# 4. Execute scrub.
|
||||
# add delay to I/O requests for remaining disk in pool
|
||||
log_must zinject -d $DISK2 -D25:1 $TESTPOOL
|
||||
log_must zinject -d $DISK2 -D125:1 $TESTPOOL
|
||||
log_must zpool scrub $TESTPOOL
|
||||
# 5. "Plug back" disk.
|
||||
insert_disk $REMOVED_DISK $scsi_host
|
||||
# 6. Reopen a pool with an -n flag.
|
||||
log_must zpool reopen -n $TESTPOOL
|
||||
log_must check_state $TESTPOOL "$REMOVED_DISK_ID" "online"
|
||||
# remove delay from disk
|
||||
log_must zinject -c all
|
||||
# 7. Check if scrub scan is NOT replaced by resilver.
|
||||
log_must wait_for_scrub_end $TESTPOOL $MAXTIMEOUT
|
||||
log_mustnot is_scan_restarted $TESTPOOL
|
||||
|
||||
# remove delay from disk
|
||||
log_must zinject -c all
|
||||
|
||||
# 8. Check if trying to put device to offline fails because of no valid
|
||||
# replicas.
|
||||
log_mustnot zpool offline $TESTPOOL $DISK2
|
||||
|
||||
@@ -26,7 +26,9 @@
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
|
||||
destroy_mirrors
|
||||
|
||||
@@ -37,8 +37,8 @@ verify_disk_count "$DISKS" 2
|
||||
|
||||
default_mirror_setup_noexit $DISK1 $DISK2
|
||||
|
||||
mntpnt=$(get_prop mountpoint $TESTPOOL)
|
||||
mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
|
||||
|
||||
# Create 100MB of data
|
||||
log_must file_write -b 1048576 -c 100 -o create -d 0 -f $mntpnt/bigfile
|
||||
# Create 256M of data
|
||||
log_must file_write -b 1048576 -c 256 -o create -d 0 -f $mntpnt/bigfile
|
||||
log_pass
|
||||
|
||||
@@ -30,3 +30,6 @@
|
||||
|
||||
export DISK1=${DISKS%% *}
|
||||
export DISK2=$(echo $DISKS | awk '{print $2}')
|
||||
|
||||
export ZFS_SCAN_VDEV_LIMIT_SLOW=$((128*1024))
|
||||
export ZFS_SCAN_VDEV_LIMIT_DEFAULT=$((4*1024*1024))
|
||||
|
||||
@@ -46,9 +46,9 @@
|
||||
# 6. Verify zpool scrub -s succeed when the system is scrubbing.
|
||||
#
|
||||
# NOTES:
|
||||
# A 10ms delay is added to the ZIOs in order to ensure that the
|
||||
# scrub does not complete before it has a chance to be cancelled.
|
||||
# This can occur when testing with small pools or very fast hardware.
|
||||
# Artificially limit the scrub speed by setting the zfs_scan_vdev_limit
|
||||
# low and adding a 50ms zio delay in order to ensure that the scrub does
|
||||
# not complete early.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
@@ -56,13 +56,21 @@ verify_runnable "global"
|
||||
function cleanup
|
||||
{
|
||||
log_must zinject -c all
|
||||
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
|
||||
log_must rm -f $mntpnt/biggerfile
|
||||
}
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_assert "Verify scrub, scrub -p, and scrub -s show the right status."
|
||||
|
||||
log_must zinject -d $DISK1 -D20:1 $TESTPOOL
|
||||
# Create 1G of additional data
|
||||
mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
|
||||
log_must file_write -b 1048576 -c 1024 -o create -d 0 -f $mntpnt/biggerfile
|
||||
log_must sync
|
||||
|
||||
log_must zinject -d $DISK1 -D50:1 $TESTPOOL
|
||||
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
|
||||
log_must zpool scrub $TESTPOOL
|
||||
log_must is_pool_scrubbing $TESTPOOL true
|
||||
log_must zpool scrub -p $TESTPOOL
|
||||
|
||||
@@ -43,23 +43,22 @@
|
||||
# 2. Kick off a second scrub and verify it fails
|
||||
#
|
||||
# NOTES:
|
||||
# A 10ms delay is added to the ZIOs in order to ensure that the
|
||||
# scrub does not complete before it has a chance to be restarted.
|
||||
# This can occur when testing with small pools or very fast hardware.
|
||||
# Artificially limit the scrub speed by setting the zfs_scan_vdev_limit
|
||||
# low in order to ensure that the scrub does not complete early.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
function cleanup
|
||||
{
|
||||
log_must zinject -c all
|
||||
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
|
||||
}
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_assert "Scrub command fails when there is already a scrub in progress"
|
||||
|
||||
log_must zinject -d $DISK1 -D10:1 $TESTPOOL
|
||||
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
|
||||
log_must zpool scrub $TESTPOOL
|
||||
log_must is_pool_scrubbing $TESTPOOL true
|
||||
log_mustnot zpool scrub $TESTPOOL
|
||||
|
||||
@@ -42,13 +42,13 @@
|
||||
# 3. Verify scrub failed until the resilver completed
|
||||
#
|
||||
# NOTES:
|
||||
# A 10ms delay is added to 10% of zio's in order to ensure that the
|
||||
# resilver does not complete before the scrub can be issued. This
|
||||
# can occur when testing with small pools or very fast hardware.
|
||||
# Artificially limit the resilver speed by setting the zfs_scan_vdev_limit
|
||||
# low in order to ensure that the resilver does not complete early.
|
||||
#
|
||||
|
||||
function cleanup
|
||||
{
|
||||
log_must zinject -c all
|
||||
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
|
||||
}
|
||||
|
||||
verify_runnable "global"
|
||||
@@ -62,13 +62,12 @@ log_onexit cleanup
|
||||
|
||||
log_assert "Resilver prevent scrub from starting until the resilver completes"
|
||||
|
||||
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
|
||||
log_must zpool detach $TESTPOOL $DISK2
|
||||
log_must zinject -d $DISK1 -D10:1 $TESTPOOL
|
||||
log_must zpool attach $TESTPOOL $DISK1 $DISK2
|
||||
log_must is_pool_resilvering $TESTPOOL
|
||||
log_mustnot zpool scrub $TESTPOOL
|
||||
|
||||
# Allow the resilver to finish, or it will interfere with the next test.
|
||||
while ! is_pool_resilvered $TESTPOOL; do
|
||||
sleep 1
|
||||
done
|
||||
|
||||
@@ -63,4 +63,8 @@ log_must zpool scrub $TESTPOOL
|
||||
log_must zpool detach $TESTPOOL $DISK1
|
||||
log_must zpool attach $TESTPOOL $DISK2 $DISK1
|
||||
|
||||
while ! is_pool_resilvered $TESTPOOL; do
|
||||
sleep 1
|
||||
done
|
||||
|
||||
log_pass "When scrubbing, detach device should not break system."
|
||||
|
||||
+4
-2
@@ -49,7 +49,7 @@ verify_runnable "global"
|
||||
function cleanup
|
||||
{
|
||||
poolexists $TESTPOOL && destroy_pool $TESTPOOL
|
||||
log_must rm -f $DISK1 $DISK2 $DISK3
|
||||
log_must rm -f $DISK1 $DISK2 $DISK3 $DISK4
|
||||
}
|
||||
|
||||
#
|
||||
@@ -94,14 +94,16 @@ TESTDIR="$TEST_BASE_DIR/zpool_scrub_offline_device"
|
||||
DISK1="$TEST_BASE_DIR/zpool_disk1.dat"
|
||||
DISK2="$TEST_BASE_DIR/zpool_disk2.dat"
|
||||
DISK3="$TEST_BASE_DIR/zpool_disk3.dat"
|
||||
DISK4="$TEST_BASE_DIR/zpool_disk4.dat"
|
||||
|
||||
# 1. Create the pool
|
||||
log_must truncate -s $DEVSIZE $DISK1
|
||||
log_must truncate -s $DEVSIZE $DISK2
|
||||
log_must truncate -s $DEVSIZE $DISK3
|
||||
log_must truncate -s $DEVSIZE $DISK4
|
||||
poolexists $TESTPOOL && destroy_pool $TESTPOOL
|
||||
log_must zpool create -O mountpoint=$TESTDIR $TESTPOOL \
|
||||
raidz1 $DISK1 $DISK2 $DISK3
|
||||
raidz2 $DISK1 $DISK2 $DISK3 $DISK4
|
||||
|
||||
# 2. Offline the first device
|
||||
zpool_do_sync 'offline' $TESTPOOL $DISK1
|
||||
|
||||
@@ -81,6 +81,10 @@ log_must truncate -s 0 $ZED_DEBUG_LOG
|
||||
# 4. Generate additional events.
|
||||
log_must zpool offline $MPOOL $VDEV1
|
||||
log_must zpool online $MPOOL $VDEV1
|
||||
while ! is_pool_resilvered $MPOOL; do
|
||||
sleep 1
|
||||
done
|
||||
|
||||
log_must zpool scrub $MPOOL
|
||||
|
||||
# Wait for the scrub to finish, or is_healthy will be wrong.
|
||||
|
||||
@@ -78,7 +78,6 @@ function run_and_verify
|
||||
zedlog=${zedlog:-$ZED_DEBUG_LOG}
|
||||
fullcmd="$1"
|
||||
cmd=$(echo $fullcmd | awk '{print $1}')
|
||||
subcmd=$(echo $fullcmd | awk '{print $2}')
|
||||
|
||||
# If we aren't running zpool or zfs, something is wrong
|
||||
[[ $cmd == "zpool" || $cmd == "zfs" ]] || \
|
||||
|
||||
Reference in New Issue
Block a user