Implemented zpool scrub pause/resume

Currently, there is no way to pause a scrub. Pausing may be
useful for preserving bandwidth when the pool is busy with
other I/O.

This patch adds the ability to pause and resume scrubbing.
This is achieved by maintaining a persistent on-disk scrub state.
While the state is 'paused' we do not scrub any more blocks.
We do however perform regular scan housekeeping such as
freeing async destroyed and deadlist blocks while paused.

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Thomas Caputi <tcaputi@datto.com>
Reviewed-by: Serapheim Dimitropoulos <serapheimd@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alek Pinchuk <apinchuk@datto.com>
Closes #6167
This commit is contained in:
Alek P
2017-07-06 22:16:13 -07:00
committed by Brian Behlendorf
parent 94b25662c5
commit 0ea05c64f8
17 changed files with 364 additions and 126 deletions
+26 -15
View File
@@ -1999,54 +1999,65 @@ function check_vdev_state # pool disk state{online,offline,unavail}
#
# Return 0 if the keyword is found, 1 otherwise
#
function check_pool_status # pool token keyword <verbose>
{
	#
	# Find the "<token>:" line in the output of `zpool status -v <pool>`
	# and test whether it contains <keyword> (case-insensitive).
	#
	# $1 pool    - pool to query
	# $2 token   - first field of the wanted status line, without the ':'
	# $3 keyword - text searched for with grep -i
	# $4 verbose - optional; when "true" the selected line is passed to
	#              log_note before it is checked (default: false)
	#
	# Returns grep's exit status: 0 when the keyword is found.
	#
	typeset pool=$1
	typeset token=$2
	typeset keyword=$3
	typeset verbose=${4:-false}
	# Keep the captured line local so it does not leak into the caller.
	typeset scan

	# zpool status is run exactly once; errors (e.g. no such pool) are
	# silenced and simply yield an empty line that cannot match.
	scan=$(zpool status -v "$pool" 2>/dev/null | nawk -v token="$token:" '
		($1==token) {print $0}')
	if [[ $verbose == true ]]; then
		log_note "$scan"
	fi

	# Quoted so the line is neither word-split nor glob-expanded.
	printf '%s\n' "$scan" | grep -i "$keyword" > /dev/null 2>&1
	return $?
}
#
# These 5 following functions are instance of check_pool_status()
# The following 6 functions are thin wrappers around check_pool_status():
# is_pool_resilvering - check whether a resilver is in progress on the pool
# is_pool_resilvered - check whether a resilver has completed on the pool
# is_pool_scrubbing - check whether a scrub is in progress on the pool
# is_pool_scrubbed - check whether a scrub has completed on the pool
# is_pool_scrub_stopped - check whether a scrub was stopped on the pool
# is_pool_scrub_paused - check whether a scrub is paused on the pool
#
function is_pool_resilvering #pool <verbose>
{
	# Succeeds when the "scan" status line of pool $1 contains
	# "resilver in progress since ". $2 is the optional verbose flag of
	# check_pool_status(); it is quoted so it cannot be word-split and
	# falls back to "false" when omitted. The status of
	# check_pool_status() is this function's return status.
	check_pool_status "$1" "scan" "resilver in progress since " \
	    "${2:-false}"
}
function is_pool_resilvered #pool <verbose>
{
	# Succeeds when the "scan" status line of pool $1 contains
	# "resilvered ". $2 is the optional verbose flag of
	# check_pool_status(); it is quoted so it cannot be word-split and
	# falls back to "false" when omitted. The status of
	# check_pool_status() is this function's return status.
	check_pool_status "$1" "scan" "resilvered " "${2:-false}"
}
function is_pool_scrubbing #pool <verbose>
{
	# Succeeds when the "scan" status line of pool $1 contains
	# "scrub in progress since ". $2 is the optional verbose flag of
	# check_pool_status(); it is quoted so it cannot be word-split and
	# falls back to "false" when omitted. The status of
	# check_pool_status() is this function's return status.
	check_pool_status "$1" "scan" "scrub in progress since " \
	    "${2:-false}"
}
function is_pool_scrubbed #pool <verbose>
{
	# Succeeds when the "scan" status line of pool $1 contains
	# "scrub repaired". $2 is the optional verbose flag of
	# check_pool_status(); it is quoted so it cannot be word-split and
	# falls back to "false" when omitted. The status of
	# check_pool_status() is this function's return status.
	check_pool_status "$1" "scan" "scrub repaired" "${2:-false}"
}
function is_pool_scrub_stopped #pool <verbose>
{
	# Succeeds when the "scan" status line of pool $1 contains
	# "scrub canceled". $2 is the optional verbose flag of
	# check_pool_status(); it is quoted so it cannot be word-split and
	# falls back to "false" when omitted. The status of
	# check_pool_status() is this function's return status.
	check_pool_status "$1" "scan" "scrub canceled" "${2:-false}"
}
function is_pool_scrub_paused #pool <verbose>
{
	# Succeeds when the "scan" status line of pool $1 contains
	# "scrub paused since ". $2 is the optional verbose flag of
	# check_pool_status(); it is quoted so it cannot be word-split and
	# falls back to "false" when omitted. The status of
	# check_pool_status() is this function's return status.
	check_pool_status "$1" "scan" "scrub paused since " "${2:-false}"
}
@@ -27,6 +27,7 @@
#
# Copyright (c) 2016 by Delphix. All rights reserved.
# Copyright (c) 2017 Datto Inc.
#
. $STF_SUITE/include/libtest.shlib
@@ -34,12 +35,15 @@
#
# DESCRIPTION:
# Verify scrub -s works correctly.
# Verify scrub, scrub -p, and scrub -s show the right status.
#
# STRATEGY:
# 1. Create pool and fill with hundreds data.
# 2. zpool scrub the pool
# 3. Verify zpool scrub -s succeed when the system is scrubbing.
# 1. Create pool and create a 100MB file in it.
# 2. zpool scrub the pool and verify it's doing a scrub.
# 3. Pause scrub and verify it's paused.
# 4. Try to pause a paused scrub and make sure that fails.
# 5. Resume the paused scrub and verify scrub is again being performed.
# 6. Verify zpool scrub -s succeed when the system is scrubbing.
#
# NOTES:
# A 10ms delay is added to the ZIOs in order to ensure that the
@@ -49,11 +53,25 @@
verify_runnable "global"
log_assert "Verify scrub -s works correctly."
log_must zinject -d $DISK1 -D10:1 $TESTPOOL
log_must zpool scrub $TESTPOOL
log_must zpool scrub -s $TESTPOOL
log_must is_pool_scrub_stopped $TESTPOOL
function cleanup
{
# Exit handler for this test (registered with log_onexit below).
# Runs `zinject -c all` under log_must so a failure of the command is
# reported. NOTE(review): presumably this clears the ZIO delay injected
# with `zinject -d ... -D...` by this test -- confirm against zinject(8).
log_must zinject -c all
}
log_must zinject -c all
log_pass "Verify scrub -s works correctly."
log_onexit cleanup
log_assert "Verify scrub, scrub -p, and scrub -s show the right status."
log_must zinject -d $DISK1 -D20:1 $TESTPOOL
log_must zpool scrub $TESTPOOL
log_must is_pool_scrubbing $TESTPOOL true
log_must zpool scrub -p $TESTPOOL
log_must is_pool_scrub_paused $TESTPOOL true
log_mustnot zpool scrub -p $TESTPOOL
log_must is_pool_scrub_paused $TESTPOOL true
log_must zpool scrub $TESTPOOL
log_must is_pool_scrubbing $TESTPOOL true
log_must zpool scrub -s $TESTPOOL
log_must is_pool_scrub_stopped $TESTPOOL true
log_pass "Verified scrub, -s, and -p show expected status."
@@ -27,6 +27,7 @@
#
# Copyright (c) 2016 by Delphix. All rights reserved.
# Copyright (c) 2017 by Datto Inc.
#
. $STF_SUITE/include/libtest.shlib
@@ -34,14 +35,12 @@
#
# DESCRIPTION:
# scrub command terminates the existing scrub process and starts
# a new scrub.
# scrub command fails when there is an existing scrub in progress
#
# STRATEGY:
# 1. Setup a pool and fill with data
# 1. Setup a pool and fill it with data
# 2. Kick off a scrub
# 3. Check the completed percent and invoke another scrub
# 4. Check the percent again, verify a new scrub started.
# 2. Kick off a second scrub and verify it fails
#
# NOTES:
# A 10ms delay is added to the ZIOs in order to ensure that the
@@ -51,33 +50,21 @@
verify_runnable "global"
function get_scrub_percent
function cleanup
{
typeset -i percent
percent=$(zpool status $TESTPOOL | grep "^ scrub" | \
awk '{print $7}' | awk -F. '{print $1}')
if is_pool_scrubbed $TESTPOOL ; then
percent=100
fi
echo $percent
log_must zinject -c all
}
log_assert "scrub command terminates the existing scrub process and starts" \
"a new scrub."
log_onexit cleanup
log_assert "Scrub command fails when there is already a scrub in progress"
log_must zinject -d $DISK1 -D10:1 $TESTPOOL
log_must zpool scrub $TESTPOOL
typeset -i PERCENT=30 percent=0
while ((percent < PERCENT)) ; do
percent=$(get_scrub_percent)
done
log_must is_pool_scrubbing $TESTPOOL true
log_mustnot zpool scrub $TESTPOOL
log_must is_pool_scrubbing $TESTPOOL true
log_must zpool scrub -s $TESTPOOL
log_must is_pool_scrub_stopped $TESTPOOL true
log_must zpool scrub $TESTPOOL
percent=$(get_scrub_percent)
if ((percent > PERCENT)); then
log_fail "zpool scrub don't stop existing scrubbing process."
fi
log_must zinject -c all
log_pass "scrub command terminates the existing scrub process and starts" \
"a new scrub."
log_pass "Issuing a scrub command failed when scrub was already in progress"
@@ -27,6 +27,7 @@
#
# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
# Copyright (c) 2017 Datto Inc.
#
. $STF_SUITE/include/libtest.shlib
@@ -48,6 +49,7 @@ verify_runnable "global"
log_assert "zpool scrub returns an error when run as a user"
log_mustnot zpool scrub $TESTPOOL
log_mustnot zpool scrub -p $TESTPOOL
log_mustnot zpool scrub -s $TESTPOOL
log_pass "zpool scrub returns an error when run as a user"