Teach zpool scrub to scrub only blocks in error log

Added a flag '-e' to zpool scrub to scrub only the blocks listed in the
error log. A user can pause, resume and cancel an error scrub by passing
the additional command line arguments -p and -s, just like a regular
scrub. This involves adding a new flag, creating new libzfs interfaces,
a new ioctl, and the actual iteration and read-issuing logic. Error
scrubbing is spread over multiple txgs so pool performance is not affected.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Co-authored-by: TulsiJain <tulsi.jain@delphix.com>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Closes #8995
Closes #12355
This commit is contained in:
George Amanakis
2021-12-17 21:35:28 +01:00
committed by Brian Behlendorf
parent e34e15ed6d
commit 482eeef804
29 changed files with 1602 additions and 71 deletions
+15
View File
@@ -27,6 +27,7 @@
#include <sys/vdev_impl.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_bootenv.h>
#include <sys/fs/zfs.h>
/*
* Test the nvpair inputs for the non-legacy zfs ioctl commands.
@@ -688,6 +689,17 @@ test_vdev_trim(const char *pool)
nvlist_free(required);
}
/*
 * Negative test for ZFS_IOC_POOL_SCRUB: submit a request whose "scan_type"
 * and "scan_command" are both set to intentionally invalid values and pass
 * EINVAL to IOC_INPUT_TEST as the expected result.
 */
static void
test_scrub(const char *pool)
{
	nvlist_t *args = fnvlist_alloc();

	/* Both values are meant to be invalid for their respective fields. */
	fnvlist_add_uint64(args, "scan_type", POOL_SCAN_FUNCS + 1);
	fnvlist_add_uint64(args, "scan_command", POOL_SCRUB_FLAGS_END + 1);

	/* No optional nvlist is supplied for this ioctl test. */
	IOC_INPUT_TEST(ZFS_IOC_POOL_SCRUB, pool, args, NULL, EINVAL);

	nvlist_free(args);
}
static int
zfs_destroy(const char *dataset)
{
@@ -868,6 +880,8 @@ zfs_ioc_input_tests(const char *pool)
test_set_bootenv(pool);
test_get_bootenv(pool);
test_scrub(pool);
/*
* cleanup
*/
@@ -1022,6 +1036,7 @@ validate_ioc_values(void)
CHECK(ZFS_IOC_BASE + 82 == ZFS_IOC_GET_BOOKMARK_PROPS);
CHECK(ZFS_IOC_BASE + 83 == ZFS_IOC_WAIT);
CHECK(ZFS_IOC_BASE + 84 == ZFS_IOC_WAIT_FS);
CHECK(ZFS_IOC_BASE + 87 == ZFS_IOC_POOL_SCRUB);
CHECK(ZFS_IOC_PLATFORM_BASE + 1 == ZFS_IOC_EVENTS_NEXT);
CHECK(ZFS_IOC_PLATFORM_BASE + 2 == ZFS_IOC_EVENTS_CLEAR);
CHECK(ZFS_IOC_PLATFORM_BASE + 3 == ZFS_IOC_EVENTS_SEEK);
+18
View File
@@ -1969,6 +1969,12 @@ function is_pool_scrubbing #pool <verbose>
check_pool_status "$1" "scan" "scrub in progress since " $2
}
# Ask check_pool_status whether the "scrub" status of pool $1 contains
# "error scrub in progress since ". $2 (verbose) is forwarded unquoted so
# that it disappears when unset. The function's exit status is that of
# check_pool_status.
function is_pool_error_scrubbing #pool <verbose>
{
	check_pool_status "$1" "scrub" "error scrub in progress since " $2
}
function is_pool_scrubbed #pool <verbose>
{
check_pool_status "$1" "scan" "scrub repaired" $2
@@ -1979,11 +1985,23 @@ function is_pool_scrub_stopped #pool <verbose>
check_pool_status "$1" "scan" "scrub canceled" $2
}
# Ask check_pool_status whether the "scrub" status of pool $1 contains
# "error scrub canceled on ". $2 (verbose) is forwarded unquoted so that it
# disappears when unset. The function's exit status is that of
# check_pool_status.
function is_pool_error_scrub_stopped #pool <verbose>
{
	check_pool_status "$1" "scrub" "error scrub canceled on " $2
}
# Ask check_pool_status whether the "scan" status of pool $1 contains
# "scrub paused since ". $2 (verbose) is forwarded unquoted so that it
# disappears when unset.
function is_pool_scrub_paused #pool <verbose>
{
	check_pool_status "$1" "scan" \
	    "scrub paused since " $2
}
# Ask check_pool_status whether the "scrub" status of pool $1 contains
# "error scrub paused since ". $2 (verbose) is forwarded unquoted so that it
# disappears when unset. The function's exit status is that of
# check_pool_status.
function is_pool_error_scrub_paused #pool <verbose>
{
	check_pool_status "$1" "scrub" "error scrub paused since " $2
}
function is_pool_removing #pool
{
check_pool_status "$1" "remove" "in progress since "
+4
View File
@@ -1153,6 +1153,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_scrub/zpool_scrub_multiple_copies.ksh \
functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh \
functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh \
functional/cli_root/zpool_scrub/zpool_error_scrub_001_pos.ksh \
functional/cli_root/zpool_scrub/zpool_error_scrub_002_pos.ksh \
functional/cli_root/zpool_scrub/zpool_error_scrub_003_pos.ksh \
functional/cli_root/zpool_scrub/zpool_error_scrub_004_pos.ksh \
functional/cli_root/zpool_set/cleanup.ksh \
functional/cli_root/zpool_set/setup.ksh \
functional/cli_root/zpool/setup.ksh \
@@ -0,0 +1,79 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2019, Delphix. All rights reserved.
# Copyright (c) 2023, George Amanakis. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
#
# DESCRIPTION:
# Verify scrub -e, -p, and -s show the right status.
#
# STRATEGY:
# 1. Create a pool and create a 10MB file in it.
# 2. Start an error scrub (-e) and verify it's doing a scrub.
# 3. Pause error scrub (-p) and verify it's paused.
# 4. Try to pause a paused error scrub (-p) and make sure that fails.
# 5. Resume the paused error scrub and verify again it's doing a scrub.
# 6. Verify zpool scrub -s succeeds when the system is error scrubbing.
#
verify_runnable "global"

# Restore the scan tunable, clear all injected faults and remove the test
# file so later tests start from a clean pool.
function cleanup
{
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
	log_must zinject -c all
	rm -f /$TESTPOOL/10m_file
}

log_onexit cleanup

log_assert "Verify scrub -e, -p, and -s show the right status."

log_must fio --rw=write --name=job --size=10M --filename=/$TESTPOOL/10m_file

# NOTE(review): the export/import cycle presumably drops cached copies of the
# file so that the read below has to go to disk -- confirm.
log_must zpool export $TESTPOOL
log_must zpool import $TESTPOOL
log_must zinject -t data -e checksum -f 100 -am /$TESTPOOL/10m_file

# create some error blocks
# The read may fail because of the injected checksum errors, so its exit
# status is ignored. Anything that is read is discarded rather than being
# written to stdout (dd's default output).
dd if=/$TESTPOOL/10m_file of=/dev/null bs=1M count=1 || true

# sync error blocks to disk
log_must sync_pool $TESTPOOL

# Keep the scrub from progressing so every state below can be observed.
log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1

# Start an error scrub.
log_must zpool scrub -e $TESTPOOL
log_must is_pool_error_scrubbing $TESTPOOL true

# Pause it; pausing an already paused error scrub must fail and leave it
# paused.
log_must zpool scrub -p $TESTPOOL
log_must is_pool_error_scrub_paused $TESTPOOL true
log_mustnot zpool scrub -p $TESTPOOL
log_must is_pool_error_scrub_paused $TESTPOOL true

# Resume by issuing -e again.
log_must zpool scrub -e $TESTPOOL
log_must is_pool_error_scrubbing $TESTPOOL true

# Cancel.
log_must zpool scrub -s $TESTPOOL
log_must is_pool_error_scrub_stopped $TESTPOOL true

log_pass "Verified scrub -e, -p, and -s show expected status."
@@ -0,0 +1,99 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2019, Delphix. All rights reserved.
# Copyright (c) 2023, George Amanakis. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
#
# DESCRIPTION:
# Verify regular scrub and error scrub can't run at the same time.
#
# STRATEGY:
# 1. Create a pool and create a 10MB file in it.
# 2. Start a scrub and verify it's doing a scrub.
# 3. Start an error scrub (-e) and verify it fails.
# 4. Pause scrub (-p) and verify it's paused.
# 5. Start an error scrub (-e) and verify it fails again.
# 6. Resume the paused scrub, verify it and cancel it.
# 7. Start an error scrub (-e) and verify it's doing error scrub.
# 8. Start a scrub and verify it fails.
# 9. Cancel error scrub (-e) and verify it is canceled.
# 10. Start scrub, verify it, cancel it and verify it.
#
verify_runnable "global"

# Restore the scan tunable, clear all injected faults and remove the test
# file so later tests start from a clean pool.
function cleanup
{
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
	log_must zinject -c all
	rm -f /$TESTPOOL/10m_file
}

log_onexit cleanup

log_assert "Verify regular scrub and error scrub can't run at the same time."

log_must fio --rw=write --name=job --size=10M --filename=/$TESTPOOL/10m_file

# NOTE(review): the export/import cycle presumably drops cached copies of the
# file so that the reads below have to go to disk -- confirm.
log_must zpool export $TESTPOOL
log_must zpool import $TESTPOOL
log_must zinject -t data -e checksum -f 100 -am /$TESTPOOL/10m_file

# create some error blocks before error scrub is requested.
# The read may fail because of the injected checksum errors, so its exit
# status is ignored. Anything that is read is discarded rather than being
# written to stdout (dd's default output).
dd if=/$TESTPOOL/10m_file of=/dev/null bs=1M count=1 || true

# sync error blocks to disk
log_must sync_pool $TESTPOOL

# Keep scrubs from progressing so every state below can be observed.
log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1

# A running regular scrub must block an error scrub.
log_must zpool scrub $TESTPOOL
log_must is_pool_scrubbing $TESTPOOL true
log_mustnot zpool scrub -e $TESTPOOL

# So must a paused regular scrub.
log_must zpool scrub -p $TESTPOOL
log_must is_pool_scrub_paused $TESTPOOL true
log_mustnot zpool scrub -e $TESTPOOL

# Resume the regular scrub, then cancel it.
log_must zpool scrub $TESTPOOL
log_must is_pool_scrubbing $TESTPOOL true
log_must zpool scrub -s $TESTPOOL
log_must is_pool_scrub_stopped $TESTPOOL true

# create some error blocks before error scrub is requested.
dd if=/$TESTPOOL/10m_file of=/dev/null bs=1M count=1 || true

# sync error blocks to disk
log_must sync_pool $TESTPOOL

# A running error scrub must block a regular scrub.
log_must zpool scrub -e $TESTPOOL
log_must is_pool_error_scrubbing $TESTPOOL true
log_mustnot zpool scrub $TESTPOOL

# Cancel the error scrub.
log_must zpool scrub -s $TESTPOOL
log_must is_pool_error_scrub_stopped $TESTPOOL true

# With nothing running, a regular scrub can be started and cancelled again.
log_must zpool scrub $TESTPOOL
log_must is_pool_scrubbing $TESTPOOL true
log_must zpool scrub -s $TESTPOOL
log_must is_pool_scrub_stopped $TESTPOOL true

log_pass "Verified regular scrub and error scrub can't run at the same time."
@@ -0,0 +1,109 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2019, Delphix. All rights reserved.
# Copyright (c) 2023, George Amanakis. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
#
# DESCRIPTION:
# Verify error scrub clears the errorlog, if errors no longer exist.
#
# STRATEGY:
# 1. Create a pool and create a file in it.
# 2. Zinject errors and read using dd to log errors to disk.
# 3. Make sure file name is mentioned in the list of error files.
# 4. Start error scrub and wait for it to finish.
# 5. Check scrub ran and errors are still reported.
# 6. Clear corruption and error scrub again.
# 7. Check scrub ran and errors are cleared.
#
verify_runnable "global"

# Clear all injected faults, then remove the test file, the scratch pool and
# the file that backs the pool's only vdev.
function cleanup
{
	zinject -c all
	rm -f /$TESTPOOL2/$TESTFS1/$TESTFILE0
	destroy_pool $TESTPOOL2
	rm -f $TESTDIR/vdev_a
}

log_onexit cleanup

log_assert "Verify error scrub clears the errorlog, if errors no longer exist."

# Create a file-backed pool; fail the test right away if the backing file
# cannot be created.
log_must truncate -s $MINVDEVSIZE $TESTDIR/vdev_a
log_must zpool create -f -O primarycache=none $TESTPOOL2 $TESTDIR/vdev_a
log_must zfs create $TESTPOOL2/$TESTFS1
typeset file=/$TESTPOOL2/$TESTFS1/$TESTFILE0
log_must dd if=/dev/urandom of=$file bs=2M count=10

# Three times over: snapshot the last dataset listed in the pool and clone
# that snapshot, then continue from the last dataset listed under the clone.
lastfs="$(zfs list -r $TESTPOOL2 | tail -1 | awk '{print $1}')"
for i in {1..3}; do
	log_must zfs snap $lastfs@snap$i
	log_must zfs clone $lastfs@snap$i $TESTPOOL2/clone$i
	lastfs="$(zfs list -r $TESTPOOL2/clone$i | tail -1 | awk '{print $1}')"
done

log_must zinject -t data -e checksum -f 100 -a $file
# Read the file while checksum errors are injected. The exit status is
# deliberately not checked with log_must.
dd if=$file of=/dev/null bs=2M count=10

# Important: sync error log to disk
log_must sync_pool $TESTPOOL2

# Check reported errors
log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v $TESTPOOL2 | \
    grep \"Permanent errors have been detected\""
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/$TESTFS1/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/$TESTFS1@snap1:/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/clone1/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/clone1@snap2:/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/clone2/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/clone2@snap3:/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/clone3/$TESTFILE0'"

# Check errors are reported if corruption persists
log_must zpool scrub -e -w $TESTPOOL2
# Only look at this pool's status, so other imported pools cannot satisfy
# the check.
log_must eval "zpool status -v $TESTPOOL2 | grep 'error blocks'"
log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v $TESTPOOL2 | \
    grep \"Permanent errors have been detected\""
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/$TESTFS1/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/$TESTFS1@snap1:/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/clone1/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/clone1@snap2:/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/clone2/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/clone2@snap3:/$TESTFILE0'"
log_must eval "zpool status -v $TESTPOOL2 | grep '$TESTPOOL2/clone3/$TESTFILE0'"

# Check errors are cleared
log_must zinject -c all
log_must zpool scrub -e -w $TESTPOOL2
log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v $TESTPOOL2 | grep 'error blocks'"
log_mustnot eval "zpool status -v $TESTPOOL2 | grep '$TESTFILE0'"

log_pass "Verify error scrub clears the errorlog, if errors no longer exist."
@@ -0,0 +1,54 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2023, George Amanakis. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
#
# DESCRIPTION:
# Verify error scrub cannot run without the head_errlog feature.
#
# STRATEGY:
# 1. Create a pool with head_errlog disabled.
# 2. Run an error scrub and verify it is not supported.
#
verify_runnable "global"

# Remove the scratch pool and the file that backs its only vdev. This test
# creates no files inside the pool, so there is nothing else to delete.
function cleanup
{
	destroy_pool $TESTPOOL2
	rm -f $TESTDIR/vdev_a
}

log_onexit cleanup

log_assert "Verify error scrub cannot run without the head_errlog feature."

# Create a file-backed pool with head_errlog disabled; fail the test right
# away if the backing file cannot be created.
log_must truncate -s $MINVDEVSIZE $TESTDIR/vdev_a
log_must zpool create -f -o feature@head_errlog=disabled $TESTPOOL2 $TESTDIR/vdev_a

# The error scrub request must be rejected.
log_mustnot zpool scrub -ew $TESTPOOL2

log_pass "Verify error scrub cannot run without the head_errlog feature."