mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-04-13 15:11:45 +03:00
zpool: Change zpool offline spares policy
The zpool offline man page says that you cannot use 'zpool offline' on spares. However, testing found that you could in fact force fault (zpool offline -f) spares. Change the policy to: 1. You can never force-fault or offline dRAID spares. 2. You can only force-fault or offline traditional spares if they're active. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Ameer Hamza <ahamza@ixsystems.com> Reviewed-by: Akash B <akash-b@hpe.com> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes #18282
This commit is contained in:
parent
931deb290c
commit
b44a3ecf4a
@ -3571,10 +3571,53 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
|
|||||||
zfs_cmd_t zc = {"\0"};
|
zfs_cmd_t zc = {"\0"};
|
||||||
char errbuf[ERRBUFLEN];
|
char errbuf[ERRBUFLEN];
|
||||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||||
|
nvlist_t *vdev_nv;
|
||||||
|
boolean_t avail_spare, l2cache;
|
||||||
|
char *vdev_name;
|
||||||
|
char guid_str[21]; /* 64-bit num + '\0' */
|
||||||
|
boolean_t is_draid_spare = B_FALSE;
|
||||||
|
const char *vdev_type;
|
||||||
|
|
||||||
(void) snprintf(errbuf, sizeof (errbuf),
|
(void) snprintf(errbuf, sizeof (errbuf),
|
||||||
dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
|
dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
|
||||||
|
|
||||||
|
snprintf(guid_str, sizeof (guid_str), "%llu", (u_longlong_t)guid);
|
||||||
|
if ((vdev_nv = zpool_find_vdev(zhp, guid_str, &avail_spare,
|
||||||
|
&l2cache, NULL)) == NULL)
|
||||||
|
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
|
||||||
|
|
||||||
|
vdev_name = zpool_vdev_name(hdl, zhp, vdev_nv, 0);
|
||||||
|
if (vdev_name != NULL) {
|
||||||
|
/*
|
||||||
|
* We have the actual vdev name, so use that instead of the GUID
|
||||||
|
* in any error messages.
|
||||||
|
*/
|
||||||
|
(void) snprintf(errbuf, sizeof (errbuf),
|
||||||
|
dgettext(TEXT_DOMAIN, "cannot fault %s"), vdev_name);
|
||||||
|
free(vdev_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Spares (traditional or draid) cannot be faulted by libzfs, except:
|
||||||
|
*
|
||||||
|
* - Any spare type with too many errors can be faulted (aux =
|
||||||
|
* VDEV_AUX_ERR_EXCEEDED). This is only used by zed.
|
||||||
|
*
|
||||||
|
* - Traditional spares that are active can be force faulted.
|
||||||
|
*/
|
||||||
|
if (nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_TYPE, &vdev_type) == 0)
|
||||||
|
if (strcmp(vdev_type, VDEV_TYPE_DRAID_SPARE) == 0)
|
||||||
|
is_draid_spare = B_TRUE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If vdev is a spare that is not being used, or is a dRAID spare (in
|
||||||
|
* use or not), then don't allow it to be force-faulted. However, an
|
||||||
|
* in-use dRAID spare can be faulted by ZED if it sees too many errors
|
||||||
|
* (aux = VDEV_AUX_ERR_EXCEEDED).
|
||||||
|
*/
|
||||||
|
if (avail_spare || (is_draid_spare && aux != VDEV_AUX_ERR_EXCEEDED))
|
||||||
|
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
|
||||||
|
|
||||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||||
zc.zc_guid = guid;
|
zc.zc_guid = guid;
|
||||||
zc.zc_cookie = VDEV_STATE_FAULTED;
|
zc.zc_cookie = VDEV_STATE_FAULTED;
|
||||||
|
|||||||
@ -56,11 +56,12 @@
|
|||||||
.Ar pool
|
.Ar pool
|
||||||
.Ar device Ns …
|
.Ar device Ns …
|
||||||
.Xc
|
.Xc
|
||||||
Takes the specified physical device offline.
|
Takes the specified physical device offline or force-faults it.
|
||||||
While the
|
While the
|
||||||
.Ar device
|
.Ar device
|
||||||
is offline, no attempt is made to read or write to the device.
|
is offline or force-faulted, no attempt is made to read or write to the device.
|
||||||
This command is not applicable to spares.
|
dRAID spares cannot be offlined or force-faulted.
|
||||||
|
Traditional spares can only be offlined or force-faulted when they are active.
|
||||||
.Bl -tag -width Ds
|
.Bl -tag -width Ds
|
||||||
.It Fl -power
|
.It Fl -power
|
||||||
Power off the device's slot in the storage enclosure.
|
Power off the device's slot in the storage enclosure.
|
||||||
|
|||||||
@ -525,7 +525,7 @@ tags = ['functional', 'cli_root', 'zpool_initialize']
|
|||||||
|
|
||||||
[tests/functional/cli_root/zpool_offline]
|
[tests/functional/cli_root/zpool_offline]
|
||||||
tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg',
|
tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg',
|
||||||
'zpool_offline_003_pos']
|
'zpool_offline_003_pos', 'zpool_offline_spare']
|
||||||
tags = ['functional', 'cli_root', 'zpool_offline']
|
tags = ['functional', 'cli_root', 'zpool_offline']
|
||||||
|
|
||||||
[tests/functional/cli_root/zpool_online]
|
[tests/functional/cli_root/zpool_online]
|
||||||
|
|||||||
@ -323,7 +323,8 @@ pre =
|
|||||||
tags = ['functional', 'cli_root', 'zpool_initialize']
|
tags = ['functional', 'cli_root', 'zpool_initialize']
|
||||||
|
|
||||||
[tests/functional/cli_root/zpool_offline]
|
[tests/functional/cli_root/zpool_offline]
|
||||||
tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg']
|
tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg',
|
||||||
|
'zpool_offline_spare']
|
||||||
tags = ['functional', 'cli_root', 'zpool_offline']
|
tags = ['functional', 'cli_root', 'zpool_offline']
|
||||||
|
|
||||||
[tests/functional/cli_root/zpool_online]
|
[tests/functional/cli_root/zpool_online]
|
||||||
|
|||||||
@ -1220,6 +1220,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
|||||||
functional/cli_root/zpool_offline/zpool_offline_001_pos.ksh \
|
functional/cli_root/zpool_offline/zpool_offline_001_pos.ksh \
|
||||||
functional/cli_root/zpool_offline/zpool_offline_002_neg.ksh \
|
functional/cli_root/zpool_offline/zpool_offline_002_neg.ksh \
|
||||||
functional/cli_root/zpool_offline/zpool_offline_003_pos.ksh \
|
functional/cli_root/zpool_offline/zpool_offline_003_pos.ksh \
|
||||||
|
functional/cli_root/zpool_offline/zpool_offline_spare.ksh \
|
||||||
functional/cli_root/zpool_online/cleanup.ksh \
|
functional/cli_root/zpool_online/cleanup.ksh \
|
||||||
functional/cli_root/zpool_online/setup.ksh \
|
functional/cli_root/zpool_online/setup.ksh \
|
||||||
functional/cli_root/zpool_online/zpool_online_001_pos.ksh \
|
functional/cli_root/zpool_online/zpool_online_001_pos.ksh \
|
||||||
|
|||||||
@ -0,0 +1,84 @@
|
|||||||
|
#!/bin/ksh -p
|
||||||
|
# SPDX-License-Identifier: CDDL-1.0
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
# Copyright 2026 by Lawrence Livermore National Security, LLC.
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
|
||||||
|
#
|
||||||
|
# DESCRIPTION:
|
||||||
|
# Verify that traditional spares that are active can be offlined or
|
||||||
|
# force-faulted. Verify that in all other cases, spares cannot be
|
||||||
|
# offlined or faulted.
|
||||||
|
#
|
||||||
|
# STRATEGY:
|
||||||
|
# 1. Create pool with traditional spare
|
||||||
|
# 2. Verify we can't offline or fault an inactive traditional spare
|
||||||
|
# 3. Verify we can offline and fault an active traditional spare
|
||||||
|
# 4. Create draid pool with draid spare
|
||||||
|
# 5. Verify we can't offline/fault draid spare
|
||||||
|
|
||||||
|
TESTPOOL2=testpool2
|
||||||
|
function cleanup
|
||||||
|
{
|
||||||
|
destroy_pool $TESTPOOL2
|
||||||
|
log_must rm -f $TESTDIR/file-vdev-{1..3}
|
||||||
|
}
|
||||||
|
|
||||||
|
log_onexit cleanup
|
||||||
|
verify_runnable "global"
|
||||||
|
|
||||||
|
log_assert "Verify zpool offline has the correct behavior on spares"
|
||||||
|
|
||||||
|
# Verify any old file vdevs are gone
|
||||||
|
log_mustnot ls $TESTDIR/file-vdev-* &> /dev/null
|
||||||
|
|
||||||
|
log_must truncate -s 100M $TESTDIR/file-vdev-{1..3}
|
||||||
|
|
||||||
|
log_must zpool create $TESTPOOL2 mirror $TESTDIR/file-vdev-1 \
|
||||||
|
$TESTDIR/file-vdev-2 spare $TESTDIR/file-vdev-3
|
||||||
|
|
||||||
|
# Test that we can't offline an inactive spare
|
||||||
|
log_mustnot zpool offline $TESTPOOL2 $TESTDIR/file-vdev-3
|
||||||
|
log_mustnot zpool offline -f $TESTPOOL2 $TESTDIR/file-vdev-3
|
||||||
|
|
||||||
|
# Test that we can offline an active spare
|
||||||
|
log_must zpool replace $TESTPOOL2 $TESTDIR/file-vdev-1 $TESTDIR/file-vdev-3
|
||||||
|
log_must zpool offline $TESTPOOL2 $TESTDIR/file-vdev-3
|
||||||
|
log_must zpool online $TESTPOOL2 $TESTDIR/file-vdev-3
|
||||||
|
log_must zpool offline -f $TESTPOOL2 $TESTDIR/file-vdev-3
|
||||||
|
|
||||||
|
destroy_pool $TESTPOOL2
|
||||||
|
|
||||||
|
log_must zpool create -f $TESTPOOL2 draid1:1d:1s:3c $TESTDIR/file-vdev-{1..3}
|
||||||
|
|
||||||
|
# Test that we can't offline an inactive draid spare
|
||||||
|
log_mustnot zpool offline $TESTPOOL2 draid1-0-0
|
||||||
|
log_mustnot zpool offline -f $TESTPOOL2 draid1-0-0
|
||||||
|
|
||||||
|
# Test that we can't offline an active draid spare
|
||||||
|
log_must zpool replace $TESTPOOL2 $TESTDIR/file-vdev-1 draid1-0-0
|
||||||
|
log_mustnot zpool offline $TESTPOOL2 draid1-0-0
|
||||||
|
log_mustnot zpool offline -f $TESTPOOL2 draid1-0-0
|
||||||
|
|
||||||
|
log_pass "zpool offline has the correct behavior on spares"
|
||||||
@ -166,9 +166,8 @@ do
|
|||||||
|
|
||||||
mntpnt=$(get_prop mountpoint /$TESTPOOL)
|
mntpnt=$(get_prop mountpoint /$TESTPOOL)
|
||||||
|
|
||||||
# 2. Fault the spare device making it unavailable
|
# 2. Remove the spare device making it unavailable
|
||||||
log_must zpool offline -f $TESTPOOL $sparedev
|
log_must zpool remove $TESTPOOL $sparedev
|
||||||
log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED"
|
|
||||||
|
|
||||||
# 3. Simulate physical removal of one device
|
# 3. Simulate physical removal of one device
|
||||||
remove_disk $removedev
|
remove_disk $removedev
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user