mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-26 09:54:22 +03:00
zed: mark disks as REMOVED when they are removed
ZED does not take any action for disk removal events if there is no spare VDEV available. Added zpool_vdev_remove_wanted() in libzfs and vdev_remove_wanted() in vdev.c to remove the VDEV through ZED on removal event. This means that if you are running zed and remove a disk, it will be properly marked as REMOVED. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Ryan Moeller <ryan@iXsystems.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Signed-off-by: Ameer Hamza <ahamza@ixsystems.com> Closes #13797
This commit is contained in:
parent
eb9bec0a5d
commit
55c12724d3
@ -80,6 +80,7 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
|
||||
char *path = NULL;
|
||||
uint_t c, children;
|
||||
nvlist_t **child;
|
||||
uint64_t vdev_guid;
|
||||
|
||||
/*
|
||||
* First iterate over any children.
|
||||
@ -100,7 +101,7 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
|
||||
&child, &children) == 0) {
|
||||
for (c = 0; c < children; c++) {
|
||||
if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
|
||||
gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
|
||||
gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
|
||||
return (B_TRUE);
|
||||
}
|
||||
}
|
||||
@ -109,7 +110,7 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
|
||||
&child, &children) == 0) {
|
||||
for (c = 0; c < children; c++) {
|
||||
if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
|
||||
gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
|
||||
gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
|
||||
return (B_TRUE);
|
||||
}
|
||||
}
|
||||
@ -126,6 +127,21 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
|
||||
&gsp->gs_vdev_expandtime);
|
||||
return (B_TRUE);
|
||||
}
|
||||
/*
|
||||
* Otherwise, on a vdev guid match, grab the devid and expansion
|
||||
* time. The devid might be missing on removal since it's not part
|
||||
* of blkid cache and L2ARC VDEV does not contain pool guid in its
|
||||
* blkid, so this is a special case for L2ARC VDEV.
|
||||
*/
|
||||
else if (gsp->gs_vdev_guid != 0 && gsp->gs_devid == NULL &&
|
||||
nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&
|
||||
gsp->gs_vdev_guid == vdev_guid) {
|
||||
(void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
|
||||
&gsp->gs_devid);
|
||||
(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
|
||||
&gsp->gs_vdev_expandtime);
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
@ -148,7 +164,7 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
|
||||
/*
|
||||
* if a match was found then grab the pool guid
|
||||
*/
|
||||
if (gsp->gs_vdev_guid) {
|
||||
if (gsp->gs_vdev_guid && gsp->gs_devid) {
|
||||
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
|
||||
&gsp->gs_pool_guid);
|
||||
}
|
||||
@ -195,11 +211,13 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
|
||||
uint64_t pool_guid = 0, vdev_guid = 0;
|
||||
guid_search_t search = { 0 };
|
||||
device_type_t devtype = DEVICE_TYPE_PRIMARY;
|
||||
char *devid = NULL;
|
||||
|
||||
class = "resource.fs.zfs.removed";
|
||||
subclass = "";
|
||||
|
||||
(void) nvlist_add_string(payload, FM_CLASS, class);
|
||||
(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid);
|
||||
(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
|
||||
(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
|
||||
|
||||
@ -209,20 +227,24 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
|
||||
(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);
|
||||
|
||||
/*
|
||||
* If devid is missing but vdev_guid is available, find devid
|
||||
* and pool_guid from vdev_guid.
|
||||
* For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
|
||||
* ZFS_EV_POOL_GUID may be missing so find them.
|
||||
*/
|
||||
if (pool_guid == 0 || vdev_guid == 0) {
|
||||
if ((nvlist_lookup_string(nvl, DEV_IDENTIFIER,
|
||||
&search.gs_devid) == 0) &&
|
||||
(zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search)
|
||||
== 1)) {
|
||||
if (pool_guid == 0)
|
||||
pool_guid = search.gs_pool_guid;
|
||||
if (vdev_guid == 0)
|
||||
vdev_guid = search.gs_vdev_guid;
|
||||
devtype = search.gs_vdev_type;
|
||||
}
|
||||
if (devid == NULL || pool_guid == 0 || vdev_guid == 0) {
|
||||
if (devid == NULL)
|
||||
search.gs_vdev_guid = vdev_guid;
|
||||
else
|
||||
search.gs_devid = devid;
|
||||
zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
|
||||
if (devid == NULL)
|
||||
devid = search.gs_devid;
|
||||
if (pool_guid == 0)
|
||||
pool_guid = search.gs_pool_guid;
|
||||
if (vdev_guid == 0)
|
||||
vdev_guid = search.gs_vdev_guid;
|
||||
devtype = search.gs_vdev_type;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -235,7 +257,9 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
|
||||
search.gs_vdev_expandtime + 10 > tv.tv_sec) {
|
||||
zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
|
||||
"for recently expanded device '%s'", EC_DEV_REMOVE,
|
||||
search.gs_devid);
|
||||
devid);
|
||||
fnvlist_free(payload);
|
||||
free(event);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -323,6 +323,9 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
|
||||
boolean_t is_disk;
|
||||
vdev_aux_t aux;
|
||||
uint64_t state = 0;
|
||||
int l2arc;
|
||||
vdev_stat_t *vs;
|
||||
unsigned int c;
|
||||
|
||||
fmd_hdl_debug(hdl, "zfs_retire_recv: '%s'", class);
|
||||
|
||||
@ -352,13 +355,32 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
|
||||
|
||||
devname = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);
|
||||
|
||||
/* Can't replace l2arc with a spare: offline the device */
|
||||
if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
|
||||
&devtype) == 0 && strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) {
|
||||
fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", devname);
|
||||
zpool_vdev_offline(zhp, devname, B_TRUE);
|
||||
} else if (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
|
||||
replace_with_spare(hdl, zhp, vdev) == B_FALSE) {
|
||||
nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
|
||||
(uint64_t **)&vs, &c);
|
||||
|
||||
/*
|
||||
* If state removed is requested for already removed vdev,
|
||||
* it's a loopback event from spa_async_remove(). Just
|
||||
* ignore it.
|
||||
*/
|
||||
if (vs->vs_state == VDEV_STATE_REMOVED &&
|
||||
state == VDEV_STATE_REMOVED)
|
||||
return;
|
||||
|
||||
l2arc = (nvlist_lookup_string(nvl,
|
||||
FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, &devtype) == 0 &&
|
||||
strcmp(devtype, VDEV_TYPE_L2CACHE) == 0);
|
||||
|
||||
/* Remove the vdev since device is unplugged */
|
||||
if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) {
|
||||
int status = zpool_vdev_remove_wanted(zhp, devname);
|
||||
fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'"
|
||||
", ret:%d", devname, status);
|
||||
}
|
||||
|
||||
/* Replace the vdev with a spare if it's not an l2arc */
|
||||
if (!l2arc && (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
|
||||
replace_with_spare(hdl, zhp, vdev) == B_FALSE)) {
|
||||
/* Could not handle with spare */
|
||||
fmd_hdl_debug(hdl, "no spare for '%s'", devname);
|
||||
}
|
||||
|
@ -1166,7 +1166,7 @@ ztest_kill(ztest_shared_t *zs)
|
||||
* See comment above spa_write_cachefile().
|
||||
*/
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE);
|
||||
spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE, B_FALSE);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
|
||||
(void) raise(SIGKILL);
|
||||
|
@ -103,6 +103,57 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE], [
|
||||
])
|
||||
])
|
||||
|
||||
dnl #
dnl # 5.12 API,
dnl # bdev_kobj() is available from this kernel onwards.  The test
dnl # program below only compiles when bdev_kobj() can be called with
dnl # a struct block_device pointer.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ], [
	ZFS_LINUX_TEST_SRC([bdev_kobj], [
		#include <linux/fs.h>
		#include <linux/blkdev.h>
		#include <linux/kobject.h>
	], [
		struct block_device *bdev = NULL;
		struct kobject *disk_kobj;
		disk_kobj = bdev_kobj(bdev);
	])
])
|
||||
|
||||
dnl #
dnl # Report the outcome of the bdev_kobj compile test and define
dnl # HAVE_BDEV_KOBJ when it succeeded.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ], [
	AC_MSG_CHECKING([whether bdev_kobj() exists])
	ZFS_LINUX_TEST_RESULT([bdev_kobj], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_BDEV_KOBJ], [1], [bdev_kobj() exists])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||||
|
||||
dnl #
dnl # 5.12 API change,
dnl # part_to_dev() was removed in this kernel.  The test program below
dnl # only compiles when part_to_dev() can still be called with a
dnl # struct hd_struct pointer.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV], [
	ZFS_LINUX_TEST_SRC([part_to_dev], [
		#include <linux/fs.h>
		#include <linux/blkdev.h>
	], [
		struct hd_struct *p = NULL;
		struct device *pdev;
		pdev = part_to_dev(p);
	])
])
|
||||
|
||||
dnl #
dnl # Report the outcome of the part_to_dev compile test and define
dnl # HAVE_PART_TO_DEV when it succeeded.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV], [
	AC_MSG_CHECKING([whether part_to_dev() exists])
	ZFS_LINUX_TEST_RESULT([part_to_dev], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_PART_TO_DEV], [1], [part_to_dev() exists])
	], [
		AC_MSG_RESULT([no])
	])
])
|
||||
|
||||
dnl #
|
||||
dnl # 5.10 API, check_disk_change() is removed, in favor of
|
||||
dnl # bdev_check_media_change(), which doesn't force revalidation
|
||||
@ -405,6 +456,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEVNAME
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
|
||||
@ -421,4 +474,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
|
||||
ZFS_AC_KERNEL_BLKDEV_BDEVNAME
|
||||
ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS
|
||||
ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
|
||||
ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
|
||||
ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
|
||||
])
|
||||
|
@ -310,6 +310,7 @@ _LIBZFS_H int zpool_vdev_indirect_size(zpool_handle_t *, const char *,
|
||||
uint64_t *);
|
||||
_LIBZFS_H int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **,
|
||||
nvlist_t *, splitflags_t);
|
||||
_LIBZFS_H int zpool_vdev_remove_wanted(zpool_handle_t *, const char *);
|
||||
|
||||
_LIBZFS_H int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t);
|
||||
_LIBZFS_H int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t);
|
||||
|
@ -71,6 +71,7 @@ kernel_spl_sys_HEADERS = \
|
||||
%D%/spl/sys/kmem_cache.h \
|
||||
%D%/spl/sys/kstat.h \
|
||||
%D%/spl/sys/list.h \
|
||||
%D%/spl/sys/misc.h \
|
||||
%D%/spl/sys/mod_os.h \
|
||||
%D%/spl/sys/mutex.h \
|
||||
%D%/spl/sys/param.h \
|
||||
|
@ -261,6 +261,32 @@ bio_set_bi_error(struct bio *bio, int error)
|
||||
#define BIO_END_IO(bio, error) bio_endio(bio, error);
|
||||
#endif /* HAVE_1ARG_BIO_END_IO_T */
|
||||
|
||||
/*
|
||||
* 5.15 MACRO,
|
||||
* GD_DEAD
|
||||
*
|
||||
* 2.6.36 - 5.14 MACRO,
|
||||
* GENHD_FL_UP
|
||||
*
|
||||
* Check the disk status and return B_TRUE if alive
|
||||
* otherwise B_FALSE
|
||||
*/
|
||||
static inline boolean_t
|
||||
zfs_check_disk_status(struct block_device *bdev)
|
||||
{
|
||||
#if defined(GENHD_FL_UP)
|
||||
return (!!(bdev->bd_disk->flags & GENHD_FL_UP));
|
||||
#elif defined(GD_DEAD)
|
||||
return (!test_bit(GD_DEAD, &bdev->bd_disk->state));
|
||||
#else
|
||||
/*
|
||||
* This is encountered if neither GENHD_FL_UP nor GD_DEAD is available in
|
||||
* the kernel - likely due to an MACRO change that needs to be chased down.
|
||||
*/
|
||||
#error "Unsupported kernel: no usable disk status check"
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* 4.1 API,
|
||||
* 3.10.0 CentOS 7.x API,
|
||||
|
29
include/os/linux/spl/sys/misc.h
Normal file
29
include/os/linux/spl/sys/misc.h
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
#ifndef _OS_LINUX_SPL_MISC_H
#define	_OS_LINUX_SPL_MISC_H

#include <linux/kobject.h>

/*
 * Only a pointer to struct block_device is needed here, so forward-declare
 * the tag instead of depending on the includer having pulled in a block
 * layer header first.  Without this the tag would get prototype scope and
 * the declaration below would not match the definition.
 */
struct block_device;

/*
 * Send a KOBJ_CHANGE uevent for the kobject that belongs to bdev.
 */
extern void spl_signal_kobj_evt(struct block_device *bdev);

#endif /* _OS_LINUX_SPL_MISC_H */
|
@ -838,7 +838,7 @@ extern kmutex_t spa_namespace_lock;
|
||||
#define SPA_CONFIG_UPDATE_POOL 0
|
||||
#define SPA_CONFIG_UPDATE_VDEVS 1
|
||||
|
||||
extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t);
|
||||
extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
|
||||
extern void spa_config_load(void);
|
||||
extern nvlist_t *spa_all_configs(uint64_t *);
|
||||
extern void spa_config_set(spa_t *spa, nvlist_t *config);
|
||||
|
@ -148,6 +148,7 @@ extern int vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux);
|
||||
extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
|
||||
vdev_state_t *);
|
||||
extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
|
||||
extern int vdev_remove_wanted(spa_t *spa, uint64_t guid);
|
||||
extern void vdev_clear(spa_t *spa, vdev_t *vd);
|
||||
|
||||
extern boolean_t vdev_is_dead(vdev_t *vd);
|
||||
@ -190,6 +191,8 @@ typedef enum vdev_config_flag {
|
||||
VDEV_CONFIG_MISSING = 1 << 4
|
||||
} vdev_config_flag_t;
|
||||
|
||||
extern void vdev_post_kobj_evt(vdev_t *vd);
|
||||
extern void vdev_clear_kobj_evt(vdev_t *vd);
|
||||
extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config);
|
||||
extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
|
||||
boolean_t getstats, vdev_config_flag_t flags);
|
||||
|
@ -69,6 +69,7 @@ extern uint_t zfs_vdev_async_write_max_active;
|
||||
* Virtual device operations
|
||||
*/
|
||||
typedef int vdev_init_func_t(spa_t *spa, nvlist_t *nv, void **tsd);
|
||||
typedef void vdev_kobj_post_evt_func_t(vdev_t *vd);
|
||||
typedef void vdev_fini_func_t(vdev_t *vd);
|
||||
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
|
||||
uint64_t *ashift, uint64_t *pshift);
|
||||
@ -123,6 +124,7 @@ typedef const struct vdev_ops {
|
||||
vdev_config_generate_func_t *vdev_op_config_generate;
|
||||
vdev_nparity_func_t *vdev_op_nparity;
|
||||
vdev_ndisks_func_t *vdev_op_ndisks;
|
||||
vdev_kobj_post_evt_func_t *vdev_op_kobj_evt_post;
|
||||
char vdev_op_type[16];
|
||||
boolean_t vdev_op_leaf;
|
||||
} vdev_ops_t;
|
||||
@ -436,6 +438,7 @@ struct vdev {
|
||||
boolean_t vdev_isl2cache; /* was a l2cache device */
|
||||
boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */
|
||||
boolean_t vdev_resilver_deferred; /* resilver deferred */
|
||||
boolean_t vdev_kobj_flag; /* kobj event record */
|
||||
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
|
||||
vdev_cache_t vdev_cache; /* physical block cache */
|
||||
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
|
||||
|
@ -50,6 +50,7 @@ extern "C" {
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/kmem_cache.h>
|
||||
#include <sys/vmem.h>
|
||||
#include <sys/misc.h>
|
||||
#include <sys/taskq.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/disp.h>
|
||||
|
@ -550,6 +550,7 @@
|
||||
<elf-symbol name='zpool_vdev_path_to_guid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_vdev_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_vdev_remove_cancel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -3505,6 +3506,11 @@
|
||||
<parameter type-id='c19b74c3' name='istmp'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='zpool_vdev_remove_wanted' mangled-name='zpool_vdev_remove_wanted' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_remove_wanted'>
|
||||
<parameter type-id='4c81de99' name='zhp'/>
|
||||
<parameter type-id='80f4b756' name='path'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='zpool_vdev_fault' mangled-name='zpool_vdev_fault' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_fault'>
|
||||
<parameter type-id='4c81de99' name='zhp'/>
|
||||
<parameter type-id='9c313c2d' name='guid'/>
|
||||
|
@ -3073,6 +3073,43 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove the specified vdev asynchronously from the configuration, so
|
||||
* that it may come ONLINE if reinserted. This is called from zed on
|
||||
* Udev remove event.
|
||||
* Note: We also have a similar function zpool_vdev_remove() that
|
||||
* removes the vdev from the pool.
|
||||
*/
|
||||
int
|
||||
zpool_vdev_remove_wanted(zpool_handle_t *zhp, const char *path)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvlist_t *tgt;
|
||||
boolean_t avail_spare, l2cache;
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
|
||||
NULL)) == NULL)
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
|
||||
|
||||
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
|
||||
|
||||
if (avail_spare)
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
|
||||
|
||||
zc.zc_cookie = VDEV_STATE_REMOVED;
|
||||
|
||||
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
|
||||
return (0);
|
||||
|
||||
return (zpool_standard_error(hdl, errno, errbuf));
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the given vdev faulted.
|
||||
*/
|
||||
|
@ -47,6 +47,7 @@
|
||||
#include <linux/mod_compat.h>
|
||||
#include <sys/cred.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/misc.h>
|
||||
|
||||
unsigned long spl_hostid = 0;
|
||||
EXPORT_SYMBOL(spl_hostid);
|
||||
@ -517,6 +518,38 @@ ddi_copyin(const void *from, void *to, size_t len, int flags)
|
||||
}
|
||||
EXPORT_SYMBOL(ddi_copyin);
|
||||
|
||||
/*
 * Send a KOBJ_CHANGE uevent to userspace for the kobject of bdev.  This is
 * posted whenever a new vdev is added to the pool: it is necessary to sync
 * blkid information with udev, which the zed daemon uses during device
 * hotplug to identify the vdev.
 *
 * A failure to deliver the event is only logged.
 */
void
spl_signal_kobj_evt(struct block_device *bdev)
{
#if defined(HAVE_BDEV_KOBJ) || defined(HAVE_PART_TO_DEV)
	struct kobject *kobj;
	int err;

#ifdef HAVE_BDEV_KOBJ
	kobj = bdev_kobj(bdev);
#else
	kobj = &part_to_dev(bdev->bd_part)->kobj;
#endif
	if (kobj == NULL)
		return;

	err = kobject_uevent(kobj, KOBJ_CHANGE);
	if (err != 0) {
		pr_warn("ZFS: Sending event '%d' to kobject: '%s'"
		    " (%p): failed(ret:%d)\n", KOBJ_CHANGE,
		    kobject_name(kobj), kobj, err);
	}
#else
/*
 * Neither bdev_kobj() nor part_to_dev() is available in this kernel - most
 * likely an API change that needs to be chased down.
 */
#error "Unsupported kernel: unable to get struct kobj from bdev"
#endif
}
EXPORT_SYMBOL(spl_signal_kobj_evt);
|
||||
|
||||
int
|
||||
ddi_copyout(const void *from, void *to, size_t len, int flags)
|
||||
{
|
||||
|
@ -179,6 +179,18 @@ vdev_disk_error(zio_t *zio)
|
||||
zio->io_flags);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_disk_kobj_evt_post(vdev_t *v)
|
||||
{
|
||||
vdev_disk_t *vd = v->vdev_tsd;
|
||||
if (vd && vd->vd_bdev) {
|
||||
spl_signal_kobj_evt(vd->vd_bdev);
|
||||
} else {
|
||||
vdev_dbgmsg(v, "vdev_disk_t is NULL for VDEV:%s\n",
|
||||
v->vdev_path);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
||||
uint64_t *logical_ashift, uint64_t *physical_ashift)
|
||||
@ -290,6 +302,13 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
||||
bdev = blkdev_get_by_path(v->vdev_path, mode | FMODE_EXCL,
|
||||
zfs_vdev_holder);
|
||||
if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
|
||||
/*
|
||||
* There is no point in waiting since the device was removed
|
||||
* explicitly
|
||||
*/
|
||||
if (v->vdev_removed)
|
||||
break;
|
||||
|
||||
schedule_timeout(MSEC_TO_TICK(10));
|
||||
} else if (unlikely(PTR_ERR(bdev) == -ERESTARTSYS)) {
|
||||
timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms * 10);
|
||||
@ -901,7 +920,7 @@ vdev_disk_io_done(zio_t *zio)
|
||||
vdev_t *v = zio->io_vd;
|
||||
vdev_disk_t *vd = v->vdev_tsd;
|
||||
|
||||
if (zfs_check_media_change(vd->vd_bdev)) {
|
||||
if (!zfs_check_disk_status(vd->vd_bdev)) {
|
||||
invalidate_bdev(vd->vd_bdev);
|
||||
v->vdev_remove_wanted = B_TRUE;
|
||||
spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
|
||||
@ -957,7 +976,8 @@ vdev_ops_t vdev_disk_ops = {
|
||||
.vdev_op_nparity = NULL,
|
||||
.vdev_op_ndisks = NULL,
|
||||
.vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */
|
||||
.vdev_op_leaf = B_TRUE /* leaf vdev */
|
||||
.vdev_op_leaf = B_TRUE, /* leaf vdev */
|
||||
.vdev_op_kobj_evt_post = vdev_disk_kobj_evt_post
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -909,7 +909,16 @@ spa_change_guid(spa_t *spa)
|
||||
spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED);
|
||||
|
||||
if (error == 0) {
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE);
|
||||
/*
|
||||
* Clear the kobj flag from all the vdevs to allow
|
||||
* vdev_cache_process_kobj_evt() to post events to all the
|
||||
* vdevs since GUID is updated.
|
||||
*/
|
||||
vdev_clear_kobj_evt(spa->spa_root_vdev);
|
||||
for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
|
||||
vdev_clear_kobj_evt(spa->spa_l2cache.sav_vdevs[i]);
|
||||
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);
|
||||
spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID);
|
||||
}
|
||||
|
||||
@ -5220,7 +5229,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag,
|
||||
*/
|
||||
spa_unload(spa);
|
||||
spa_deactivate(spa);
|
||||
spa_write_cachefile(spa, B_TRUE, B_TRUE);
|
||||
spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
|
||||
spa_remove(spa);
|
||||
if (locked)
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
@ -6044,7 +6053,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||
|
||||
spa_spawn_aux_threads(spa);
|
||||
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE);
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);
|
||||
|
||||
/*
|
||||
* Don't count references from objsets that are already closed
|
||||
@ -6107,7 +6116,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
|
||||
if (props != NULL)
|
||||
spa_configfile_set(spa, props, B_FALSE);
|
||||
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE);
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
|
||||
spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT);
|
||||
zfs_dbgmsg("spa_import: verbatim import of %s", pool);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
@ -6503,7 +6512,7 @@ export_spa:
|
||||
|
||||
if (new_state != POOL_STATE_UNINITIALIZED) {
|
||||
if (!hardforce)
|
||||
spa_write_cachefile(spa, B_TRUE, B_TRUE);
|
||||
spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
|
||||
spa_remove(spa);
|
||||
} else {
|
||||
/*
|
||||
|
@ -240,7 +240,8 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
|
||||
* would be required.
|
||||
*/
|
||||
void
|
||||
spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
|
||||
spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent,
|
||||
boolean_t postblkidevent)
|
||||
{
|
||||
spa_config_dirent_t *dp, *tdp;
|
||||
nvlist_t *nvl;
|
||||
@ -346,6 +347,16 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
|
||||
|
||||
if (postsysevent)
|
||||
spa_event_notify(target, NULL, NULL, ESC_ZFS_CONFIG_SYNC);
|
||||
|
||||
/*
|
||||
* Post udev event to sync blkid information if the pool is created
|
||||
* or a new vdev is added to the pool.
|
||||
*/
|
||||
if ((target->spa_root_vdev) && postblkidevent) {
|
||||
vdev_post_kobj_evt(target->spa_root_vdev);
|
||||
for (int i = 0; i < target->spa_l2cache.sav_count; i++)
|
||||
vdev_post_kobj_evt(target->spa_l2cache.sav_vdevs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -600,6 +611,7 @@ spa_config_update(spa_t *spa, int what)
|
||||
*/
|
||||
if (!spa->spa_is_root) {
|
||||
spa_write_cachefile(spa, B_FALSE,
|
||||
what != SPA_CONFIG_UPDATE_POOL,
|
||||
what != SPA_CONFIG_UPDATE_POOL);
|
||||
}
|
||||
|
||||
|
@ -1290,7 +1290,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error,
|
||||
* If the config changed, update the config cache.
|
||||
*/
|
||||
if (config_changed)
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE);
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1385,7 +1385,7 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
|
||||
*/
|
||||
if (config_changed) {
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE);
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
}
|
||||
|
||||
|
@ -1948,6 +1948,14 @@ vdev_open(vdev_t *vd)
|
||||
|
||||
error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize,
|
||||
&logical_ashift, &physical_ashift);
|
||||
|
||||
/* Keep the device in removed state if unplugged */
|
||||
if (error == ENOENT && vd->vdev_removed) {
|
||||
vdev_set_state(vd, B_TRUE, VDEV_STATE_REMOVED,
|
||||
VDEV_AUX_NONE);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Physical volume size should never be larger than its max size, unless
|
||||
* the disk has shrunk while we were reading it or the device is buggy
|
||||
@ -3166,6 +3174,34 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
|
||||
mutex_exit(&vd->vdev_dtl_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over all the vdevs except spare, and post kobj events
|
||||
*/
|
||||
void
|
||||
vdev_post_kobj_evt(vdev_t *vd)
|
||||
{
|
||||
if (vd->vdev_ops->vdev_op_kobj_evt_post &&
|
||||
vd->vdev_kobj_flag == B_FALSE) {
|
||||
vd->vdev_kobj_flag = B_TRUE;
|
||||
vd->vdev_ops->vdev_op_kobj_evt_post(vd);
|
||||
}
|
||||
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_post_kobj_evt(vd->vdev_child[c]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over all the vdevs except spare, and clear kobj events
|
||||
*/
|
||||
void
|
||||
vdev_clear_kobj_evt(vdev_t *vd)
|
||||
{
|
||||
vd->vdev_kobj_flag = B_FALSE;
|
||||
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_clear_kobj_evt(vd->vdev_child[c]);
|
||||
}
|
||||
|
||||
int
|
||||
vdev_dtl_load(vdev_t *vd)
|
||||
{
|
||||
@ -3947,6 +3983,29 @@ vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux)
|
||||
return (spa_vdev_state_exit(spa, vd, 0));
|
||||
}
|
||||
|
||||
int
|
||||
vdev_remove_wanted(spa_t *spa, uint64_t guid)
|
||||
{
|
||||
vdev_t *vd;
|
||||
|
||||
spa_vdev_state_enter(spa, SCL_NONE);
|
||||
|
||||
if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
|
||||
return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
|
||||
|
||||
/*
|
||||
* If the vdev is already removed, then don't do anything.
|
||||
*/
|
||||
if (vd->vdev_removed)
|
||||
return (spa_vdev_state_exit(spa, NULL, 0));
|
||||
|
||||
vd->vdev_remove_wanted = B_TRUE;
|
||||
spa_async_request(spa, SPA_ASYNC_REMOVE);
|
||||
|
||||
return (spa_vdev_state_exit(spa, vd, 0));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Online the given vdev.
|
||||
*
|
||||
|
@ -1912,6 +1912,10 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
|
||||
error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
|
||||
break;
|
||||
|
||||
case VDEV_STATE_REMOVED:
|
||||
error = vdev_remove_wanted(spa, zc->zc_guid);
|
||||
break;
|
||||
|
||||
default:
|
||||
error = SET_ERROR(EINVAL);
|
||||
}
|
||||
@ -2928,7 +2932,7 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc)
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
if ((spa = spa_lookup(zc->zc_name)) != NULL) {
|
||||
spa_configfile_set(spa, props, B_FALSE);
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE);
|
||||
spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
|
||||
}
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
if (spa != NULL) {
|
||||
|
@ -3923,7 +3923,7 @@ zio_vdev_io_done(zio_t *zio)
|
||||
|
||||
ops->vdev_op_io_done(zio);
|
||||
|
||||
if (unexpected_error)
|
||||
if (unexpected_error && vd->vdev_remove_wanted == B_FALSE)
|
||||
VERIFY(vdev_probe(vd, zio) == NULL);
|
||||
|
||||
return (zio);
|
||||
|
@ -1884,7 +1884,7 @@ function wait_hotspare_state # pool disk state timeout
|
||||
#
|
||||
# Return 0 if pool/disk matches expected state, 1 otherwise
|
||||
#
|
||||
function check_vdev_state # pool disk state{online,offline,unavail}
|
||||
function check_vdev_state # pool disk state{online,offline,unavail,removed}
|
||||
{
|
||||
typeset pool=$1
|
||||
typeset disk=${2#*$DEV_DSKDIR/}
|
||||
|
@ -24,29 +24,28 @@
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Testing Fault Management Agent ZED Logic - Physically removed device is
|
||||
# made unavail and onlined when reattached
|
||||
# Testing Fault Management Agent ZED Logic - Physically detached device is
|
||||
# made removed and onlined when reattached
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool
|
||||
# 2. Simulate physical removal of one device
|
||||
# 3. Verify the device is unavailable
|
||||
# 3. Verify the device is removed when detached
|
||||
# 4. Reattach the device
|
||||
# 5. Verify the device is onlined
|
||||
# 6. Repeat the same tests with a spare device:
|
||||
# zed will use the spare to handle the removed data device
|
||||
# 7. Repeat the same tests again with a faulted spare device:
|
||||
# the removed data device should be unavailable
|
||||
# the removed data device should be removed
|
||||
#
|
||||
# NOTE: the use of 'block_device_wait' throughout the test helps avoid race
|
||||
# conditions caused by mixing creation/removal events from partitioning the
|
||||
# disk (zpool create) and events from physically removing it (remove_disk).
|
||||
#
|
||||
# NOTE: the test relies on 'zpool sync' to prompt the kmods to transition a
|
||||
# vdev to the unavailable state. The ZED does receive a removal notification
|
||||
# but only relies on it to activate a hot spare. Additional work is planned
|
||||
# to extend an existing ioctl interface to allow the ZED to transition the
|
||||
# vdev in to a removed state.
|
||||
# NOTE: the test relies on ZED to transit state to removed on device removed
|
||||
# event. The ZED does receive a removal notification but only relies on it to
|
||||
# activate a hot spare. Additional work is planned to extend an existing ioctl
|
||||
# interface to allow the ZED to transition the vdev in to a removed state.
|
||||
#
|
||||
verify_runnable "both"
|
||||
|
||||
@ -103,8 +102,8 @@ do
|
||||
log_must mkfile 1m $mntpnt/file
|
||||
sync_pool $TESTPOOL
|
||||
|
||||
# 3. Verify the device is unavailable.
|
||||
log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
|
||||
# 3. Verify the device is removed.
|
||||
log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"
|
||||
|
||||
# 4. Reattach the device
|
||||
insert_disk $removedev
|
||||
@ -136,7 +135,7 @@ do
|
||||
|
||||
# 3. Verify the device is handled by the spare.
|
||||
log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
|
||||
log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
|
||||
log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"
|
||||
|
||||
# 4. Reattach the device
|
||||
insert_disk $removedev
|
||||
@ -170,8 +169,8 @@ do
|
||||
log_must mkfile 1m $mntpnt/file
|
||||
sync_pool $TESTPOOL
|
||||
|
||||
# 4. Verify the device is unavailable
|
||||
log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
|
||||
# 4. Verify the device is removed
|
||||
log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"
|
||||
|
||||
# 5. Reattach the device
|
||||
insert_disk $removedev
|
||||
|
Loading…
Reference in New Issue
Block a user