zed: mark disks as REMOVED when they are removed

ZED does not take any action for disk removal events if there is no
spare VDEV available. Add zpool_vdev_remove_wanted() in libzfs and
vdev_remove_wanted() in vdev.c so that ZED can transition the VDEV to
the REMOVED state when it receives a removal event. This means that if
you are running zed and remove a disk, it will be properly marked as
REMOVED.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #13797
Authored by Ameer Hamza on 2022-09-28 21:48:46 +05:00; committed by GitHub
commit 55c12724d3 (parent eb9bec0a5d)
24 changed files with 395 additions and 51 deletions
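As a usage illustration only (not part of this change set), the sketch below shows how the new libzfs entry point could be driven from a minimal standalone program, which is roughly what ZED's retire agent now does on a udev remove event. The pool name "tank" and the device path "/dev/sdb" are placeholders, and error handling is reduced to the bare minimum.

/*
 * Hypothetical example (assumes a libzfs built from this commit).
 * It requests the asynchronous REMOVED transition for a single vdev,
 * the same call ZED's retire agent now issues on a udev remove event.
 */
#include <stdio.h>
#include <libzfs.h>

int
main(void)
{
    libzfs_handle_t *hdl = libzfs_init();
    if (hdl == NULL)
        return (1);

    zpool_handle_t *zhp = zpool_open(hdl, "tank");   /* placeholder pool */
    if (zhp == NULL) {
        libzfs_fini(hdl);
        return (1);
    }

    /* Ask the kernel to mark the vdev REMOVED (via ZFS_IOC_VDEV_SET_STATE). */
    if (zpool_vdev_remove_wanted(zhp, "/dev/sdb") != 0)  /* placeholder path */
        (void) fprintf(stderr, "zpool_vdev_remove_wanted failed\n");

    zpool_close(zhp);
    libzfs_fini(hdl);
    return (0);
}

Unlike zpool_vdev_remove(), which removes the device from the pool configuration, this call only requests the asynchronous REMOVED transition, so a reinserted disk can later come back ONLINE.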


@@ -80,6 +80,7 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
     char *path = NULL;
     uint_t c, children;
     nvlist_t **child;
+    uint64_t vdev_guid;

     /*
      * First iterate over any children.
@@ -100,7 +101,7 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
         &child, &children) == 0) {
         for (c = 0; c < children; c++) {
             if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
-                gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
+                gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
                 return (B_TRUE);
             }
         }
@@ -109,7 +110,7 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
         &child, &children) == 0) {
         for (c = 0; c < children; c++) {
             if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
-                gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
+                gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
                 return (B_TRUE);
             }
         }
@@ -126,6 +127,21 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
             &gsp->gs_vdev_expandtime);
         return (B_TRUE);
     }
+    /*
+     * Otherwise, on a vdev guid match, grab the devid and expansion
+     * time. The devid might be missing on removal since its not part
+     * of blkid cache and L2ARC VDEV does not contain pool guid in its
+     * blkid, so this is a special case for L2ARC VDEV.
+     */
+    else if (gsp->gs_vdev_guid != 0 && gsp->gs_devid == NULL &&
+        nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&
+        gsp->gs_vdev_guid == vdev_guid) {
+        (void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
+            &gsp->gs_devid);
+        (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
+            &gsp->gs_vdev_expandtime);
+        return (B_TRUE);
+    }

     return (B_FALSE);
 }
@@ -148,7 +164,7 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
     /*
     * if a match was found then grab the pool guid
     */
-    if (gsp->gs_vdev_guid) {
+    if (gsp->gs_vdev_guid && gsp->gs_devid) {
         (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
             &gsp->gs_pool_guid);
     }
@@ -195,11 +211,13 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
         uint64_t pool_guid = 0, vdev_guid = 0;
         guid_search_t search = { 0 };
         device_type_t devtype = DEVICE_TYPE_PRIMARY;
+        char *devid = NULL;

         class = "resource.fs.zfs.removed";
         subclass = "";

         (void) nvlist_add_string(payload, FM_CLASS, class);
+        (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid);
         (void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
         (void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
@@ -209,21 +227,25 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
         (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);

         /*
+         * If devid is missing but vdev_guid is available, find devid
+         * and pool_guid from vdev_guid.
         * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
         * ZFS_EV_POOL_GUID may be missing so find them.
         */
-        if (pool_guid == 0 || vdev_guid == 0) {
-            if ((nvlist_lookup_string(nvl, DEV_IDENTIFIER,
-                &search.gs_devid) == 0) &&
-                (zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search)
-                == 1)) {
-                if (pool_guid == 0)
-                    pool_guid = search.gs_pool_guid;
-                if (vdev_guid == 0)
-                    vdev_guid = search.gs_vdev_guid;
-                devtype = search.gs_vdev_type;
-            }
+        if (devid == NULL || pool_guid == 0 || vdev_guid == 0) {
+            if (devid == NULL)
+                search.gs_vdev_guid = vdev_guid;
+            else
+                search.gs_devid = devid;
+            zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
+            if (devid == NULL)
+                devid = search.gs_devid;
+            if (pool_guid == 0)
+                pool_guid = search.gs_pool_guid;
+            if (vdev_guid == 0)
+                vdev_guid = search.gs_vdev_guid;
+            devtype = search.gs_vdev_type;
         }

         /*
         * We want to avoid reporting "remove" events coming from
@@ -235,7 +257,9 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
             search.gs_vdev_expandtime + 10 > tv.tv_sec) {
             zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
                 "for recently expanded device '%s'", EC_DEV_REMOVE,
-                search.gs_devid);
+                devid);
+            fnvlist_free(payload);
+            free(event);
             goto out;
         }


@@ -323,6 +323,9 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
     boolean_t is_disk;
     vdev_aux_t aux;
     uint64_t state = 0;
+    int l2arc;
+    vdev_stat_t *vs;
+    unsigned int c;

     fmd_hdl_debug(hdl, "zfs_retire_recv: '%s'", class);
@@ -352,13 +355,32 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
         devname = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);

-        /* Can't replace l2arc with a spare: offline the device */
-        if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
-            &devtype) == 0 && strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) {
-            fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", devname);
-            zpool_vdev_offline(zhp, devname, B_TRUE);
-        } else if (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
-            replace_with_spare(hdl, zhp, vdev) == B_FALSE) {
+        nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
+            (uint64_t **)&vs, &c);
+
+        /*
+         * If state removed is requested for already removed vdev,
+         * its a loopback event from spa_async_remove(). Just
+         * ignore it.
+         */
+        if (vs->vs_state == VDEV_STATE_REMOVED &&
+            state == VDEV_STATE_REMOVED)
+            return;
+
+        l2arc = (nvlist_lookup_string(nvl,
+            FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, &devtype) == 0 &&
+            strcmp(devtype, VDEV_TYPE_L2CACHE) == 0);
+
+        /* Remove the vdev since device is unplugged */
+        if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) {
+            int status = zpool_vdev_remove_wanted(zhp, devname);
+            fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'"
+                ", ret:%d", devname, status);
+        }
+
+        /* Replace the vdev with a spare if its not a l2arc */
+        if (!l2arc && (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
+            replace_with_spare(hdl, zhp, vdev) == B_FALSE)) {
             /* Could not handle with spare */
             fmd_hdl_debug(hdl, "no spare for '%s'", devname);
         }


@@ -1166,7 +1166,7 @@ ztest_kill(ztest_shared_t *zs)
     * See comment above spa_write_cachefile().
     */
     mutex_enter(&spa_namespace_lock);
-    spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE);
+    spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE, B_FALSE);
     mutex_exit(&spa_namespace_lock);

     (void) raise(SIGKILL);


@@ -103,6 +103,57 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE], [
     ])
 ])

+dnl #
+dnl # bdev_kobj() is introduced from 5.12
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ], [
+    ZFS_LINUX_TEST_SRC([bdev_kobj], [
+        #include <linux/fs.h>
+        #include <linux/blkdev.h>
+        #include <linux/kobject.h>
+    ], [
+        struct block_device *bdev = NULL;
+        struct kobject *disk_kobj;
+        disk_kobj = bdev_kobj(bdev);
+    ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ], [
+    AC_MSG_CHECKING([whether bdev_kobj() exists])
+    ZFS_LINUX_TEST_RESULT([bdev_kobj], [
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_BDEV_KOBJ, 1,
+            [bdev_kobj() exists])
+    ], [
+        AC_MSG_RESULT(no)
+    ])
+])
+
+dnl #
+dnl # part_to_dev() was removed in 5.12
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV], [
+    ZFS_LINUX_TEST_SRC([part_to_dev], [
+        #include <linux/fs.h>
+        #include <linux/blkdev.h>
+    ], [
+        struct hd_struct *p = NULL;
+        struct device *pdev;
+        pdev = part_to_dev(p);
+    ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV], [
+    AC_MSG_CHECKING([whether part_to_dev() exists])
+    ZFS_LINUX_TEST_RESULT([part_to_dev], [
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_PART_TO_DEV, 1,
+            [part_to_dev() exists])
+    ], [
+        AC_MSG_RESULT(no)
+    ])
+])
+
 dnl #
 dnl # 5.10 API, check_disk_change() is removed, in favor of
 dnl # bdev_check_media_change(), which doesn't force revalidation
@@ -405,6 +456,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
     ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE
     ZFS_AC_KERNEL_SRC_BLKDEV_BDEVNAME
     ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
+    ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
+    ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
 ])

 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
@@ -421,4 +474,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
     ZFS_AC_KERNEL_BLKDEV_BDEVNAME
     ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS
     ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
+    ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
+    ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
 ])


@@ -310,6 +310,7 @@ _LIBZFS_H int zpool_vdev_indirect_size(zpool_handle_t *, const char *,
     uint64_t *);
 _LIBZFS_H int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **,
     nvlist_t *, splitflags_t);
+_LIBZFS_H int zpool_vdev_remove_wanted(zpool_handle_t *, const char *);

 _LIBZFS_H int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t);
 _LIBZFS_H int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t);


@@ -71,6 +71,7 @@ kernel_spl_sys_HEADERS = \
     %D%/spl/sys/kmem_cache.h \
     %D%/spl/sys/kstat.h \
     %D%/spl/sys/list.h \
+    %D%/spl/sys/misc.h \
     %D%/spl/sys/mod_os.h \
     %D%/spl/sys/mutex.h \
     %D%/spl/sys/param.h \


@@ -261,6 +261,32 @@ bio_set_bi_error(struct bio *bio, int error)
 #define BIO_END_IO(bio, error) bio_endio(bio, error);
 #endif /* HAVE_1ARG_BIO_END_IO_T */

+/*
+ * 5.15 MACRO,
+ *   GD_DEAD
+ *
+ * 2.6.36 - 5.14 MACRO,
+ *   GENHD_FL_UP
+ *
+ * Check the disk status and return B_TRUE if alive
+ * otherwise B_FALSE
+ */
+static inline boolean_t
+zfs_check_disk_status(struct block_device *bdev)
+{
+#if defined(GENHD_FL_UP)
+    return (!!(bdev->bd_disk->flags & GENHD_FL_UP));
+#elif defined(GD_DEAD)
+    return (!test_bit(GD_DEAD, &bdev->bd_disk->state));
+#else
+/*
+ * This is encountered if neither GENHD_FL_UP nor GD_DEAD is available in
+ * the kernel - likely due to an MACRO change that needs to be chased down.
+ */
+#error "Unsupported kernel: no usable disk status check"
+#endif
+}
+
 /*
 * 4.1 API,
 * 3.10.0 CentOS 7.x API,


@@ -0,0 +1,29 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef _OS_LINUX_SPL_MISC_H
+#define _OS_LINUX_SPL_MISC_H
+
+#include <linux/kobject.h>
+
+extern void spl_signal_kobj_evt(struct block_device *bdev);
+
+#endif


@@ -838,7 +838,7 @@ extern kmutex_t spa_namespace_lock;
 #define SPA_CONFIG_UPDATE_POOL 0
 #define SPA_CONFIG_UPDATE_VDEVS 1

-extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t);
+extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
 extern void spa_config_load(void);
 extern nvlist_t *spa_all_configs(uint64_t *);
 extern void spa_config_set(spa_t *spa, nvlist_t *config);


@@ -148,6 +148,7 @@ extern int vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux);
 extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
     vdev_state_t *);
 extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
+extern int vdev_remove_wanted(spa_t *spa, uint64_t guid);
 extern void vdev_clear(spa_t *spa, vdev_t *vd);

 extern boolean_t vdev_is_dead(vdev_t *vd);
@@ -190,6 +191,8 @@ typedef enum vdev_config_flag {
     VDEV_CONFIG_MISSING = 1 << 4
 } vdev_config_flag_t;

+extern void vdev_post_kobj_evt(vdev_t *vd);
+extern void vdev_clear_kobj_evt(vdev_t *vd);
 extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config);
 extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
     boolean_t getstats, vdev_config_flag_t flags);


@@ -69,6 +69,7 @@ extern uint_t zfs_vdev_async_write_max_active;
 * Virtual device operations
 */
 typedef int vdev_init_func_t(spa_t *spa, nvlist_t *nv, void **tsd);
+typedef void vdev_kobj_post_evt_func_t(vdev_t *vd);
 typedef void vdev_fini_func_t(vdev_t *vd);
 typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
     uint64_t *ashift, uint64_t *pshift);
@@ -123,6 +124,7 @@ typedef const struct vdev_ops {
     vdev_config_generate_func_t *vdev_op_config_generate;
     vdev_nparity_func_t *vdev_op_nparity;
     vdev_ndisks_func_t *vdev_op_ndisks;
+    vdev_kobj_post_evt_func_t *vdev_op_kobj_evt_post;
     char vdev_op_type[16];
     boolean_t vdev_op_leaf;
 } vdev_ops_t;
@@ -436,6 +438,7 @@ struct vdev {
     boolean_t vdev_isl2cache;      /* was a l2cache device */
     boolean_t vdev_copy_uberblocks;  /* post expand copy uberblocks */
     boolean_t vdev_resilver_deferred;  /* resilver deferred */
+    boolean_t vdev_kobj_flag;      /* kobj event record */
     vdev_queue_t vdev_queue;       /* I/O deadline schedule queue */
     vdev_cache_t vdev_cache;       /* physical block cache */
     spa_aux_vdev_t *vdev_aux;      /* for l2cache and spares vdevs */


@@ -50,6 +50,7 @@ extern "C" {
 #include <sys/kmem.h>
 #include <sys/kmem_cache.h>
 #include <sys/vmem.h>
+#include <sys/misc.h>
 #include <sys/taskq.h>
 #include <sys/param.h>
 #include <sys/disp.h>


@@ -550,6 +550,7 @@
     <elf-symbol name='zpool_vdev_path_to_guid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_vdev_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_vdev_remove_cancel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -3505,6 +3506,11 @@
       <parameter type-id='c19b74c3' name='istmp'/>
       <return type-id='95e97e5e'/>
     </function-decl>
+    <function-decl name='zpool_vdev_remove_wanted' mangled-name='zpool_vdev_remove_wanted' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_remove_wanted'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='path'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
     <function-decl name='zpool_vdev_fault' mangled-name='zpool_vdev_fault' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_fault'>
       <parameter type-id='4c81de99' name='zhp'/>
       <parameter type-id='9c313c2d' name='guid'/>


@@ -3073,6 +3073,43 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
     }
 }

+/*
+ * Remove the specified vdev asynchronously from the configuration, so
+ * that it may come ONLINE if reinserted. This is called from zed on
+ * Udev remove event.
+ * Note: We also have a similar function zpool_vdev_remove() that
+ * removes the vdev from the pool.
+ */
+int
+zpool_vdev_remove_wanted(zpool_handle_t *zhp, const char *path)
+{
+    zfs_cmd_t zc = {"\0"};
+    char errbuf[ERRBUFLEN];
+    nvlist_t *tgt;
+    boolean_t avail_spare, l2cache;
+    libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+    (void) snprintf(errbuf, sizeof (errbuf),
+        dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
+
+    (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+    if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
+        NULL)) == NULL)
+        return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
+
+    zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
+
+    if (avail_spare)
+        return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
+
+    zc.zc_cookie = VDEV_STATE_REMOVED;
+
+    if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+        return (0);
+
+    return (zpool_standard_error(hdl, errno, errbuf));
+}
+
 /*
 * Mark the given vdev faulted.
 */


@@ -47,6 +47,7 @@
 #include <linux/mod_compat.h>
 #include <sys/cred.h>
 #include <sys/vnode.h>
+#include <sys/misc.h>

 unsigned long spl_hostid = 0;
 EXPORT_SYMBOL(spl_hostid);
@@ -517,6 +518,38 @@ ddi_copyin(const void *from, void *to, size_t len, int flags)
 }
 EXPORT_SYMBOL(ddi_copyin);

+/*
+ * Post a uevent to userspace whenever a new vdev adds to the pool. It is
+ * necessary to sync blkid information with udev, which zed daemon uses
+ * during device hotplug to identify the vdev.
+ */
+void
+spl_signal_kobj_evt(struct block_device *bdev)
+{
+#if defined(HAVE_BDEV_KOBJ) || defined(HAVE_PART_TO_DEV)
+#ifdef HAVE_BDEV_KOBJ
+    struct kobject *disk_kobj = bdev_kobj(bdev);
+#else
+    struct kobject *disk_kobj = &part_to_dev(bdev->bd_part)->kobj;
+#endif
+    if (disk_kobj) {
+        int ret = kobject_uevent(disk_kobj, KOBJ_CHANGE);
+        if (ret) {
+            pr_warn("ZFS: Sending event '%d' to kobject: '%s'"
+                " (%p): failed(ret:%d)\n", KOBJ_CHANGE,
+                kobject_name(disk_kobj), disk_kobj, ret);
+        }
+    }
+#else
+/*
+ * This is encountered if neither bdev_kobj() nor part_to_dev() is available
+ * in the kernel - likely due to an API change that needs to be chased down.
+ */
+#error "Unsupported kernel: unable to get struct kobj from bdev"
+#endif
+}
+EXPORT_SYMBOL(spl_signal_kobj_evt);
+
 int
 ddi_copyout(const void *from, void *to, size_t len, int flags)
 {


@@ -179,6 +179,18 @@ vdev_disk_error(zio_t *zio)
         zio->io_flags);
 }

+static void
+vdev_disk_kobj_evt_post(vdev_t *v)
+{
+    vdev_disk_t *vd = v->vdev_tsd;
+    if (vd && vd->vd_bdev) {
+        spl_signal_kobj_evt(vd->vd_bdev);
+    } else {
+        vdev_dbgmsg(v, "vdev_disk_t is NULL for VDEV:%s\n",
+            v->vdev_path);
+    }
+}
+
 static int
 vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
     uint64_t *logical_ashift, uint64_t *physical_ashift)
@@ -290,6 +302,13 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
         bdev = blkdev_get_by_path(v->vdev_path, mode | FMODE_EXCL,
             zfs_vdev_holder);
         if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
+            /*
+             * There is no point of waiting since device is removed
+             * explicitly
+             */
+            if (v->vdev_removed)
+                break;
+
             schedule_timeout(MSEC_TO_TICK(10));
         } else if (unlikely(PTR_ERR(bdev) == -ERESTARTSYS)) {
             timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms * 10);
@@ -901,7 +920,7 @@ vdev_disk_io_done(zio_t *zio)
         vdev_t *v = zio->io_vd;
         vdev_disk_t *vd = v->vdev_tsd;

-        if (zfs_check_media_change(vd->vd_bdev)) {
+        if (!zfs_check_disk_status(vd->vd_bdev)) {
             invalidate_bdev(vd->vd_bdev);
             v->vdev_remove_wanted = B_TRUE;
             spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
@@ -957,7 +976,8 @@ vdev_ops_t vdev_disk_ops = {
     .vdev_op_nparity = NULL,
     .vdev_op_ndisks = NULL,
     .vdev_op_type = VDEV_TYPE_DISK,    /* name of this vdev type */
-    .vdev_op_leaf = B_TRUE             /* leaf vdev */
+    .vdev_op_leaf = B_TRUE,            /* leaf vdev */
+    .vdev_op_kobj_evt_post = vdev_disk_kobj_evt_post
 };

 /*


@@ -909,7 +909,16 @@ spa_change_guid(spa_t *spa)
         spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED);

     if (error == 0) {
-        spa_write_cachefile(spa, B_FALSE, B_TRUE);
+        /*
+         * Clear the kobj flag from all the vdevs to allow
+         * vdev_cache_process_kobj_evt() to post events to all the
+         * vdevs since GUID is updated.
+         */
+        vdev_clear_kobj_evt(spa->spa_root_vdev);
+        for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
+            vdev_clear_kobj_evt(spa->spa_l2cache.sav_vdevs[i]);
+
+        spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);
         spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID);
     }
@@ -5220,7 +5229,7 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag,
             */
             spa_unload(spa);
             spa_deactivate(spa);
-            spa_write_cachefile(spa, B_TRUE, B_TRUE);
+            spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
             spa_remove(spa);
             if (locked)
                 mutex_exit(&spa_namespace_lock);
@@ -6044,7 +6053,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
     spa_spawn_aux_threads(spa);

-    spa_write_cachefile(spa, B_FALSE, B_TRUE);
+    spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);

     /*
     * Don't count references from objsets that are already closed
@@ -6107,7 +6116,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
         if (props != NULL)
             spa_configfile_set(spa, props, B_FALSE);

-        spa_write_cachefile(spa, B_FALSE, B_TRUE);
+        spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
         spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT);
         zfs_dbgmsg("spa_import: verbatim import of %s", pool);
         mutex_exit(&spa_namespace_lock);
@@ -6503,7 +6512,7 @@ export_spa:
     if (new_state != POOL_STATE_UNINITIALIZED) {
         if (!hardforce)
-            spa_write_cachefile(spa, B_TRUE, B_TRUE);
+            spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
         spa_remove(spa);
     } else {
         /*


@@ -240,7 +240,8 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
 * would be required.
 */
 void
-spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
+spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent,
+    boolean_t postblkidevent)
 {
     spa_config_dirent_t *dp, *tdp;
     nvlist_t *nvl;
@@ -346,6 +347,16 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
     if (postsysevent)
         spa_event_notify(target, NULL, NULL, ESC_ZFS_CONFIG_SYNC);
+
+    /*
+     * Post udev event to sync blkid information if the pool is created
+     * or a new vdev is added to the pool.
+     */
+    if ((target->spa_root_vdev) && postblkidevent) {
+        vdev_post_kobj_evt(target->spa_root_vdev);
+        for (int i = 0; i < target->spa_l2cache.sav_count; i++)
+            vdev_post_kobj_evt(target->spa_l2cache.sav_vdevs[i]);
+    }
 }

 /*
@@ -600,6 +611,7 @@ spa_config_update(spa_t *spa, int what)
     */
     if (!spa->spa_is_root) {
         spa_write_cachefile(spa, B_FALSE,
+            what != SPA_CONFIG_UPDATE_POOL,
             what != SPA_CONFIG_UPDATE_POOL);
     }


@@ -1290,7 +1290,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error,
     * If the config changed, update the config cache.
     */
    if (config_changed)
-        spa_write_cachefile(spa, B_FALSE, B_TRUE);
+        spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
 }

 /*
@@ -1385,7 +1385,7 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
     */
     if (config_changed) {
         mutex_enter(&spa_namespace_lock);
-        spa_write_cachefile(spa, B_FALSE, B_TRUE);
+        spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
         mutex_exit(&spa_namespace_lock);
     }


@@ -1948,6 +1948,14 @@ vdev_open(vdev_t *vd)
     error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize,
         &logical_ashift, &physical_ashift);

+    /* Keep the device in removed state if unplugged */
+    if (error == ENOENT && vd->vdev_removed) {
+        vdev_set_state(vd, B_TRUE, VDEV_STATE_REMOVED,
+            VDEV_AUX_NONE);
+        return (error);
+    }
+
     /*
     * Physical volume size should never be larger than its max size, unless
     * the disk has shrunk while we were reading it or the device is buggy
@@ -3166,6 +3174,34 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
     mutex_exit(&vd->vdev_dtl_lock);
 }

+/*
+ * Iterate over all the vdevs except spare, and post kobj events
+ */
+void
+vdev_post_kobj_evt(vdev_t *vd)
+{
+    if (vd->vdev_ops->vdev_op_kobj_evt_post &&
+        vd->vdev_kobj_flag == B_FALSE) {
+        vd->vdev_kobj_flag = B_TRUE;
+        vd->vdev_ops->vdev_op_kobj_evt_post(vd);
+    }
+
+    for (int c = 0; c < vd->vdev_children; c++)
+        vdev_post_kobj_evt(vd->vdev_child[c]);
+}
+
+/*
+ * Iterate over all the vdevs except spare, and clear kobj events
+ */
+void
+vdev_clear_kobj_evt(vdev_t *vd)
+{
+    vd->vdev_kobj_flag = B_FALSE;
+
+    for (int c = 0; c < vd->vdev_children; c++)
+        vdev_clear_kobj_evt(vd->vdev_child[c]);
+}
+
 int
 vdev_dtl_load(vdev_t *vd)
 {
@@ -3947,6 +3983,29 @@ vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux)
     return (spa_vdev_state_exit(spa, vd, 0));
 }

+int
+vdev_remove_wanted(spa_t *spa, uint64_t guid)
+{
+    vdev_t *vd;
+
+    spa_vdev_state_enter(spa, SCL_NONE);
+
+    if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
+        return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
+
+    /*
+     * If the vdev is already removed, then don't do anything.
+     */
+    if (vd->vdev_removed)
+        return (spa_vdev_state_exit(spa, NULL, 0));
+
+    vd->vdev_remove_wanted = B_TRUE;
+    spa_async_request(spa, SPA_ASYNC_REMOVE);
+
+    return (spa_vdev_state_exit(spa, vd, 0));
+}
+
 /*
 * Online the given vdev.
 *


@@ -1912,6 +1912,10 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
         error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
         break;

+    case VDEV_STATE_REMOVED:
+        error = vdev_remove_wanted(spa, zc->zc_guid);
+        break;
+
     default:
         error = SET_ERROR(EINVAL);
     }
@@ -2928,7 +2932,7 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc)
         mutex_enter(&spa_namespace_lock);
         if ((spa = spa_lookup(zc->zc_name)) != NULL) {
             spa_configfile_set(spa, props, B_FALSE);
-            spa_write_cachefile(spa, B_FALSE, B_TRUE);
+            spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
         }
         mutex_exit(&spa_namespace_lock);
         if (spa != NULL) {


@@ -3923,7 +3923,7 @@ zio_vdev_io_done(zio_t *zio)
         ops->vdev_op_io_done(zio);

-    if (unexpected_error)
+    if (unexpected_error && vd->vdev_remove_wanted == B_FALSE)
         VERIFY(vdev_probe(vd, zio) == NULL);

     return (zio);


@@ -1884,7 +1884,7 @@ function wait_hotspare_state # pool disk state timeout
 #
 # Return 0 is pool/disk matches expected state, 1 otherwise
 #
-function check_vdev_state # pool disk state{online,offline,unavail}
+function check_vdev_state # pool disk state{online,offline,unavail,removed}
 {
     typeset pool=$1
     typeset disk=${2#*$DEV_DSKDIR/}


@@ -24,29 +24,28 @@
 #
 # DESCRIPTION:
-# Testing Fault Management Agent ZED Logic - Physically removed device is
-# made unavail and onlined when reattached
+# Testing Fault Management Agent ZED Logic - Physically detached device is
+# made removed and onlined when reattached
 #
 # STRATEGY:
 # 1. Create a pool
 # 2. Simulate physical removal of one device
-# 3. Verify the device is unavailable
+# 3. Verify the device is removed when detached
 # 4. Reattach the device
 # 5. Verify the device is onlined
 # 6. Repeat the same tests with a spare device:
 #    zed will use the spare to handle the removed data device
 # 7. Repeat the same tests again with a faulted spare device:
-#    the removed data device should be unavailable
+#    the removed data device should be removed
 #
 # NOTE: the use of 'block_device_wait' throughout the test helps avoid race
 # conditions caused by mixing creation/removal events from partitioning the
 # disk (zpool create) and events from physically removing it (remove_disk).
 #
-# NOTE: the test relies on 'zpool sync' to prompt the kmods to transition a
-# vdev to the unavailable state. The ZED does receive a removal notification
-# but only relies on it to activate a hot spare. Additional work is planned
-# to extend an existing ioctl interface to allow the ZED to transition the
-# vdev in to a removed state.
+# NOTE: the test relies on ZED to transit state to removed on device removed
+# event. The ZED does receive a removal notification but only relies on it to
+# activate a hot spare. Additional work is planned to extend an existing ioctl
+# interface to allow the ZED to transition the vdev in to a removed state.
 #

 verify_runnable "both"
@@ -103,8 +102,8 @@ do
     log_must mkfile 1m $mntpnt/file
     sync_pool $TESTPOOL

-    # 3. Verify the device is unavailable.
-    log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
+    # 3. Verify the device is removed.
+    log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

     # 4. Reattach the device
     insert_disk $removedev
@@ -136,7 +135,7 @@ do
     # 3. Verify the device is handled by the spare.
     log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
-    log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
+    log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

     # 4. Reattach the device
     insert_disk $removedev
@@ -170,8 +169,8 @@ do
     log_must mkfile 1m $mntpnt/file
     sync_pool $TESTPOOL

-    # 4. Verify the device is unavailable
-    log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"
+    # 4. Verify the device is removed
+    log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"

     # 5. Reattach the device
     insert_disk $removedev